From 3d4a31d304064f88d2d1e414346c790f099743b5 Mon Sep 17 00:00:00 2001 From: Avra Sengupta Date: Wed, 29 Jan 2014 03:06:19 +0000 Subject: glusterd/geo-rep: Looks for state_file and pid-file in gsyncd_template.conf If entries like state_file or pid-file are missing in gsyncd.conf, or if gsyncd.conf itself is missing, glusterd looks for the missing configs in gsyncd_template.conf. Status will display "Config Corrupted" as long as the entry is missing in the config file. A missing state-file entry in both the config and the template will not allow starting a geo-rep session. However, stop force will successfully stop an already running session even if the state-file entries are missing in both the config file and the template, as long as either of them has a pid-file entry. If the pid-file entry is missing in the gsyncd.conf file, starting a geo-rep session will not be allowed. If the pid-file entry is missing in an already started session, then stop force will fetch it from the config template and stop the session. If the pid-file entry is missing in both the config and the template, stop force will fail with an appropriate error stating that the pid-file entry is missing. 
Change-Id: I81d7cbc4af085d82895bbef46ca732555aa5365d BUG: 1059092 Signed-off-by: Avra Sengupta Reviewed-on: http://review.gluster.org/6856 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 544 +++++++++++++++++++-------- xlators/mgmt/glusterd/src/glusterd-op-sm.h | 3 +- xlators/mgmt/glusterd/src/glusterd-utils.c | 71 +++- xlators/mgmt/glusterd/src/glusterd-utils.h | 7 +- 4 files changed, 460 insertions(+), 165 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 9433a128e47..29022a30733 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -668,47 +668,72 @@ glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master, } static int -gsyncd_getpidfile (char *master, char *slave, char *pidfile, char *conf_path) +gsyncd_getpidfile (char *master, char *slave, char *pidfile, + char *conf_path, gf_boolean_t *is_template_in_use) { - int ret = -1; - glusterd_conf_t *priv = NULL; - char *confpath = NULL; - char conf_buf[PATH_MAX] = ""; - struct stat stbuf = {0,}; - + char temp_conf_path[PATH_MAX] = ""; + char *working_conf_path = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + struct stat stbuf = {0,}; GF_ASSERT (THIS); GF_ASSERT (THIS->private); + GF_ASSERT (conf_path); priv = THIS->private; GF_VALIDATE_OR_GOTO ("gsync", master, out); GF_VALIDATE_OR_GOTO ("gsync", slave, out); + snprintf (temp_conf_path, sizeof(temp_conf_path) - 1, + "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); + ret = lstat (conf_path, &stbuf); if (!ret) { gf_log ("", GF_LOG_DEBUG, "Using passed config template(%s).", conf_path); - confpath = conf_path; + working_conf_path = conf_path; } else { - ret = snprintf (conf_buf, sizeof(conf_buf) - 1, - "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); - conf_buf[ret] = '\0'; - confpath = conf_buf; - gf_log ("", GF_LOG_DEBUG, "Using default config template(%s).", - 
confpath); + gf_log ("", GF_LOG_WARNING, "Config file (%s) missing. " + "Looking for template config file (%s)", + conf_path, temp_conf_path); + ret = lstat (temp_conf_path, &stbuf); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Template config file (%s) missing.", + temp_conf_path); + goto out; + } + gf_log ("", GF_LOG_INFO, "Using default config template(%s).", + temp_conf_path); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; } +fetch_data: + ret = glusterd_gsync_get_param_file (pidfile, "pid", master, - slave, confpath); - if (ret == -1) { - ret = -2; - gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string"); - goto out; + slave, working_conf_path); + if ((ret == -1) || strlen(pidfile) == 0) { + if (*is_template_in_use == _gf_false) { + gf_log ("", GF_LOG_WARNING, + "failed to create the pidfile string. " + "Trying default config template"); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + goto fetch_data; + } else { + ret = -2; + gf_log ("", GF_LOG_WARNING, + "failed to create the pidfile string from template config"); + goto out; + } } - ret = open (pidfile, O_RDWR); + gf_log ("", GF_LOG_DEBUG, "pidfile = %s", pidfile); + ret = open (pidfile, O_RDWR); out: return ret; } @@ -730,12 +755,14 @@ gsync_status_byfd (int fd) * return -1 when not running */ int -gsync_status (char *master, char *slave, char *conf_path, int *status) +gsync_status (char *master, char *slave, char *conf_path, + int *status, gf_boolean_t *is_template_in_use) { char pidfile[PATH_MAX] = {0,}; int fd = -1; - fd = gsyncd_getpidfile (master, slave, pidfile, conf_path); + fd = gsyncd_getpidfile (master, slave, pidfile, + conf_path, is_template_in_use); if (fd == -2) return -1; @@ -1089,16 +1116,18 @@ glusterd_check_gsync_running_local (char *master, char *slave, { int ret = -1; int ret_status = 0; + gf_boolean_t is_template_in_use = _gf_false; GF_ASSERT (master); GF_ASSERT (slave); GF_ASSERT (is_run); *is_run = _gf_false; - ret = 
gsync_status (master, slave, conf_path, &ret_status); - if (ret == 0 && ret_status == 0) { + ret = gsync_status (master, slave, conf_path, + &ret_status, &is_template_in_use); + if (ret == 0 && ret_status == 0) *is_run = _gf_true; - } else if (ret == -1) { + else if (ret == -1) { gf_log ("", GF_LOG_WARNING, GEOREP" validation " " failed"); goto out; @@ -1202,7 +1231,8 @@ glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo, gf_boolean_t is_force) { int ret = -1; - gf_boolean_t is_running = _gf_false; + int ret_status = 0; + gf_boolean_t is_template_in_use = _gf_false; char msg[2048] = {0}; uuid_t uuid = {0}; glusterd_conf_t *priv = NULL; @@ -1246,31 +1276,34 @@ glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo, goto out; } - if (is_force) { - ret = 0; - goto out; - } - /*Check if the gsync is already started in cmd. inited host * If so initiate add it into the glusterd's priv*/ - ret = glusterd_check_gsync_running_local (volinfo->volname, - slave, conf_path, - &is_running); - if (ret) { + ret = gsync_status (volinfo->volname, slave, conf_path, + &ret_status, &is_template_in_use); + if (ret == 0) { + if ((ret_status == 0) && !is_force) { + snprintf (msg, sizeof (msg), GEOREP " session between" + " %s & %s already started", volinfo->volname, + slave); + ret = -1; + goto out; + } + } else if (ret == -1) { snprintf (msg, sizeof (msg), GEOREP" start option " "validation failed "); goto out; } - if (_gf_true == is_running) { - snprintf (msg, sizeof (msg), GEOREP " session between" - " %s & %s already started", volinfo->volname, - slave); + + if (is_template_in_use == _gf_true) { + snprintf (msg, sizeof (msg), GEOREP" start " + "failed : pid-file entry missing " + "in config file."); ret = -1; goto out; } ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave); - if (ret) { + if (ret && !is_force) { snprintf (msg, sizeof (msg), "Unable to spawn gsyncd"); gf_log ("", GF_LOG_ERROR, "%s", msg); } @@ -1459,10 +1492,12 @@ 
glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo, int ret = -1; char msg[2048] = {0}; char pidfile[PATH_MAX] = {0,}; + gf_boolean_t is_template_in_use = _gf_false; GF_ASSERT (THIS && THIS->private); GF_ASSERT (volinfo); GF_ASSERT (slave); + GF_ASSERT (conf_path); GF_ASSERT (op_errstr); if (GLUSTERD_STATUS_STARTED != volinfo->status) { @@ -1472,7 +1507,8 @@ glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo, goto out; } - pfd = gsyncd_getpidfile (volinfo->volname, slave, pidfile, conf_path); + pfd = gsyncd_getpidfile (volinfo->volname, slave, pidfile, + conf_path, &is_template_in_use); if (pfd == -2) { gf_log ("", GF_LOG_ERROR, GEOREP" stop validation " "failed for %s & %s", volinfo->volname, slave); @@ -1488,6 +1524,14 @@ glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo, goto out; } + if (is_template_in_use) { + snprintf (msg, sizeof (msg), "pid-file entry missing in " + "the config file(%s).", conf_path); + gf_log ("", GF_LOG_ERROR, "%s", msg); + ret = -1; + goto out; + } + if (pfd < 0) goto out; @@ -1739,20 +1783,23 @@ out: int glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, - char *conf_path, char **statefile) + char *conf_path, char **statefile, + gf_boolean_t *is_template_in_use) { - glusterd_conf_t *priv = NULL; - int ret = -1; - char *master = NULL; - char *buf = NULL; - dict_t *confd = NULL; - char *confpath = NULL; - char conf_buf[PATH_MAX] = ""; - struct stat stbuf = {0,}; + char *master = NULL; + char *buf = NULL; + char *working_conf_path = NULL; + char temp_conf_path[PATH_MAX] = ""; + dict_t *confd = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + struct stat stbuf = {0,}; GF_ASSERT (THIS); GF_ASSERT (THIS->private); GF_ASSERT (volinfo); + GF_ASSERT (conf_path); + GF_ASSERT (is_template_in_use); master = volinfo->volname; @@ -1764,44 +1811,79 @@ glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, priv = THIS->private; + snprintf (temp_conf_path, 
sizeof(temp_conf_path) - 1, + "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); + ret = lstat (conf_path, &stbuf); if (!ret) { gf_log ("", GF_LOG_INFO, "Using passed config template(%s).", conf_path); - confpath = conf_path; + working_conf_path = conf_path; } else { - ret = snprintf (conf_buf, sizeof(conf_buf) - 1, - "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); - conf_buf[ret] = '\0'; - confpath = conf_buf; + gf_log ("", GF_LOG_WARNING, "Config file (%s) missing. " + "Looking for template config file (%s)", + conf_path, temp_conf_path); + ret = lstat (temp_conf_path, &stbuf); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Template config file (%s) missing.", + temp_conf_path); + goto out; + } gf_log ("", GF_LOG_INFO, "Using default config template(%s).", - confpath); + temp_conf_path); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; } - ret = glusterd_gsync_get_config (master, slave, confpath, +fetch_data: + ret = glusterd_gsync_get_config (master, slave, working_conf_path, confd); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" - "for %s(master), %s(slave)", master, slave); - goto out; - + if (*is_template_in_use == _gf_false) { + gf_log ("", GF_LOG_ERROR, + "Unable to get configuration data " + "for %s(master), %s(slave). " + "Trying template config.", + master, slave); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + goto fetch_data; + } else { + gf_log ("", GF_LOG_ERROR, + "Unable to get configuration data " + "for %s(master), %s(slave) from " + "template config", + master, slave); + goto out; + } } ret = dict_get_param (confd, "state_file", &buf); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name."); - goto out; - } - - *statefile = gf_strdup(buf); - if (!*statefile) { - gf_log ("", GF_LOG_ERROR, "Unable to gf_strdup."); - ret = -1; - goto out; + if (*is_template_in_use == _gf_false) { + gf_log ("", GF_LOG_ERROR, + "Unable to get state_file's name. 
" + "Trying template config."); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + goto fetch_data; + } else { + gf_log ("", GF_LOG_ERROR, + "Unable to get state_file's " + "name from template."); + goto out; + } } ret = 0; out: + if (buf) { + *statefile = gf_strdup(buf); + if (!*statefile) + ret = -1; + } + if (confd) dict_destroy (confd); @@ -1809,7 +1891,7 @@ glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, return ret; } -static int +int glusterd_create_status_file (char *master, char *slave, char *slave_ip, char *slave_vol, char *status) { @@ -2013,6 +2095,7 @@ glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr) gf_boolean_t is_force = -1; gf_boolean_t is_force_blocker = -1; gf_boolean_t exists = _gf_false; + gf_boolean_t is_template_in_use = _gf_false; glusterd_conf_t *conf = NULL; glusterd_volinfo_t *volinfo = NULL; struct stat stbuf = {0,}; @@ -2164,7 +2247,9 @@ glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr) } } - ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile); + ret = glusterd_get_statefile_name (volinfo, slave, + conf_path, &statefile, + &is_template_in_use); if (ret) { if (!strstr(slave, "::")) snprintf (errmsg, sizeof (errmsg), @@ -2219,6 +2304,7 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) { int ret = 0; int type = 0; + int pfd = -1; char *volname = NULL; char *slave = NULL; char *slave_ip = NULL; @@ -2230,10 +2316,11 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) gf_boolean_t exists = _gf_false; glusterd_volinfo_t *volinfo = NULL; char errmsg[PATH_MAX] = {0,}; + char pidfile[PATH_MAX] = {0,}; dict_t *ctx = NULL; gf_boolean_t is_force = 0; - gf_boolean_t is_force_blocker = -1; gf_boolean_t is_running = _gf_false; + gf_boolean_t is_template_in_use = _gf_false; uuid_t uuid = {0}; char uuid_str [64] = {0}; char *host_uuid = NULL; @@ -2266,10 +2353,8 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) 
uuid_utoa_r (MY_UUID, uuid_str); if (conf->op_version < 2) { - gf_log ("", GF_LOG_ERROR, "Op Version not supported."); snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" " support the required op version."); - *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } @@ -2277,10 +2362,8 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) exists = glusterd_check_volume_exists (volname); ret = glusterd_volinfo_find (volname, &volinfo); if ((ret) || (!exists)) { - gf_log ("", GF_LOG_WARNING, "volume name does not exist"); snprintf (errmsg, sizeof(errmsg), "Volume name %s does not" " exist", volname); - *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } @@ -2295,39 +2378,40 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) goto out; } - ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile); + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + ret = glusterd_get_statefile_name (volinfo, slave, + conf_path, &statefile, + &is_template_in_use); if (ret) { - /* Checking if slave host is pingable, has proper passwordless - * ssh login setup */ - ret = glusterd_verify_slave (volname, slave_ip, slave_vol, - op_errstr, &is_force_blocker); + if (!strstr(slave, "::")) { + snprintf (errmsg, sizeof(errmsg), + "%s is not a valid slave url.", slave); + ret = -1; + goto out; + } else { + gf_log ("", GF_LOG_ERROR, + "state_file entry missing in config file (%s)", + conf_path); + + if ((type == GF_GSYNC_OPTION_TYPE_STOP) && is_force) { + gf_log ("", GF_LOG_WARNING, "Allowing stop " + "force to bypass missing statefile " + "entry in config file (%s), and " + "template file", conf_path); + ret = 0; + } else + goto out; + } + } else { + ret = dict_set_str (dict, "statefile", statefile); if (ret) { gf_log ("", GF_LOG_ERROR, - "%s is not a valid slave volume. 
Error: %s", - slave, *op_errstr); + "Unable to store statefile path"); goto out; } - - if (!strstr(slave, "::")) - snprintf (errmsg, sizeof (errmsg), - "%s is not a valid slave url.", slave); - else - snprintf (errmsg, sizeof (errmsg), - "Unable to get statefile's name"); - gf_log ("", GF_LOG_ERROR, "%s", errmsg); - ret = -1; - goto out; - } - - ret = dict_set_str (dict, "statefile", statefile); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Unable to store statefile path"); - goto out; } - is_force = dict_get_str_boolean (dict, "force", _gf_false); - /* Allowing stop force to bypass the statefile check * as this command acts as a fail safe method to stop geo-rep * session. */ @@ -2341,7 +2425,6 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) volinfo->volname, slave); gf_log ("", GF_LOG_ERROR, "%s. statefile = %s", errmsg, statefile); - *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } @@ -2360,7 +2443,6 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) " down. 
Please bring up the peer and" " retry.", down_peerstr, volinfo->volname); - *op_errstr = gf_strdup (errmsg); ret = -1; GF_FREE (down_peerstr); down_peerstr = NULL; @@ -2371,18 +2453,26 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) switch (type) { case GF_GSYNC_OPTION_TYPE_START: + if (is_template_in_use) { + snprintf (errmsg, sizeof(errmsg), "state-file entry " + "missing in the config file(%s).", + conf_path); + ret = -1; + goto out; + } + /* don't attempt to start gsync if replace-brick is * in progress */ if (glusterd_is_rb_ongoing (volinfo)) { snprintf (errmsg, sizeof(errmsg), "replace-brick is in" " progress, not starting geo-replication"); - *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } ret = glusterd_op_verify_gsync_start_options (volinfo, slave, - conf_path, statefile, + conf_path, + statefile, op_errstr, is_force); if (ret) goto out; @@ -2396,7 +2486,6 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) "start failed", strerror (errno)); snprintf (errmsg, sizeof(errmsg), "fuse unvailable"); - *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } @@ -2405,6 +2494,14 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) case GF_GSYNC_OPTION_TYPE_STOP: if (!is_force) { + if (is_template_in_use) { + snprintf (errmsg, sizeof(errmsg), + "state-file entry missing in " + "the config file(%s).", conf_path); + ret = -1; + goto out; + } + ret = glusterd_op_verify_gsync_running (volinfo, slave, conf_path, op_errstr); @@ -2418,6 +2515,24 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) break; case GF_GSYNC_OPTION_TYPE_CONFIG: + if (is_template_in_use) { + snprintf (errmsg, sizeof(errmsg), "state-file entry " + "missing in the config file(%s).", + conf_path); + ret = -1; + goto out; + } + + pfd = gsyncd_getpidfile (volname, slave, pidfile, + conf_path, &is_template_in_use); + if (is_template_in_use) { + snprintf (errmsg, sizeof(errmsg), "pid-file entry " + "missing in the config file(%s).", + conf_path); + 
ret = -1; + goto out; + } + ret = gsync_verify_config_options (dict, op_errstr, volname); goto out; break; @@ -2425,13 +2540,19 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) case GF_GSYNC_OPTION_TYPE_DELETE: /* Check if the gsync session is still running * If so ask the user to stop geo-replication first.*/ + if (is_template_in_use) { + snprintf (errmsg, sizeof(errmsg), "state-file entry " + "missing in the config file(%s).", + conf_path); + ret = -1; + goto out; + } + ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); if (ret) { snprintf (errmsg, sizeof(errmsg), "Geo-replication" " session between %s and %s does not exist.", volinfo->volname, slave); - gf_log ("", GF_LOG_ERROR, "%s", errmsg); - *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } else { @@ -2444,8 +2565,6 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) "still active. Please stop the " "session and retry.", volinfo->volname, slave); - gf_log ("", GF_LOG_ERROR, "%s", errmsg); - *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } @@ -2455,36 +2574,49 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) if (ret) { snprintf (errmsg, sizeof (errmsg), "Unable to spawn gsyncd"); - *op_errstr = gf_strdup (errmsg); - gf_log ("", GF_LOG_ERROR, "%s", errmsg); } break; } out: + if (path_list) + GF_FREE (path_list); + + if (ret && errmsg[0] != '\0') { + gf_log (this->name, GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + } + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } static int stop_gsync (char *master, char *slave, char **msg, - char *conf_path, gf_boolean_t is_force) + char *conf_path, char **op_errstr, + gf_boolean_t is_force) { int32_t ret = 0; int pfd = -1; pid_t pid = 0; char pidfile[PATH_MAX] = {0,}; + char errmsg[PATH_MAX] = ""; char buf [1024] = {0,}; int i = 0; + gf_boolean_t is_template_in_use = _gf_false; GF_ASSERT (THIS); GF_ASSERT (THIS->private); - pfd = gsyncd_getpidfile (master, slave, pidfile, conf_path); 
- if (pfd == -2 && !is_force) { - gf_log ("", GF_LOG_ERROR, GEOREP" stop validation " - " failed for %s & %s", master, slave); + pfd = gsyncd_getpidfile (master, slave, pidfile, + conf_path, &is_template_in_use); + if (pfd == -2) { + snprintf (errmsg, sizeof(errmsg) - 1, + "pid-file entry mising in config file and " + "template config file."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); ret = -1; goto out; } @@ -2502,7 +2634,7 @@ stop_gsync (char *master, char *slave, char **msg, if (ret > 0) { pid = strtol (buf, NULL, 10); ret = kill (-pid, SIGTERM); - if (ret) { + if (ret && !is_force) { gf_log ("", GF_LOG_WARNING, "failed to kill gsyncd"); goto out; @@ -2526,8 +2658,6 @@ stop_gsync (char *master, char *slave, char **msg, out: sys_close (pfd); - if (is_force) - ret = 0; return ret; } @@ -2991,12 +3121,75 @@ out: return ret; } +int +glusterd_fetch_values_from_config (char *master, char *slave, + char *confpath, dict_t *confd, + char **statefile, + char **georep_session_wrkng_dir, + char **socketfile) +{ + int ret = 0; + + ret = glusterd_gsync_get_config (master, slave, confpath, + confd); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" + "for %s(master), %s(slave)", master, slave); + goto out; + } + + if (statefile) { + ret = dict_get_param (confd, "state_file", statefile); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get state_file's name " + "for %s(master), %s(slave). " + "Please check gsync config file.", + master, slave); + goto out; + } + } + + if (georep_session_wrkng_dir) { + ret = dict_get_param (confd, "georep_session_working_dir", + georep_session_wrkng_dir); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get geo-rep session's " + "working directory name for %s(master), " + "%s(slave). 
Please check gsync config file.", + master, slave); + goto out; + } + } + + if (socketfile) { + ret = dict_get_param (confd, "state_socket_unencoded", + socketfile); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get socket file's name " + "for %s(master), %s(slave). " + "Please check gsync config file.", + master, slave); + goto out; + } + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + int glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, char *conf_path, dict_t *dict, char *node) { char brick_state_file[PATH_MAX] = ""; char brick_path[PATH_MAX] = ""; + char temp_conf_path[PATH_MAX] = ""; + char *working_conf_path = NULL; char *georep_session_wrkng_dir = NULL; char *master = NULL; char tmp[1024] = ""; @@ -3010,37 +3203,74 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, int ret = 0; glusterd_brickinfo_t *brickinfo = NULL; gf_gsync_status_t *sts_val = NULL; + gf_boolean_t is_template_in_use = _gf_false; glusterd_conf_t *priv = NULL; + struct stat stbuf = {0,}; GF_ASSERT (THIS); GF_ASSERT (THIS->private); GF_ASSERT (volinfo); + GF_ASSERT (conf_path); master = volinfo->volname; confd = dict_new (); - if (!dict) { + if (!confd) { gf_log ("", GF_LOG_ERROR, "Not able to create dict."); return -1; } priv = THIS->private; - ret = glusterd_gsync_get_config (master, slave, conf_path, - confd); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" - "for %s(master), %s(slave)", master, slave); - goto out; - - } + snprintf (temp_conf_path, sizeof(temp_conf_path) - 1, + "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); - ret = dict_get_param (confd, "state_file", &statefile); + ret = lstat (conf_path, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_INFO, "Using passed config template(%s).", + conf_path); + working_conf_path = conf_path; + } else { + gf_log ("", GF_LOG_WARNING, "Config file (%s) missing. 
" + "Looking for template config file (%s)", + conf_path, temp_conf_path); + ret = lstat (temp_conf_path, &stbuf); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Template config file (%s) missing.", + temp_conf_path); + goto out; + } + gf_log ("", GF_LOG_INFO, "Using default config template(%s).", + temp_conf_path); + working_conf_path = temp_conf_path; + is_template_in_use = _gf_true; + } + +fetch_data: + ret = glusterd_fetch_values_from_config (master, slave, + working_conf_path, + confd, + &statefile, + &georep_session_wrkng_dir, + &socketfile); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name " - "for %s(master), %s(slave). Please check gsync " - "config file.", master, slave); - goto out; + if (is_template_in_use == _gf_false) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch config values " + "for %s(master), %s(slave). " + "Trying default config template", + master, slave); + working_conf_path = temp_conf_path; + is_template_in_use = _gf_true; + goto fetch_data; + } else { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch config values " + "for %s(master), %s(slave)", + master, slave); + goto out; + } } ret = glusterd_gsync_read_frm_status (statefile, monitor_status, @@ -3051,23 +3281,6 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, strncpy (monitor_status, "defunct", sizeof (monitor_status)); } - ret = dict_get_param (confd, "georep_session_working_dir", - &georep_session_wrkng_dir); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get geo-rep session's " - "working directory name for %s(master), %s(slave). " - "Please check gsync config file.", master, slave); - goto out; - } - - ret = dict_get_param (confd, "state_socket_unencoded", &socketfile); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get socket file's name " - "for %s(master), %s(slave). 
Please check gsync " - "config file.", master, slave); - goto out; - } - ret = dict_get_int32 (dict, "gsync-count", &gsync_count); if (ret) gsync_count = 0; @@ -3167,6 +3380,13 @@ store_status: sts_val->checkpoint_status[ret] = '\0'; } + if (is_template_in_use) { + ret = snprintf (sts_val->worker_status, + sizeof(sts_val->worker_status), + "Config Corrupted"); + sts_val->worker_status[ret] = '\0'; + } + if (strcmp (sts_val->worker_status, "Active")) { ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); sts_val->checkpoint_status[ret] = '\0'; @@ -3202,7 +3422,7 @@ store_status: goto out; out: - dict_destroy (confd); + dict_unref (confd); return 0; } @@ -3217,6 +3437,7 @@ glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, glusterd_conf_t *priv = NULL; char *status_msg = NULL; gf_boolean_t is_running = _gf_false; + char *op_errstr = NULL; GF_ASSERT (volinfo); GF_ASSERT (slave); @@ -3233,7 +3454,8 @@ glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, goto out; ret = stop_gsync (volinfo->volname, slave, &status_msg, - conf_path, is_force); + conf_path, &op_errstr, + is_force); if (ret == 0 && status_msg) ret = dict_set_str (resp_dict, "gsync-status", status_msg); @@ -3347,6 +3569,7 @@ glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo, uuid_t uuid = {0, }; glusterd_conf_t *priv = NULL; int ret = 0; + gf_boolean_t is_template_in_use = _gf_false; struct stat stbuf = {0, }; GF_ASSERT (volinfo); @@ -3362,7 +3585,8 @@ glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo, "session is not active", volinfo->volname, slave); ret = glusterd_get_statefile_name (volinfo, slave, - conf_path, &statefile); + conf_path, &statefile, + &is_template_in_use); if (ret) { if (!strstr(slave, "::")) gf_log ("", GF_LOG_INFO, @@ -4027,14 +4251,11 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } - ret = stop_gsync (volname, slave, &status_msg, 
- conf_path, is_force); + ret = stop_gsync (volname, slave, &status_msg, conf_path, + op_errstr, is_force); if (ret == 0 && status_msg) ret = dict_set_str (rsp_dict, "gsync-status", status_msg); - if (ret != 0 && !is_force && path_list) - *op_errstr = gf_strdup ("internal error"); - if (!ret) { ret = glusterd_create_status_file (volinfo->volname, slave, slave_ip, @@ -4179,6 +4400,7 @@ glusterd_get_slave_info (char *slave, char **slave_ip, gf_log ("", GF_LOG_ERROR, "Failed to gf_strdup"); ret = -1; + GF_FREE (*slave_ip); goto out; } gf_log ("", GF_LOG_DEBUG, "Slave Vol : %s", *slave_vol); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 4a73b08f43a..cb673192d0c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -286,7 +286,8 @@ glusterd_are_all_volumes_stopped (); int glusterd_stop_bricks (glusterd_volinfo_t *volinfo); int -gsync_status (char *master, char *slave, char *conf_path, int *status); +gsync_status (char *master, char *slave, char *conf_path, + int *status, gf_boolean_t *is_template_in_use); int glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 89682724478..9c8d9940142 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -6392,11 +6392,13 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) char buf[1024] = "faulty"; int uuid_len = 0; int ret = 0; + int ret_status = 0; char uuid_str[64] = {0}; glusterd_volinfo_t *volinfo = NULL; char confpath[PATH_MAX] = ""; char *op_errstr = NULL; glusterd_conf_t *priv = NULL; + gf_boolean_t is_template_in_use = _gf_false; GF_ASSERT (THIS); priv = THIS->private; @@ -6434,7 +6436,8 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) /* Fetching the last status of the node */ 
ret = glusterd_get_statefile_name (volinfo, slave, - confpath, &statefile); + confpath, &statefile, + &is_template_in_use); if (ret) { if (!strstr(slave, "::")) gf_log ("", GF_LOG_INFO, @@ -6445,6 +6448,34 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) goto out; } + /* If state-file entry is missing from the config file, + * do not start gsyncd on restart */ + if (is_template_in_use) { + gf_log ("", GF_LOG_INFO, + "state-file entry is missing in config file." + "Not Restarting"); + goto out; + } + + is_template_in_use = _gf_false; + + ret = gsync_status (volinfo->volname, slave, confpath, + &ret_status, &is_template_in_use); + if (ret == -1) { + gf_log ("", GF_LOG_INFO, + GEOREP" start option validation failed "); + ret = 0; + goto out; + } + + if (is_template_in_use == _gf_true) { + gf_log ("", GF_LOG_INFO, + "pid-file entry is missing in config file." + "Not Restarting"); + ret = 0; + goto out; + } + ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); if (ret < 0) { gf_log ("", GF_LOG_ERROR, "Unable to read the status"); @@ -6463,10 +6494,36 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) goto out; } + if ((!strcmp (buf, "Config Corrupted"))) { + gf_log ("", GF_LOG_INFO, + "Recovering from a corrupted config. " + "Not Restarting. Use start (force) to " + "start the session between %s and %s::%s.", + volinfo->volname, + slave_ip, slave_vol); + goto out; + } + glusterd_start_gsync (volinfo, slave, path_list, confpath, uuid_str, NULL); out: + if (statefile) + GF_FREE (statefile); + + if (is_template_in_use) { + ret = glusterd_create_status_file (volinfo->volname, slave, + slave_ip, slave_vol, + "Config Corrupted"); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to create status file" + ". 
Error : %s", strerror (errno)); + ret = -1; + goto out; + } + } + GF_FREE (path_list); GF_FREE (op_errstr); @@ -8066,6 +8123,7 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, xlator_t *this = NULL; glusterd_conf_t *priv = NULL; int errcode = 0; + gf_boolean_t is_template_in_use = _gf_false; this = THIS; GF_ASSERT (this); @@ -8081,10 +8139,19 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, goto out; } - ret = gsync_status (master_vol->volname, slave, conf_path, &status); + ret = gsync_status (master_vol->volname, slave, conf_path, + &status, &is_template_in_use); if (status == 0) goto out; + if (is_template_in_use == _gf_true) { + gf_asprintf (op_errstr, GEOREP" start failed for %s %s : " + "pid-file entry missing in config file", + master_vol->volname, slave); + ret = -1; + goto out; + } + uuid_utoa_r (master_vol->volume_id, uuid_str); runinit (&runner); runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 84fa89b0e2d..2d1afc379a8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -633,11 +633,16 @@ glusterd_get_slave_info (char *slave, char **slave_ip, int glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, - char *conf_path, char **statefile); + char *conf_path, char **statefile, + gf_boolean_t *is_template_in_use); int glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); +int +glusterd_create_status_file (char *master, char *slave, char *slave_ip, + char *slave_vol, char *status); + int glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, dict_t *resp_dict, char *path_list, -- cgit