From b198e072cda4bbb98e19701399c4bb4f0743cf20 Mon Sep 17 00:00:00 2001 From: Ajeet Jha Date: Mon, 2 Dec 2013 12:55:18 +0530 Subject: glusterd/geo-rep: more glusterd and cli fixes for geo-rep. -> handle option validation cases in reset case. -> Creating valid conf path when glusterd restarts. -> Reading the gsyncd worker thread status and displaying it. -> Displaying status-detail per worker. -> Fetch checkpoint info in geo-rep status. -> use-tarssh value validation added. misc: misc geo-rep fixes based on cluster, logrotate etc.. -> cluster/dht: fix 'stime' getxattr getting overwritten. -> cluster/afr: return max of 'stime' values in subvol. -> geo-rep-logrotate: Sending SIGHUP to geo-rep auxiliary. -> cluster/dht: fix convoluted logic while aggregating. -> cluster/*: fix 'stime' min/max fetch logic. Change-Id: I811acea0bbd6194797a3e55d89295d1ea021ac85 BUG: 1036552 Signed-off-by: Ajeet Jha Reviewed-on: http://review.gluster.org/6405 Tested-by: Gluster Build System Reviewed-by: Amar Tumballi Reviewed-by: Anand Avati --- xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 563 +++++++++++++++-------- xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 1 - xlators/mgmt/glusterd/src/glusterd-utils.c | 136 +++--- xlators/mgmt/glusterd/src/glusterd-utils.h | 11 + xlators/mgmt/glusterd/src/glusterd.c | 21 +- xlators/mgmt/glusterd/src/glusterd.h | 2 +- 6 files changed, 483 insertions(+), 251 deletions(-) (limited to 'xlators/mgmt') diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 5786694bd..5d724cc2a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -28,17 +28,6 @@ static int dict_get_param (dict_t *dict, char *key, char **param); -static int -glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, - char *conf_path, char **statefile); - -static int -glusterd_get_slave_info (char *slave, char **slave_ip, - char **slave_vol, char **op_errstr); - -static int -glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); - struct gsync_config_opt_vals_ gsync_confopt_vals[] = { {.op_name = "change_detector", .no_of_pos_vals = 2, @@ -55,6 +44,11 @@ struct gsync_config_opt_vals_ gsync_confopt_vals[] = { .case_sensitive = _gf_false, .values = {"critical", "error", "warning", "info", "debug"} }, + {.op_name = "use-tarssh", + .no_of_pos_vals = 6, + .case_sensitive = _gf_false, + .values = {"true", "false", "0", "1", "yes", "no"} + }, {.op_name = NULL, }, }; @@ -74,6 +68,11 @@ static char *gsync_reserved_opts[] = { NULL }; +static char *gsync_no_restart_opts[] = { + "checkpoint", + NULL +}; + int __glusterd_handle_sys_exec (rpcsvc_request_t *req) { @@ -899,6 +898,8 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) } if (op_match) { + if (!op_value) + goto out; val_match = _gf_false; for (i = 0; i < conf_vals->no_of_pos_vals; i++) { if(conf_vals->case_sensitive){ @@ -912,7 +913,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) if (!val_match) { ret = snprintf (errmsg, sizeof(errmsg) - 1, - "Invalid values (%s) for" + "Invalid value(%s) for" " option %s", op_value, op_name); errmsg[ret] = '\0'; @@ -923,7 +924,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) } } } - +out: return 0; } @@ -1581,7 +1582,7 @@ out: return ret; } -static int +int glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, char *conf_path, char **statefile) { @@ -1736,7 +1737,7 @@ glusterd_verify_slave (char *volname, char *slave_ip, char *slave, gf_log ("", GF_LOG_ERROR, "Not a valid slave"); ret = glusterd_gsync_read_frm_status (log_file_path, buf, sizeof(buf)); - if (ret) { + if (ret <= 0) { gf_log ("", GF_LOG_ERROR, "Unable to read from %s", log_file_path); goto out; @@ -2391,6 +2392,8 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, char *slave_ip = NULL; char *slave_vol = NULL; struct stat stbuf = {0, }; + gf_boolean_t restart_required = _gf_true; + char **resopt = NULL; GF_ASSERT (slave); GF_ASSERT (op_errstr); @@ -2495,18 +2498,28 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, out: if (!ret && volinfo) { + for (resopt = gsync_no_restart_opts; *resopt; resopt++) { + restart_required = _gf_true; + if (!strcmp ((*resopt), op_name)){ + restart_required = _gf_false; + break; + } + } + + if (restart_required) { ret = glusterd_check_restart_gsync_session (volinfo, slave, resp_dict, path_list, conf_path, 0); if (ret) - *op_errstr = gf_strdup ("internal error"); + *op_errstr = gf_strdup ("internal error"); + } } gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -static int +int glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) { int ret = 0; @@ -2530,7 +2543,6 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) char *p = buf + len - 1; while (isspace (*p)) *p-- = '\0'; - ret = 0; } } else if (ret < 0) gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt"); @@ -2540,20 +2552,146 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) } static int -glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen) +dict_get_param (dict_t *dict, char *key, char **param) +{ + char *dk = NULL; + char *s = NULL; + char x = '\0'; + int ret = 0; + + if (dict_get_str (dict, key, param) == 0) + return 0; + + dk = gf_strdup (key); + if (!key) + return -1; + + s = strpbrk (dk, "-_"); + if (!s) + return -1; + x = (*s == '-') ? '_' : '-'; + *s++ = x; + while ((s = strpbrk (s, "-_"))) + *s++ = x; + + ret = dict_get_str (dict, dk, param); + + GF_FREE (dk); + return ret; +} + +static int +glusterd_parse_gsync_status (char *buf, gf_gsync_status_t *sts_val) +{ + int ret = -1; + int i = -1; + int num_of_fields = 8; + char *token = NULL; + char **tokens = NULL; + char **ptr = NULL; + char *save_ptr = NULL; + char na_buf[] = "N/A"; + + if (!buf) { + gf_log ("", GF_LOG_ERROR, "Empty buf"); + goto out; + } + + tokens = calloc (num_of_fields, sizeof (char *)); + if (!tokens) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } + + ptr = tokens; + + for (token = strtok_r (buf, ",", &save_ptr); token; + token = strtok_r (NULL, ",", &save_ptr)) { + *ptr = gf_strdup(token); + if (!*ptr) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } + ptr++; + } + + for (i = 0; i < num_of_fields; i++) { + token = strtok_r (tokens[i], ":", &save_ptr); + token = strtok_r (NULL, "\0", &save_ptr); + token++; + + /* token NULL check */ + if (!token && (i != 0) && + (i != 5) && (i != 7)) + token = na_buf; + + if (i == 0) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 1] = '\0'; + } + memcpy (sts_val->slave_node, token, strlen(token)); + } + if (i == 1) + memcpy (sts_val->files_syncd, token, strlen(token)); + if (i == 2) + memcpy (sts_val->purges_remaining, token, strlen(token)); + if (i == 3) + memcpy (sts_val->total_files_skipped, token, strlen(token)); + if (i == 4) + memcpy (sts_val->files_remaining, token, strlen(token)); + if (i == 5) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 1] = '\0'; + } + memcpy (sts_val->worker_status, token, strlen(token)); + } + if (i == 6) + memcpy (sts_val->bytes_remaining, token, strlen(token)); + if (i == 7) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 2] = '\0'; + } + memcpy (sts_val->crawl_status, token, strlen(token)); + } + } + + ret = 0; +out: + for (i = 0; i< num_of_fields; i++) + if (tokens[i]) + GF_FREE(tokens[i]); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_gsync_fetch_status_extra (char *path, gf_gsync_status_t *sts_val) { char sockpath[PATH_MAX] = {0,}; struct sockaddr_un sa = {0,}; - size_t l = 0; int s = -1; struct pollfd pfd = {0,}; int ret = 0; - l = strlen (buf); - /* seek to end of data in buf */ - buf += l; - blen -= l; - glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath)); strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path)); @@ -2581,66 +2719,40 @@ glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen) ret = -1; goto out; } - ret = read(s, buf, blen); + ret = read(s, sts_val->checkpoint_status, + sizeof(sts_val->checkpoint_status)); /* we expect a terminating 0 byte */ - if (ret == 0 || (ret > 0 && buf[ret - 1])) + if (ret == 0 || (ret > 0 && sts_val->checkpoint_status[ret - 1])) ret = -1; - if (ret > 0) + if (ret > 0) { ret = 0; + } - out: +out: close (s); return ret; } -static int -dict_get_param (dict_t *dict, char *key, char **param) -{ - char *dk = NULL; - char *s = NULL; - char x = '\0'; - int ret = 0; - - if (dict_get_str (dict, key, param) == 0) - return 0; - - dk = gf_strdup (key); - if (!key) - return -1; - - s = strpbrk (dk, "-_"); - if (!s) - return -1; - x = (*s == '-') ? '_' : '-'; - *s++ = x; - while ((s = strpbrk (s, "-_"))) - *s++ = x; - - ret = dict_get_str (dict, dk, param); - - GF_FREE (dk); - return ret; -} - -static int +int glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, char *conf_path, dict_t *dict, char *node) { - glusterd_conf_t *priv = NULL; - int ret = 0; - char *statefile = NULL; - char *master = NULL; - char buf[1024] = "defunct"; - char nds[1024] = {0, }; - char mst[1024] = {0, }; - char slv[1024] = {0, }; - char sts[1024] = {0, }; - char *bufp = NULL; - dict_t *confd = NULL; - int gsync_count = 0; - int status = 0; - char *dyn_node = NULL; - char *path_list = NULL; + char brick_state_file[PATH_MAX] = ""; + char brick_path[PATH_MAX] = ""; + char *georep_session_wrkng_dir = NULL; + char *master = NULL; + char tmp[1024] = ""; + char sts_val_name[1024] = ""; + char monitor_status[PATH_MAX] = ""; + char *statefile = NULL; + char *socketfile = NULL; + dict_t *confd = NULL; + int gsync_count = 0; + int i = 0; + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + gf_gsync_status_t *sts_val = NULL; + glusterd_conf_t *priv = NULL; GF_ASSERT (THIS); GF_ASSERT (THIS->private); @@ -2661,7 +2773,7 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" "for %s(master), %s(slave)", master, slave); - goto done; + goto out; } @@ -2670,120 +2782,168 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name " "for %s(master), %s(slave). Please check gsync " "config file.", master, slave); - goto done; + goto out; } - ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); - if (ret) { + + ret = glusterd_gsync_read_frm_status (statefile, monitor_status, + sizeof (monitor_status)); + if (ret <= 0) { gf_log ("", GF_LOG_ERROR, "Unable to read the status" "file for %s(master), %s(slave)", master, slave); - strncpy (buf, "defunct", sizeof (buf)); - goto done; - } - - ret = gsync_status (master, slave, conf_path, &status); - if (ret == 0 && status == -1) { - if ((strcmp (buf, "Not Started")) && - (strcmp (buf, "Stopped"))) - strncpy (buf, "defunct", sizeof (buf)); - goto done; - } else if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Unable to get gsync status"); - goto done; + strncpy (monitor_status, "defunct", sizeof (monitor_status)); } - if (strcmp (buf, "Stable") != 0) - goto done; - - ret = dict_get_param (confd, "state_socket_unencoded", &statefile); + ret = dict_get_param (confd, "georep_session_working_dir", + &georep_session_wrkng_dir); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded" - " filepath. Please check gsync config file."); - goto done; + gf_log ("", GF_LOG_ERROR, "Unable to get geo-rep session's " + "working directory name for %s(master), %s(slave). " + "Please check gsync config file.", master, slave); + goto out; } - ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf)); + + ret = dict_get_param (confd, "state_socket_unencoded", &socketfile); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status" - "for %s(master), %s(slave)", master, slave); - /* there is a slight chance that this occurs due to race - * -- in that case, the following options all seem bad: - * - * - suppress irregurlar behavior by just leaving status - * on "OK" - * - freak out users with a misleading "defunct" - * - overload the meaning of the regular error signal - * mechanism of gsyncd, that is, when status is "faulty" - * - * -- so we just come up with something new... - */ - strncpy (buf, "N/A", sizeof (buf)); - goto done; + gf_log ("", GF_LOG_ERROR, "Unable to get socket file's name " + "for %s(master), %s(slave). Please check gsync " + "config file.", master, slave); + goto out; } - done: - if ((!strcmp (buf, "defunct")) || - (!strcmp (buf, "Not Started")) || - (!strcmp (buf, "Stopped"))) { - ret = glusterd_get_local_brickpaths (volinfo, &path_list); - if (!path_list) { - gf_log ("", GF_LOG_DEBUG, "This node not being part of" - " volume should not be running gsyncd. Hence" - " shouldn't display status for this node."); - ret = 0; + ret = dict_get_int32 (dict, "gsync-count", &gsync_count); + if (ret) + gsync_count = 0; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!sts_val) { + gf_log ("", GF_LOG_ERROR, "Out Of Memory"); goto out; } - } - ret = dict_get_int32 (dict, "gsync-count", &gsync_count); + /* Creating the brick state file's path */ + memset(brick_state_file, '\0', PATH_MAX); + memcpy (brick_path, brickinfo->path, PATH_MAX - 1); + for (i = 0; i < strlen(brick_path) - 1; i++) + if (brick_path[i] == '/') + brick_path[i] = '_'; + ret = snprintf(brick_state_file, PATH_MAX - 1, "%s%s.status", + georep_session_wrkng_dir, brick_path); + brick_state_file[ret] = '\0'; + + gf_log ("", GF_LOG_DEBUG, "brick_state_file = %s", brick_state_file); + + memset (tmp, '\0', sizeof(tmp)); + + ret = glusterd_gsync_read_frm_status (brick_state_file, + tmp, sizeof (tmp)); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read the status" + "file for %s brick for %s(master), %s(slave) " + "session", brickinfo->path, master, slave); + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); + sts_val->worker_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); + sts_val->files_syncd[ret] = '\0'; + ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); + sts_val->purges_remaining[ret] = '\0'; + ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); + sts_val->total_files_skipped[ret] = '\0'; + ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); + sts_val->files_remaining[ret] = '\0'; + ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A"); + sts_val->bytes_remaining[ret] = '\0'; + goto store_status; + } - if (ret) - gsync_count = 1; - else - gsync_count++; + ret = glusterd_gsync_fetch_status_extra (socketfile, sts_val); + if (ret || strlen(sts_val->checkpoint_status) == 0) { + gf_log ("", GF_LOG_DEBUG, "No checkpoint status" + "for %s(master), %s(slave)", master, slave); + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + } - (void) snprintf (nds, sizeof (nds), "node%d", gsync_count); - dyn_node = gf_strdup (node); - if (!dyn_node) - goto out; - ret = dict_set_dynstr (dict, nds, dyn_node); - if (ret) { - GF_FREE (dyn_node); - goto out; - } + ret = glusterd_parse_gsync_status (tmp, sts_val); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to parse the gsync status for %s", + brickinfo->path); + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); + sts_val->worker_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); + sts_val->files_syncd[ret] = '\0'; + ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); + sts_val->purges_remaining[ret] = '\0'; + ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); + sts_val->total_files_skipped[ret] = '\0'; + ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); + sts_val->files_remaining[ret] = '\0'; + ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A"); + sts_val->bytes_remaining[ret] = '\0'; + } - snprintf (mst, sizeof (mst), "master%d", gsync_count); - master = gf_strdup (master); - if (!master) - goto out; - ret = dict_set_dynstr (dict, mst, master); - if (ret) { - GF_FREE (master); - goto out; - } +store_status: + if ((strcmp (monitor_status, "Stable"))) { + memcpy (sts_val->worker_status, monitor_status, strlen(monitor_status)); + sts_val->worker_status[strlen(monitor_status)] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + } - snprintf (slv, sizeof (slv), "slave%d", gsync_count); - slave = gf_strdup (slave); - if (!slave) - goto out; - ret = dict_set_dynstr (dict, slv, slave); - if (ret) { - GF_FREE (slave); - goto out; - } + if (strcmp (sts_val->worker_status, "Active")) { + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + } - snprintf (sts, sizeof (slv), "status%d", gsync_count); - bufp = gf_strdup (buf); - if (!bufp) - goto out; - ret = dict_set_dynstr (dict, sts, bufp); - if (ret) { - GF_FREE (bufp); - goto out; + if (!strcmp (sts_val->slave_node, "N/A")) { + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + } + + memcpy (sts_val->node, node, strlen(node)); + sts_val->node[strlen(node)] = '\0'; + memcpy (sts_val->brick, brickinfo->path, strlen(brickinfo->path)); + sts_val->brick[strlen(brickinfo->path)] = '\0'; + memcpy (sts_val->master, master, strlen(master)); + sts_val->master[strlen(master)] = '\0'; + + snprintf (sts_val_name, sizeof (sts_val_name), "status_value%d", gsync_count); + ret = dict_set_bin (dict, sts_val_name, sts_val, sizeof(gf_gsync_status_t)); + if (ret) { + GF_FREE (sts_val); + goto out; + } + + gsync_count++; + sts_val = NULL; } + ret = dict_set_int32 (dict, "gsync-count", gsync_count); if (ret) goto out; - out: +out: dict_destroy (confd); return 0; @@ -3246,30 +3406,32 @@ glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } - ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); - if (ptr) { - ret = dict_get_int32 (rsp_dict, "output_count", &output_count); - if (ret) - output_count = 1; - else - output_count++; - memset (output_name, '\0', sizeof (output_name)); - snprintf (output_name, sizeof (output_name), - "output_%d", output_count); - if (buf[strlen(buf) - 1] == '\n') - buf[strlen(buf) - 1] = '\0'; - bufp = gf_strdup (buf); - if (!bufp) - gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); - ret = dict_set_dynstr (rsp_dict, output_name, bufp); - if (ret) { - GF_FREE (bufp); - gf_log ("", GF_LOG_ERROR, "output set failed."); + do { + ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + ret = dict_get_int32 (rsp_dict, "output_count", &output_count); + if (ret) + output_count = 1; + else + output_count++; + memset (output_name, '\0', sizeof (output_name)); + snprintf (output_name, sizeof (output_name), + "output_%d", output_count); + if (buf[strlen(buf) - 1] == '\n') + buf[strlen(buf) - 1] = '\0'; + bufp = gf_strdup (buf); + if (!bufp) + gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); + ret = dict_set_dynstr (rsp_dict, output_name, bufp); + if (ret) { + GF_FREE (bufp); + gf_log ("", GF_LOG_ERROR, "output set failed."); + } + ret = dict_set_int32 (rsp_dict, "output_count", output_count); + if (ret) + gf_log ("", GF_LOG_ERROR, "output_count set failed."); } - ret = dict_set_int32 (rsp_dict, "output_count", output_count); - if (ret) - gf_log ("", GF_LOG_ERROR, "output_count set failed."); - } + } while (ptr); ret = runner_end (&runner); if (ret) { @@ -3708,7 +3870,7 @@ out: } -static int +int glusterd_get_slave_info (char *slave, char **slave_ip, char **slave_vol, char **op_errstr) { @@ -3888,7 +4050,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", ".", NULL); RUN_GSYNCD_CMD; @@ -3902,6 +4064,16 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* ssh-command tar */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "ssh-command-tar"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* pid-file */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_arg (&runner, "pid-file"); @@ -3909,6 +4081,13 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* geo-rep-working-dir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "georep-session-working-dir"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* state-file */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_arg (&runner, "state-file"); @@ -3986,7 +4165,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", NULL); RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c index 0d67d1303..4ce441da8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c @@ -231,7 +231,6 @@ parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc) const char *georep_mnt_desc_template = "SUP(" - "xlator-option=\\*-dht.assert-no-child-down=true " "volfile-server=localhost " "client-pid=%d " "user-map-root=%s " diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 15c40f3e4..8cf9f790f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -4591,14 +4591,24 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) { char *path_list = NULL; char *slave = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *statefile = NULL; + char buf[1024] = "faulty"; int uuid_len = 0; int ret = 0; char uuid_str[64] = {0}; - glusterd_volinfo_t *volinfo = NULL; - char *conf_path = NULL; + glusterd_volinfo_t *volinfo = NULL; + char confpath[PATH_MAX] = ""; + char *op_errstr = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (THIS); + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (data); volinfo = data; - GF_ASSERT (volinfo); slave = strchr(value->data, ':'); if (slave) slave ++; @@ -4608,22 +4618,63 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) strncpy (uuid_str, (char*)value->data, uuid_len); + /* Getting Local Brickpaths */ ret = glusterd_get_local_brickpaths (volinfo, &path_list); - ret = dict_get_str (this, "conf_path", &conf_path); + /*Generating the conf file path needed by gsyncd */ + ret = glusterd_get_slave_info (slave, &slave_ip, + &slave_vol, &op_errstr); if (ret) { gf_log ("", GF_LOG_ERROR, - "Unable to fetch conf file path."); + "Unable to fetch slave details."); + ret = -1; goto out; } - glusterd_start_gsync (volinfo, slave, path_list, conf_path, - uuid_str, NULL); + ret = snprintf (confpath, sizeof(confpath) - 1, + "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, volinfo->volname, + slave_ip, slave_vol); + confpath[ret] = '\0'; + + /* Fetching the last status of the node */ + ret = glusterd_get_statefile_name (volinfo, slave, + confpath, &statefile); + if (ret) { + if (!strstr(slave, "::")) + gf_log ("", GF_LOG_INFO, + "%s is not a valid slave url.", slave); + else + gf_log ("", GF_LOG_INFO, "Unable to get" + " statefile's name"); + goto out; + } + + ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read the status"); + goto out; + } - GF_FREE (path_list); - path_list = NULL; + /* Looks for the last status, to find if the sessiom was running + * when the node went down. If the session was not started or + * not started, do not restart the geo-rep session */ + if ((!strcmp (buf, "Not Started")) || + (!strcmp (buf, "Stopped"))) { + gf_log ("", GF_LOG_INFO, + "Geo-Rep Session was not started between " + "%s and %s::%s. Not Restarting", volinfo->volname, + slave_ip, slave_vol); + goto out; + } + + glusterd_start_gsync (volinfo, slave, path_list, confpath, + uuid_str, NULL); out: + if (path_list) + GF_FREE (path_list); + return ret; } @@ -7205,21 +7256,16 @@ glusterd_append_gsync_status (dict_t *dst, dict_t *src) } -static int32_t +int32_t glusterd_append_status_dicts (dict_t *dst, dict_t *src) { - int dst_count = 0; - int src_count = 0; - int i = 0; - int ret = 0; - char mst[PATH_MAX] = {0,}; - char slv[PATH_MAX] = {0, }; - char sts[PATH_MAX] = {0, }; - char nds[PATH_MAX] = {0, }; - char *mst_val = NULL; - char *slv_val = NULL; - char *sts_val = NULL; - char *nds_val = NULL; + char sts_val_name[PATH_MAX] = {0, }; + int dst_count = 0; + int src_count = 0; + int i = 0; + int ret = 0; + gf_gsync_status_t *sts_val = NULL; + gf_gsync_status_t *dst_sts_val = NULL; GF_ASSERT (dst); @@ -7237,49 +7283,29 @@ glusterd_append_status_dicts (dict_t *dst, dict_t *src) goto out; } - for (i = 1; i <= src_count; i++) { - snprintf (nds, sizeof(nds), "node%d", i); - snprintf (mst, sizeof(mst), "master%d", i); - snprintf (slv, sizeof(slv), "slave%d", i); - snprintf (sts, sizeof(sts), "status%d", i); - - ret = dict_get_str (src, nds, &nds_val); - if (ret) - goto out; - - ret = dict_get_str (src, mst, &mst_val); - if (ret) - goto out; - - ret = dict_get_str (src, slv, &slv_val); - if (ret) - goto out; + for (i = 0; i < src_count; i++) { + memset (sts_val_name, '\0', sizeof(sts_val_name)); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i); - ret = dict_get_str (src, sts, &sts_val); + ret = dict_get_bin (src, sts_val_name, (void **) &sts_val); if (ret) goto out; - snprintf (nds, sizeof(nds), "node%d", i+dst_count); - snprintf (mst, sizeof(mst), "master%d", i+dst_count); - snprintf (slv, sizeof(slv), "slave%d", i+dst_count); - snprintf (sts, sizeof(sts), "status%d", i+dst_count); - - ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val)); - if (ret) + dst_sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!dst_sts_val) { + gf_log ("", GF_LOG_ERROR, "Out Of Memory"); goto out; + } - ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val)); - if (ret) - goto out; + memcpy (dst_sts_val, sts_val, sizeof(gf_gsync_status_t)); - ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val)); - if (ret) - goto out; + memset (sts_val_name, '\0', sizeof(sts_val_name)); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i + dst_count); - ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val)); + ret = dict_set_bin (dst, sts_val_name, dst_sts_val, sizeof(gf_gsync_status_t)); if (ret) goto out; - } ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 970b1f8a6..20cd00cbe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -582,6 +582,17 @@ glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, char **slave_ip, char **slave_vol, char **conf_path, char **op_errstr); +int +glusterd_get_slave_info (char *slave, char **slave_ip, + char **slave_vol, char **op_errstr); + +int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile); + +int +glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); + int glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, dict_t *resp_dict, char *path_list, diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index c2be2c9da..58833869a 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -594,7 +594,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", ".", NULL); RUN_GSYNCD_CMD; @@ -608,6 +608,16 @@ configure_syncdaemon (glusterd_conf_t *conf) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* ssh-command tar */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "ssh-command-tar"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* pid-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "pid-file"); @@ -615,6 +625,13 @@ configure_syncdaemon (glusterd_conf_t *conf) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* geo-rep working dir */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "georep-session-working-dir"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* state-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "state-file"); @@ -701,7 +718,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", NULL); RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index e1e9e591f..23b4205b0 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -47,7 +47,7 @@ #define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" #define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" #define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub" -#define GEO_CONF_MAX_OPT_VALS 5 +#define GEO_CONF_MAX_OPT_VALS 6 #define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \ "S56glusterd-geo-rep-create-post.sh" -- cgit