diff options
| author | Ajeet Jha <ajha@redhat.com> | 2013-12-02 12:55:18 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2014-01-27 09:44:33 -0800 | 
| commit | f33d09f23b089bd07437eb714f8dffa43460d6b5 (patch) | |
| tree | d574dcc8b030a59425d9b0717879eb4033866c3e /xlators | |
| parent | 30592e7f92515c5df620f300d6a3df6373ac6200 (diff) | |
glusterd/geo-rep: more glusterd and cli fixes for geo-rep.
    -> Handle option validation in the reset case.
    -> Creating valid conf path when glusterd restarts.
    -> Reading the gsyncd worker thread status and displaying it.
    -> Displaying status-detail per worker.
    -> Fetch checkpoint info in geo-rep status.
    -> use-tarssh value validation added.
misc: misc geo-rep fixes based on cluster, logrotate, etc.
    -> cluster/dht: fix 'stime' getxattr getting overwritten.
    -> cluster/afr: return max of 'stime' values in subvol.
    -> geo-rep-logrotate: Sending SIGHUP to geo-rep auxiliary.
    -> cluster/dht: fix convoluted logic while aggregating.
    -> cluster/*: fix 'stime' min/max fetch logic.
Change-Id: I811acea0bbd6194797a3e55d89295d1ea021ac85
BUG: 1036552
Signed-off-by: Ajeet Jha <ajha@redhat.com>
Reviewed-on: http://review.gluster.org/6405
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Amar Tumballi <amarts@gmail.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/6810
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 8 | ||||
| -rw-r--r-- | xlators/lib/src/libxlator.c | 55 | ||||
| -rw-r--r-- | xlators/lib/src/libxlator.h | 3 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 563 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 136 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 11 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 21 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 2 | 
10 files changed, 543 insertions, 259 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 8a2853319c5..3e80129f935 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1348,7 +1348,7 @@ afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)          int ret = 0;          if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) -                ret = gf_get_min_stime (THIS, data, key, value); +                ret = gf_get_max_stime (THIS, data, key, value);          return ret;  } diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 898f41f0ef8..ed4babd328c 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2015,13 +2015,7 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (!local->xattr) {                  local->xattr = dict_copy_with_ref (xattr, NULL);          } else { -                /* first aggregate everything into xattr and then copy into -                 * local->xattr. This is required as we want to have -                 * 'local->xattr' as the proper final dictionary passed above -                 * distribute xlator. 
-                 */ -                dht_aggregate_xattr (xattr, local->xattr); -                local->xattr = dict_copy (xattr, local->xattr); +                dht_aggregate_xattr (local->xattr, xattr);          }  out:          if (is_last_call (this_call_cnt)) { diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c index 9e535725544..63e9bcf9f0a 100644 --- a/xlators/lib/src/libxlator.c +++ b/xlators/lib/src/libxlator.c @@ -452,6 +452,61 @@ gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)          /* can't use 'min()' macro here as we need to compare two fields             in the array, selectively */ +        if ((host_value_timebuf[0] < host_timebuf[0]) || +            ((host_value_timebuf[0] == host_timebuf[0]) && +             (host_value_timebuf[1] < host_timebuf[1]))) { +                update_timebuf (value_timebuf, net_timebuf); +        } + +        ret = 0; +out: +        return ret; +error: +        /* To be used only when net_timebuf is not set in the dict */ +        if (net_timebuf) +                GF_FREE (net_timebuf); + +        return ret; +} + +int +gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value) +{ +        int ret = -1; +        uint32_t *net_timebuf = NULL; +        uint32_t *value_timebuf = NULL; +        uint32_t host_timebuf[2] = {0,}; +        uint32_t host_value_timebuf[2] = {0,}; + +        /* stime should be maximum of all the other nodes */ +        ret = dict_get_bin (dst, key, (void **)&net_timebuf); +        if (ret < 0) { +                net_timebuf = GF_CALLOC (1, sizeof (int64_t), +                                           gf_common_mt_char); +                if (!net_timebuf) +                        goto out; + +                ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t)); +                if (ret < 0) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "key=%s: dict set failed", key); +           
             goto error; +                } +        } + +        value_timebuf = data_to_bin (value); +        if (!value_timebuf) { +                gf_log (this->name, GF_LOG_WARNING, +                        "key=%s: getting value of stime failed", key); +                ret = -1; +                goto out; +        } + +        get_hosttime (value_timebuf, host_value_timebuf); +        get_hosttime (net_timebuf, host_timebuf); + +        /* can't use 'max()' macro here as we need to compare two fields +           in the array, selectively */          if ((host_value_timebuf[0] > host_timebuf[0]) ||              ((host_value_timebuf[0] == host_timebuf[0]) &&               (host_value_timebuf[1] > host_timebuf[1]))) { diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h index 08bd77b918c..175d3141d45 100644 --- a/xlators/lib/src/libxlator.h +++ b/xlators/lib/src/libxlator.h @@ -151,4 +151,7 @@ match_uuid_local (const char *name, char *uuid);  int  gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value); +int +gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value); +  #endif /* !_LIBXLATOR_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 5786694bdf9..5d724cc2a32 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -28,17 +28,6 @@  static int  dict_get_param (dict_t *dict, char *key, char **param); -static int -glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, -                             char *conf_path, char **statefile); - -static int -glusterd_get_slave_info (char *slave, char **slave_ip, -                         char **slave_vol, char **op_errstr); - -static int -glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); -  struct gsync_config_opt_vals_ gsync_confopt_vals[] = {          {.op_name        = "change_detector",           .no_of_pos_vals = 
2, @@ -55,6 +44,11 @@ struct gsync_config_opt_vals_ gsync_confopt_vals[] = {           .case_sensitive = _gf_false,           .values         = {"critical", "error", "warning", "info", "debug"}          }, +        {.op_name        = "use-tarssh", +         .no_of_pos_vals = 6, +         .case_sensitive = _gf_false, +         .values         = {"true", "false", "0", "1", "yes", "no"} +        },          {.op_name = NULL,          },  }; @@ -74,6 +68,11 @@ static char *gsync_reserved_opts[] = {          NULL  }; +static char *gsync_no_restart_opts[] = { +        "checkpoint", +        NULL +}; +  int  __glusterd_handle_sys_exec (rpcsvc_request_t *req)  { @@ -899,6 +898,8 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)                  }                  if (op_match) { +                        if (!op_value) +                                goto out;                          val_match = _gf_false;                          for (i = 0; i < conf_vals->no_of_pos_vals; i++) {                                  if(conf_vals->case_sensitive){ @@ -912,7 +913,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)                          if (!val_match) {                                  ret = snprintf (errmsg, sizeof(errmsg) - 1, -                                                "Invalid values (%s) for" +                                                "Invalid value(%s) for"                                                  " option %s", op_value,                                                  op_name);                                  errmsg[ret] = '\0'; @@ -923,7 +924,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)                          }                  }          } - +out:          return 0;  } @@ -1581,7 +1582,7 @@ out:          return ret;  } -static int +int  glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,                               char *conf_path, 
char **statefile)  { @@ -1736,7 +1737,7 @@ glusterd_verify_slave (char *volname, char *slave_ip, char *slave,                  gf_log ("", GF_LOG_ERROR, "Not a valid slave");                  ret = glusterd_gsync_read_frm_status (log_file_path,                                                        buf, sizeof(buf)); -                if (ret) { +                if (ret <= 0) {                          gf_log ("", GF_LOG_ERROR, "Unable to read from %s",                                  log_file_path);                          goto out; @@ -2391,6 +2392,8 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave,          char            *slave_ip  = NULL;          char            *slave_vol = NULL;          struct stat      stbuf     = {0, }; +        gf_boolean_t     restart_required = _gf_true; +        char           **resopt    = NULL;          GF_ASSERT (slave);          GF_ASSERT (op_errstr); @@ -2495,18 +2498,28 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave,  out:          if (!ret && volinfo) { +            for (resopt = gsync_no_restart_opts; *resopt; resopt++) { +                restart_required = _gf_true; +                if (!strcmp ((*resopt), op_name)){ +                    restart_required = _gf_false; +                    break; +                } +            } + +            if (restart_required) {                  ret = glusterd_check_restart_gsync_session (volinfo, slave,                                                              resp_dict, path_list,                                                              conf_path, 0);                  if (ret) -                        *op_errstr = gf_strdup ("internal error"); +                    *op_errstr = gf_strdup ("internal error"); +            }          }          gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);          return ret;  } -static int +int  glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)  {          int                 ret = 
0; @@ -2530,7 +2543,6 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)                          char *p = buf + len - 1;                          while (isspace (*p))                                  *p-- = '\0'; -                        ret = 0;                  }          } else if (ret < 0)                  gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt"); @@ -2540,20 +2552,146 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)  }  static int -glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen) +dict_get_param (dict_t *dict, char *key, char **param) +{ +        char  *dk = NULL; +        char   *s = NULL; +        char    x = '\0'; +        int   ret = 0; + +        if (dict_get_str (dict, key, param) == 0) +                return 0; + +        dk = gf_strdup (key); +        if (!key) +                return -1; + +        s = strpbrk (dk, "-_"); +        if (!s) +                return -1; +        x = (*s == '-') ? 
'_' : '-'; +        *s++ = x; +        while ((s = strpbrk (s, "-_"))) +                *s++ = x; + +        ret = dict_get_str (dict, dk, param); + +        GF_FREE (dk); +        return ret; +} + +static int +glusterd_parse_gsync_status (char *buf, gf_gsync_status_t *sts_val) +{ +        int              ret      = -1; +        int              i      = -1; +        int              num_of_fields = 8; +        char            *token    = NULL; +        char           **tokens   = NULL; +        char           **ptr   = NULL; +        char            *save_ptr = NULL; +        char             na_buf[] = "N/A"; + +        if (!buf) { +                gf_log ("", GF_LOG_ERROR, "Empty buf"); +                goto out; +        } + +        tokens = calloc (num_of_fields, sizeof (char *)); +        if (!tokens) { +                gf_log ("", GF_LOG_ERROR, "Out of memory"); +                goto out; +        } + +        ptr = tokens; + +        for (token = strtok_r (buf, ",", &save_ptr); token; +             token = strtok_r (NULL, ",", &save_ptr)) { +                *ptr = gf_strdup(token); +                if (!*ptr) { +                        gf_log ("", GF_LOG_ERROR, "Out of memory"); +                        goto out; +                } +                ptr++; +        } + +        for (i = 0; i < num_of_fields; i++) { +                token = strtok_r (tokens[i], ":", &save_ptr); +                token = strtok_r (NULL, "\0", &save_ptr); +                token++; + +                /* token NULL check */ +                if (!token && (i != 0) && +                    (i != 5) && (i != 7)) +                    token = na_buf; + +                if (i == 0) { +                        if (!token) +                            token = na_buf; +                        else { +                            token++; +                            if (!token) +                                token = na_buf; +                            else +                               
 token[strlen(token) - 1] = '\0'; +                        } +                        memcpy (sts_val->slave_node, token, strlen(token)); +                } +                if (i == 1) +                        memcpy (sts_val->files_syncd, token, strlen(token)); +                if (i == 2) +                        memcpy (sts_val->purges_remaining, token, strlen(token)); +                if (i == 3) +                        memcpy (sts_val->total_files_skipped, token, strlen(token)); +                if (i == 4) +                        memcpy (sts_val->files_remaining, token, strlen(token)); +                if (i == 5) { +                        if (!token) +                            token = na_buf; +                        else { +                            token++; +                            if (!token) +                                token = na_buf; +                            else +                                token[strlen(token) - 1] = '\0'; +                        } +                        memcpy (sts_val->worker_status, token, strlen(token)); +                } +                if (i == 6) +                        memcpy (sts_val->bytes_remaining, token, strlen(token)); +                if (i == 7) { +                        if (!token) +                            token = na_buf; +                        else { +                            token++; +                            if (!token) +                                token = na_buf; +                            else +                                token[strlen(token) - 2] = '\0'; +                        } +                        memcpy (sts_val->crawl_status, token, strlen(token)); +                } +        } + +        ret = 0; +out: +        for (i = 0; i< num_of_fields; i++) +               if (tokens[i]) +                       GF_FREE(tokens[i]); + +        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +        return ret; +} + +static int +glusterd_gsync_fetch_status_extra 
(char *path, gf_gsync_status_t *sts_val)  {          char sockpath[PATH_MAX] = {0,};          struct sockaddr_un   sa = {0,}; -        size_t                l = 0;          int                   s = -1;          struct pollfd       pfd = {0,};          int                 ret = 0; -        l = strlen (buf); -        /* seek to end of data in buf */ -        buf += l; -        blen -= l; -          glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath));          strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path)); @@ -2581,66 +2719,40 @@ glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen)                  ret = -1;                  goto out;          } -        ret = read(s, buf, blen); +        ret = read(s, sts_val->checkpoint_status, +                   sizeof(sts_val->checkpoint_status));          /* we expect a terminating 0 byte */ -        if (ret == 0 || (ret > 0 && buf[ret - 1])) +        if (ret == 0 || (ret > 0 && sts_val->checkpoint_status[ret - 1]))                  ret = -1; -        if (ret > 0) +        if (ret > 0) {                  ret = 0; +        } - out: +out:          close (s);          return ret;  } -static int -dict_get_param (dict_t *dict, char *key, char **param) -{ -        char  *dk = NULL; -        char   *s = NULL; -        char    x = '\0'; -        int   ret = 0; - -        if (dict_get_str (dict, key, param) == 0) -                return 0; - -        dk = gf_strdup (key); -        if (!key) -                return -1; - -        s = strpbrk (dk, "-_"); -        if (!s) -                return -1; -        x = (*s == '-') ? 
'_' : '-'; -        *s++ = x; -        while ((s = strpbrk (s, "-_"))) -                *s++ = x; - -        ret = dict_get_str (dict, dk, param); - -        GF_FREE (dk); -        return ret; -} - -static int +int  glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,                             char *conf_path, dict_t *dict, char *node)  { -        glusterd_conf_t *priv = NULL; -        int              ret = 0; -        char            *statefile = NULL; -        char            *master    = NULL; -        char             buf[1024] = "defunct"; -        char             nds[1024] = {0, }; -        char             mst[1024] = {0, }; -        char             slv[1024] = {0, }; -        char             sts[1024] = {0, }; -        char            *bufp = NULL; -        dict_t          *confd = NULL; -        int              gsync_count = 0; -        int              status = 0; -        char *dyn_node = NULL; -        char *path_list = NULL; +        char                    brick_state_file[PATH_MAX] = ""; +        char                    brick_path[PATH_MAX]       = ""; +        char                   *georep_session_wrkng_dir   = NULL; +        char                   *master                     = NULL; +        char                    tmp[1024]                  = ""; +        char                    sts_val_name[1024]         = ""; +        char                    monitor_status[PATH_MAX]   = ""; +        char                   *statefile                  = NULL; +        char                   *socketfile                 = NULL; +        dict_t                 *confd                      = NULL; +        int                     gsync_count                = 0; +        int                     i                          = 0; +        int                     ret                        = 0; +        glusterd_brickinfo_t   *brickinfo                  = NULL; +        gf_gsync_status_t      *sts_val                    = NULL; +        
glusterd_conf_t        *priv                       = NULL;          GF_ASSERT (THIS);          GF_ASSERT (THIS->private); @@ -2661,7 +2773,7 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to get configuration data"                          "for %s(master), %s(slave)", master, slave); -                goto done; +                goto out;          } @@ -2670,120 +2782,168 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,                  gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name "                          "for %s(master), %s(slave). Please check gsync "                          "config file.", master, slave); -                goto done; +                goto out;          } -        ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); -        if (ret) { + +        ret = glusterd_gsync_read_frm_status (statefile, monitor_status, +                                              sizeof (monitor_status)); +        if (ret <= 0) {                  gf_log ("", GF_LOG_ERROR, "Unable to read the status"                          "file for %s(master), %s(slave)", master, slave); -                strncpy (buf, "defunct", sizeof (buf)); -                goto done; -        } - -        ret = gsync_status (master, slave, conf_path, &status); -        if (ret == 0 && status == -1) { -                if ((strcmp (buf, "Not Started")) && -                    (strcmp (buf, "Stopped"))) -                    strncpy (buf, "defunct", sizeof (buf)); -                goto done; -        } else if (ret == -1) { -                gf_log ("", GF_LOG_ERROR, "Unable to get gsync status"); -                goto done; +                strncpy (monitor_status, "defunct", sizeof (monitor_status));          } -        if (strcmp (buf, "Stable") != 0) -                goto done; - -        ret = dict_get_param (confd, "state_socket_unencoded", 
&statefile); +        ret = dict_get_param (confd, "georep_session_working_dir", +                              &georep_session_wrkng_dir);          if (ret) { -                gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded" -                        " filepath. Please check gsync config file."); -                goto done; +                gf_log ("", GF_LOG_ERROR, "Unable to get geo-rep session's " +                        "working directory name for %s(master), %s(slave). " +                        "Please check gsync config file.", master, slave); +                goto out;          } -        ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf)); + +        ret = dict_get_param (confd, "state_socket_unencoded", &socketfile);          if (ret) { -                gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status" -                        "for %s(master), %s(slave)", master, slave); -                /* there is a slight chance that this occurs due to race -                 * -- in that case, the following options all seem bad: -                 * -                 * - suppress irregurlar behavior by just leaving status -                 *   on "OK" -                 * - freak out users with a misleading "defunct" -                 * - overload the meaning of the regular error signal -                 *   mechanism of gsyncd, that is, when status is "faulty" -                 * -                 * -- so we just come up with something new... -                 */ -                strncpy (buf, "N/A", sizeof (buf)); -                goto done; +                gf_log ("", GF_LOG_ERROR, "Unable to get socket file's name " +                        "for %s(master), %s(slave). 
Please check gsync " +                        "config file.", master, slave); +                goto out;          } - done: -        if ((!strcmp (buf, "defunct")) || -            (!strcmp (buf, "Not Started")) || -            (!strcmp (buf, "Stopped"))) { -                ret = glusterd_get_local_brickpaths (volinfo, &path_list); -                if (!path_list) { -                        gf_log ("", GF_LOG_DEBUG, "This node not being part of" -                                " volume should not be running gsyncd. Hence" -                                " shouldn't display status for this node."); -                        ret = 0; +        ret = dict_get_int32 (dict, "gsync-count", &gsync_count); +        if (ret) +                gsync_count = 0; + +        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { +                if (uuid_compare (brickinfo->uuid, MY_UUID)) +                        continue; + +                sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t), +                                     gf_common_mt_gsync_status_t); +                if (!sts_val) { +                        gf_log ("", GF_LOG_ERROR, "Out Of Memory");                          goto out;                  } -        } -        ret = dict_get_int32 (dict, "gsync-count", &gsync_count); +                /* Creating the brick state file's path */ +                memset(brick_state_file, '\0', PATH_MAX); +                memcpy (brick_path, brickinfo->path, PATH_MAX - 1); +                for (i = 0; i < strlen(brick_path) - 1; i++) +                        if (brick_path[i] == '/') +                                brick_path[i] = '_'; +                ret = snprintf(brick_state_file, PATH_MAX - 1, "%s%s.status", +                               georep_session_wrkng_dir, brick_path); +                brick_state_file[ret] = '\0'; + +                gf_log ("", GF_LOG_DEBUG, "brick_state_file = %s", brick_state_file); + +                memset (tmp, '\0', sizeof(tmp)); + + 
               ret = glusterd_gsync_read_frm_status (brick_state_file, +                                                      tmp, sizeof (tmp)); +                if (ret <= 0) { +                        gf_log ("", GF_LOG_ERROR, "Unable to read the status" +                                "file for %s brick for  %s(master), %s(slave) " +                                "session", brickinfo->path, master, slave); +                        memcpy (sts_val->slave_node, slave, strlen(slave)); +                        sts_val->slave_node[strlen(slave)] = '\0'; +                        ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); +                        sts_val->worker_status[ret] = '\0'; +                        ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); +                        sts_val->checkpoint_status[ret] = '\0'; +                        ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); +                        sts_val->crawl_status[ret] = '\0'; +                        ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); +                        sts_val->files_syncd[ret] = '\0'; +                        ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); +                        sts_val->purges_remaining[ret] = '\0'; +                        ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); +                        sts_val->total_files_skipped[ret] = '\0'; +                        ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); +                        sts_val->files_remaining[ret] = '\0'; +                        ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A"); +                        sts_val->bytes_remaining[ret] = '\0'; +                        goto store_status; +                } -     
   if (ret) -                gsync_count = 1; -        else -                gsync_count++; +                ret = glusterd_gsync_fetch_status_extra (socketfile, sts_val); +                if (ret || strlen(sts_val->checkpoint_status) == 0) { +                        gf_log ("", GF_LOG_DEBUG, "No checkpoint status" +                                "for %s(master), %s(slave)", master, slave); +                        ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); +                        sts_val->checkpoint_status[ret] = '\0'; +                } -        (void) snprintf (nds, sizeof (nds), "node%d", gsync_count); -        dyn_node = gf_strdup (node); -        if (!dyn_node) -                goto out; -        ret = dict_set_dynstr (dict, nds, dyn_node); -        if (ret) { -                GF_FREE (dyn_node); -                goto out; -        } +                ret = glusterd_parse_gsync_status (tmp, sts_val); +                if (ret) { +                        gf_log ("", GF_LOG_ERROR, +                                "Unable to parse the gsync status for %s", +                                brickinfo->path); +                        memcpy (sts_val->slave_node, slave, strlen(slave)); +                        sts_val->slave_node[strlen(slave)] = '\0'; +                        ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); +                        sts_val->worker_status[ret] = '\0'; +                        ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); +                        sts_val->checkpoint_status[ret] = '\0'; +                        ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); +                        sts_val->crawl_status[ret] = '\0'; +                        ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); +                        sts_val->files_syncd[ret] = '\0'; +               
         ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); +                        sts_val->purges_remaining[ret] = '\0'; +                        ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); +                        sts_val->total_files_skipped[ret] = '\0'; +                        ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); +                        sts_val->files_remaining[ret] = '\0'; +                        ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A"); +                        sts_val->bytes_remaining[ret] = '\0'; +                } -        snprintf (mst, sizeof (mst), "master%d", gsync_count); -        master = gf_strdup (master); -        if (!master) -                goto out; -        ret = dict_set_dynstr (dict, mst, master); -        if (ret) { -                GF_FREE (master); -                goto out; -        } +store_status: +                if ((strcmp (monitor_status, "Stable"))) { +                        memcpy (sts_val->worker_status, monitor_status, strlen(monitor_status)); +                        sts_val->worker_status[strlen(monitor_status)] = '\0'; +                        ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); +                        sts_val->crawl_status[ret] = '\0'; +                        ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); +                        sts_val->checkpoint_status[ret] = '\0'; +                } -        snprintf (slv, sizeof (slv), "slave%d", gsync_count); -        slave = gf_strdup (slave); -        if (!slave) -                goto out; -        ret = dict_set_dynstr (dict, slv, slave); -        if (ret) { -                GF_FREE (slave); -                goto out; -        } +                if (strcmp (sts_val->worker_status, "Active")) { +                        ret 
= snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); +                        sts_val->checkpoint_status[ret] = '\0'; +                        ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); +                        sts_val->crawl_status[ret] = '\0'; +                } -        snprintf (sts, sizeof (slv), "status%d", gsync_count); -        bufp = gf_strdup (buf); -        if (!bufp) -                goto out; -        ret = dict_set_dynstr (dict, sts, bufp); -        if (ret) { -                GF_FREE (bufp); -                goto out; +                if (!strcmp (sts_val->slave_node, "N/A")) { +                        memcpy (sts_val->slave_node, slave, strlen(slave)); +                        sts_val->slave_node[strlen(slave)] = '\0'; +                } + +                memcpy (sts_val->node, node, strlen(node)); +                sts_val->node[strlen(node)] = '\0'; +                memcpy (sts_val->brick, brickinfo->path, strlen(brickinfo->path)); +                sts_val->brick[strlen(brickinfo->path)] = '\0'; +                memcpy (sts_val->master, master, strlen(master)); +                sts_val->master[strlen(master)] = '\0'; + +                snprintf (sts_val_name, sizeof (sts_val_name), "status_value%d", gsync_count); +                ret = dict_set_bin (dict, sts_val_name, sts_val, sizeof(gf_gsync_status_t)); +                if (ret) { +                        GF_FREE (sts_val); +                        goto out; +                } + +                gsync_count++; +                sts_val = NULL;          } +          ret = dict_set_int32 (dict, "gsync-count", gsync_count);          if (ret)                  goto out; - out: +out:          dict_destroy (confd);          return 0; @@ -3246,30 +3406,32 @@ glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict)                  goto out;          } -        ptr = fgets(buf, sizeof(buf), runner_chio (&runner, 
STDOUT_FILENO)); -        if (ptr) { -                ret = dict_get_int32 (rsp_dict, "output_count", &output_count); -                if (ret) -                        output_count = 1; -                else -                        output_count++; -                memset (output_name, '\0', sizeof (output_name)); -                snprintf (output_name, sizeof (output_name), -                          "output_%d", output_count); -                if (buf[strlen(buf) - 1] == '\n') -                        buf[strlen(buf) - 1] = '\0'; -                bufp = gf_strdup (buf); -                if (!bufp) -                        gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); -                ret = dict_set_dynstr (rsp_dict, output_name, bufp); -                if (ret) { -                        GF_FREE (bufp); -                        gf_log ("", GF_LOG_ERROR, "output set failed."); +        do { +                ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); +                if (ptr) { +                        ret = dict_get_int32 (rsp_dict, "output_count", &output_count); +                        if (ret) +                                output_count = 1; +                        else +                                output_count++; +                        memset (output_name, '\0', sizeof (output_name)); +                        snprintf (output_name, sizeof (output_name), +                                  "output_%d", output_count); +                        if (buf[strlen(buf) - 1] == '\n') +                                buf[strlen(buf) - 1] = '\0'; +                        bufp = gf_strdup (buf); +                        if (!bufp) +                                gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); +                        ret = dict_set_dynstr (rsp_dict, output_name, bufp); +                        if (ret) { +                                GF_FREE (bufp); +                                gf_log ("", GF_LOG_ERROR, "output 
set failed."); +                        } +                        ret = dict_set_int32 (rsp_dict, "output_count", output_count); +                        if (ret) +                                gf_log ("", GF_LOG_ERROR, "output_count set failed.");                  } -                ret = dict_set_int32 (rsp_dict, "output_count", output_count); -                if (ret) -                        gf_log ("", GF_LOG_ERROR, "output_count set failed."); -        } +        } while (ptr);          ret = runner_end (&runner);          if (ret) { @@ -3708,7 +3870,7 @@ out:  } -static int +int  glusterd_get_slave_info (char *slave, char **slave_ip,                           char **slave_vol, char **op_errstr)  { @@ -3888,7 +4050,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)          /* gluster-params */          runinit_gsyncd_setrx (&runner, conf_path);          runner_add_args (&runner, "gluster-params", -                         "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", +                         "aux-gfid-mount",                           ".", ".", NULL);          RUN_GSYNCD_CMD; @@ -3902,6 +4064,16 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)          runner_add_args (&runner, ".", ".", NULL);          RUN_GSYNCD_CMD; +        /* ssh-command tar */ +        runinit_gsyncd_setrx (&runner, conf_path); +        runner_add_arg (&runner, "ssh-command-tar"); +        runner_argprintf (&runner, +                          "ssh -oPasswordAuthentication=no " +                           "-oStrictHostKeyChecking=no " +                           "-i %s/tar_ssh.pem", georepdir); +        runner_add_args (&runner, ".", ".", NULL); +        RUN_GSYNCD_CMD; +          /* pid-file */          runinit_gsyncd_setrx (&runner, conf_path);          runner_add_arg (&runner, "pid-file"); @@ -3909,6 +4081,13 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)          runner_add_args (&runner, ".", ".", NULL);          
RUN_GSYNCD_CMD; +        /* geo-rep-working-dir */ +        runinit_gsyncd_setrx (&runner, conf_path); +        runner_add_arg (&runner, "georep-session-working-dir"); +        runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); +        runner_add_args (&runner, ".", ".", NULL); +        RUN_GSYNCD_CMD; +          /* state-file */          runinit_gsyncd_setrx (&runner, conf_path);          runner_add_arg (&runner, "state-file"); @@ -3986,7 +4165,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path)          /* gluster-params */          runinit_gsyncd_setrx (&runner, conf_path);          runner_add_args (&runner, "gluster-params", -                         "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", +                         "aux-gfid-mount",                           ".", NULL);          RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c index 0d67d130360..4ce441da801 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c @@ -231,7 +231,6 @@ parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc)  const char *georep_mnt_desc_template =          "SUP(" -                "xlator-option=\\*-dht.assert-no-child-down=true "                  "volfile-server=localhost "                  "client-pid=%d "                  "user-map-root=%s " diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index fe20e95f276..e2e363bc311 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -4726,14 +4726,24 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)  {          char                        *path_list = NULL;          char                        *slave = NULL; +        char                        *slave_ip = NULL; +        char                       
 *slave_vol = NULL; +        char                        *statefile = NULL; +        char                         buf[1024] = "faulty";          int                          uuid_len = 0;          int                          ret = 0;          char                         uuid_str[64] = {0}; -        glusterd_volinfo_t           *volinfo = NULL; -        char                         *conf_path = NULL; +        glusterd_volinfo_t          *volinfo = NULL; +        char                         confpath[PATH_MAX] = ""; +        char                        *op_errstr = NULL; +        glusterd_conf_t             *priv = NULL; + +        GF_ASSERT (THIS); +        priv = THIS->private; +        GF_ASSERT (priv); +        GF_ASSERT (data);          volinfo = data; -        GF_ASSERT (volinfo);          slave = strchr(value->data, ':');          if (slave)                  slave ++; @@ -4743,22 +4753,63 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)          strncpy (uuid_str, (char*)value->data, uuid_len); +        /* Getting Local Brickpaths */          ret = glusterd_get_local_brickpaths (volinfo, &path_list); -        ret = dict_get_str (this, "conf_path", &conf_path); +        /*Generating the conf file path needed by gsyncd */ +        ret = glusterd_get_slave_info (slave, &slave_ip, +                                       &slave_vol, &op_errstr);          if (ret) {                  gf_log ("", GF_LOG_ERROR, -                        "Unable to fetch conf file path."); +                        "Unable to fetch slave details."); +                ret = -1;                  goto out;          } -        glusterd_start_gsync (volinfo, slave, path_list, conf_path, -                              uuid_str, NULL); +        ret = snprintf (confpath, sizeof(confpath) - 1, +                        "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", +                        priv->workdir, volinfo->volname, +                        slave_ip, slave_vol); +        
confpath[ret] = '\0'; + +        /* Fetching the last status of the node */ +        ret = glusterd_get_statefile_name (volinfo, slave, +                                           confpath, &statefile); +        if (ret) { +                if (!strstr(slave, "::")) +                        gf_log ("", GF_LOG_INFO, +                                "%s is not a valid slave url.", slave); +                else +                        gf_log ("", GF_LOG_INFO, "Unable to get" +                                " statefile's name"); +                goto out; +        } + +        ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); +        if (ret < 0) { +                gf_log ("", GF_LOG_ERROR, "Unable to read the status"); +                goto out; +        } -        GF_FREE (path_list); -        path_list = NULL; +        /* Looks for the last status, to find if the sessiom was running +         * when the node went down. If the session was not started or +         * not started, do not restart the geo-rep session */ +        if ((!strcmp (buf, "Not Started")) || +            (!strcmp (buf, "Stopped"))) { +                gf_log ("", GF_LOG_INFO, +                        "Geo-Rep Session was not started between " +                        "%s and %s::%s. 
Not Restarting", volinfo->volname, +                        slave_ip, slave_vol); +                goto out; +        } + +        glusterd_start_gsync (volinfo, slave, path_list, confpath, +                              uuid_str, NULL);  out: +        if (path_list) +                GF_FREE (path_list); +          return ret;  } @@ -7340,21 +7391,16 @@ glusterd_append_gsync_status (dict_t *dst, dict_t *src)  } -static int32_t +int32_t  glusterd_append_status_dicts (dict_t *dst, dict_t *src)  { -        int              dst_count = 0; -        int              src_count = 0; -        int              i = 0; -        int              ret = 0; -        char             mst[PATH_MAX] = {0,}; -        char             slv[PATH_MAX] = {0, }; -        char             sts[PATH_MAX] = {0, }; -        char             nds[PATH_MAX] = {0, }; -        char             *mst_val = NULL; -        char             *slv_val = NULL; -        char             *sts_val = NULL; -        char             *nds_val = NULL; +        char                sts_val_name[PATH_MAX] = {0, }; +        int                 dst_count              = 0; +        int                 src_count              = 0; +        int                 i                      = 0; +        int                 ret                    = 0; +        gf_gsync_status_t  *sts_val                = NULL; +        gf_gsync_status_t  *dst_sts_val            = NULL;          GF_ASSERT (dst); @@ -7372,49 +7418,29 @@ glusterd_append_status_dicts (dict_t *dst, dict_t *src)                  goto out;          } -        for (i = 1; i <= src_count; i++) { -                snprintf (nds, sizeof(nds), "node%d", i); -                snprintf (mst, sizeof(mst), "master%d", i); -                snprintf (slv, sizeof(slv), "slave%d", i); -                snprintf (sts, sizeof(sts), "status%d", i); - -                ret = dict_get_str (src, nds, &nds_val); -                if (ret) -                        goto out; - -                ret 
= dict_get_str (src, mst, &mst_val); -                if (ret) -                        goto out; - -                ret = dict_get_str (src, slv, &slv_val); -                if (ret) -                        goto out; +        for (i = 0; i < src_count; i++) { +                memset (sts_val_name, '\0', sizeof(sts_val_name)); +                snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i); -                ret = dict_get_str (src, sts, &sts_val); +                ret = dict_get_bin (src, sts_val_name, (void **) &sts_val);                  if (ret)                          goto out; -                snprintf (nds, sizeof(nds), "node%d", i+dst_count); -                snprintf (mst, sizeof(mst), "master%d", i+dst_count); -                snprintf (slv, sizeof(slv), "slave%d", i+dst_count); -                snprintf (sts, sizeof(sts), "status%d", i+dst_count); - -                ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val)); -                if (ret) +                dst_sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t), +                                         gf_common_mt_gsync_status_t); +                if (!dst_sts_val) { +                        gf_log ("", GF_LOG_ERROR, "Out Of Memory");                          goto out; +                } -                ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val)); -                if (ret) -                        goto out; +                memcpy (dst_sts_val, sts_val, sizeof(gf_gsync_status_t)); -                ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val)); -                if (ret) -                        goto out; +                memset (sts_val_name, '\0', sizeof(sts_val_name)); +                snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i + dst_count); -                ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val)); +                ret = dict_set_bin (dst, sts_val_name, dst_sts_val, sizeof(gf_gsync_status_t));                  if (ret)    
                      goto out; -          }          ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index ec515591885..05d5c7172b2 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -589,6 +589,17 @@ glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict,                                       char **conf_path, char **op_errstr);  int +glusterd_get_slave_info (char *slave, char **slave_ip, +                         char **slave_vol, char **op_errstr); + +int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, +                             char *conf_path, char **statefile); + +int +glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); + +int  glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,                                        dict_t *resp_dict, char *path_list,                                        char *conf_path, gf_boolean_t is_force); diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index c2be2c9da59..58833869a3e 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -594,7 +594,7 @@ configure_syncdaemon (glusterd_conf_t *conf)          /* gluster-params */          runinit_gsyncd_setrx (&runner, conf);          runner_add_args (&runner, "gluster-params", -                         "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", +                         "aux-gfid-mount",                           ".", ".", NULL);          RUN_GSYNCD_CMD; @@ -608,6 +608,16 @@ configure_syncdaemon (glusterd_conf_t *conf)          runner_add_args (&runner, ".", ".", NULL);          RUN_GSYNCD_CMD; +        /* ssh-command tar */ +        runinit_gsyncd_setrx (&runner, conf); +        runner_add_arg (&runner, 
"ssh-command-tar"); +        runner_argprintf (&runner, +                          "ssh -oPasswordAuthentication=no " +                           "-oStrictHostKeyChecking=no " +                           "-i %s/tar_ssh.pem", georepdir); +        runner_add_args (&runner, ".", ".", NULL); +        RUN_GSYNCD_CMD; +          /* pid-file */          runinit_gsyncd_setrx (&runner, conf);          runner_add_arg (&runner, "pid-file"); @@ -615,6 +625,13 @@ configure_syncdaemon (glusterd_conf_t *conf)          runner_add_args (&runner, ".", ".", NULL);          RUN_GSYNCD_CMD; +        /* geo-rep working dir */ +        runinit_gsyncd_setrx (&runner, conf); +        runner_add_arg (&runner, "georep-session-working-dir"); +        runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); +        runner_add_args (&runner, ".", ".", NULL); +        RUN_GSYNCD_CMD; +          /* state-file */          runinit_gsyncd_setrx (&runner, conf);          runner_add_arg (&runner, "state-file"); @@ -701,7 +718,7 @@ configure_syncdaemon (glusterd_conf_t *conf)          /* gluster-params */          runinit_gsyncd_setrx (&runner, conf);          runner_add_args (&runner, "gluster-params", -                         "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", +                         "aux-gfid-mount",                           ".", NULL);          RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index adef551280e..ab383ac1c9a 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -47,7 +47,7 @@  #define GLUSTERD_QUORUM_RATIO_KEY       "cluster.server-quorum-ratio"  #define GLUSTERD_GLOBAL_OPT_VERSION     "global-option-version"  #define GLUSTERD_COMMON_PEM_PUB_FILE    "/geo-replication/common_secret.pem.pub" -#define GEO_CONF_MAX_OPT_VALS           5 +#define GEO_CONF_MAX_OPT_VALS           6  #define GLUSTERD_CREATE_HOOK_SCRIPT     
"/hooks/1/gsync-create/post/" \                                          "S56glusterd-geo-rep-create-post.sh"  | 
