diff options
| author | Jeff Darcy <jdarcy@redhat.com> | 2012-03-23 16:44:38 -0400 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2012-06-05 10:43:34 -0700 | 
| commit | 0baa65b651036ada96d9fc190232e4f100dc12e8 (patch) | |
| tree | be3b9f395558577d0f3f9f9db124e3517af07e4f /xlators | |
| parent | 4ffd6292486f42628e1e9e01e3267daadc86698a (diff) | |
replicate: default read_child to a local brick if there is one.
This behavior is controlled by the "choose-local" option, which is on by default.
Change-Id: I560f27c81703f2c9c62fdb51532c8eb763826df7
BUG: 806462
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.com/3005
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 88 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 56 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 7 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 11 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client3_1-fops.c | 4 | 
7 files changed, 165 insertions, 5 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 21a2be3dd6f..f24bd8b7d7f 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2035,12 +2035,79 @@ afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,          afr_set_root_inode_on_first_lookup (local, this, inode);  } +static int32_t +afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                   int32_t op_ret, int32_t op_errno, dict_t *dict, +                   dict_t *xdata) +{ +        int              ret            = 0; +        char            *pathinfo       = NULL; +        gf_boolean_t     is_local        = _gf_false; +        afr_private_t   *priv           = NULL; +        int32_t          child_index    = -1; + +        if (op_ret != 0) { +                goto out; +        } + +        ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo); +        if (ret != 0) { +                goto out; +        } + +        ret = afr_local_pathinfo (pathinfo, &is_local); +        if (ret) { +                goto out; +        } + +        priv = this->private; +        /* +         * Note that one local subvolume will override another here.  The only +         * way to avoid that would be to retain extra information about whether +         * the previous read_child is local, and it's just not worth it.  Even +         * the slowest local subvolume is far preferable to a remote one. 
+         */ +        if (is_local) { +                child_index = (int32_t)(long)cookie; +                gf_log (this->name, GF_LOG_INFO, +                        "selecting local read_child %s", +                        priv->children[child_index]->name); +                priv->read_child = child_index; +        } + +out: +        STACK_DESTROY(frame->root); +        return 0; +} + +static void +afr_attempt_local_discovery (xlator_t *this, int32_t child_index) +{ +        call_frame_t    *newframe = NULL; +        loc_t            tmploc = {0,}; +        afr_private_t   *priv = this->private; + +        newframe = create_frame(this,this->ctx->pool); +        if (!newframe) { +                return; +        } + +        tmploc.gfid[sizeof(tmploc.gfid)-1] = 1; +        STACK_WIND_COOKIE (newframe, afr_discovery_cbk, +                           (void *)(long)child_index, +                           priv->children[child_index], +                           priv->children[child_index]->fops->getxattr, +                           &tmploc, GF_XATTR_PATHINFO_KEY, NULL); +} +  static void  afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,                             int32_t op_ret, int32_t op_errno, inode_t *inode,                             struct iatt *buf, dict_t *xattr,                             struct iatt *postparent)  { +        afr_private_t   *priv   = this->private; +          if (local->success_count == 0) {                  if (local->op_errno != ESTALE) {                          local->op_ret = op_ret; @@ -2053,6 +2120,11 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind          afr_lookup_cache_args (local, child_index, xattr,                                 buf, postparent); + +        if (local->do_discovery && (priv->read_child == (-1))) { +                afr_attempt_local_discovery(this,child_index); +        } +          
local->cont.lookup.success_children[local->success_count] = child_index;          local->success_count++;  } @@ -2214,8 +2286,6 @@ afr_lookup (call_frame_t *frame, xlator_t *this,          /* By default assume ENOTCONN. On success it will be set to 0. */          local->op_errno = ENOTCONN; -        local->call_count = afr_up_children_count (local->child_up, -                                                   priv->child_count);          ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,                                              &gfid_req);          if (ret) { @@ -2225,6 +2295,12 @@ afr_lookup (call_frame_t *frame, xlator_t *this,          afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,                                &local->loc);          local->fop = GF_FOP_LOOKUP; +        if (priv->choose_local && !priv->did_discovery) { +                if (__is_root_gfid(gfid_req)) { +                        local->do_discovery = _gf_true; +                        priv->did_discovery = _gf_true; +                } +        }          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) {                          STACK_WIND_COOKIE (frame, afr_lookup_cbk, @@ -3626,6 +3702,14 @@ afr_notify (xlator_t *this, int32_t event,          if (!priv)                  return 0; +        /* +         * We need to reset this in case children come up in "staggered" +         * fashion, so that we discover a late-arriving local subvolume.  Note +         * that we could end up issuing N lookups to the first subvolume, and +         * O(N^2) overall, but N is small for AFR so it shouldn't be an issue. 
+         */ +        priv->did_discovery = _gf_false; +          had_heard_from_all = 1;          for (i = 0; i < priv->child_count; i++) {                  if (!priv->last_event[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 575bf4361d2..c5deb18b8af 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -671,6 +671,62 @@ out:          return;  } +static int +get_pathinfo_host (char *pathinfo, char *hostname, size_t size) +{ +        char    *start = NULL; +        char    *end = NULL; +        int     ret  = -1; +        int     i    = 0; + +        if (!pathinfo) +                goto out; + +        start = strchr (pathinfo, ':'); +        if (!start) +                goto out; +        end = strrchr (pathinfo, ':'); +        if (start == end) +                goto out; + +        memset (hostname, 0, size); +        i = 0; +        while (++start != end) +                hostname[i++] = *start; +        ret = 0; +out: +        return ret; +} + +int +afr_local_pathinfo (char *pathinfo, gf_boolean_t *local) +{ +        int             ret   = 0; +        char            pathinfohost[1024] = {0}; +        char            localhost[1024] = {0}; +        xlator_t        *this = THIS; + +        *local = _gf_false; +        ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost)); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s", +                        pathinfo); +                goto out; +        } + +        ret = gethostname (localhost, sizeof (localhost)); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, " +                        "reason: %s", strerror (errno)); +                goto out; +        } + +        if (!strcmp (localhost, pathinfohost)) +                *local = _gf_true; +out: +        return ret; +} +  int  afr_crawl_build_start_loc 
(xlator_t *this, afr_crawl_data_t *crawl_data,                             loc_t *dirloc) diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 8e608459684..32a8aaca50c 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -42,4 +42,11 @@ afr_proactive_self_heal (void *data);  int  afr_xl_op (xlator_t *this, dict_t *input, dict_t *output); + +/* + * In addition to its self-heal use, this is used to find a local default + * read_child. + */ +int +afr_local_pathinfo (char *pathinfo, gf_boolean_t *local);  #endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index b7ba2619711..4f7bf2de004 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -170,6 +170,9 @@ reconfigure (xlator_t *this, dict_t *options)          GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,                            int32, out); +        /* Reset this so we re-discover in case the topology changed.  
*/ +        priv->did_discovery = _gf_false; +          ret = 0;  out:          return ret; @@ -227,7 +230,6 @@ init (xlator_t *this)          priv->child_count = child_count; -          priv->read_child = -1;          GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out); @@ -239,6 +241,7 @@ init (xlator_t *this)                          goto out;                  }          } +        GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);          GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out); @@ -508,6 +511,12 @@ struct volume_options options[] = {                           "1 = hash by GFID (all clients use same subvolume), "                           "2 = hash by GFID and client PID",          }, +        { .key  = {"choose-local" }, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "true", +          .description = "Choose a local subvolume to read from if " +                         "read-subvolume is not explicitly set.", +        },          { .key  = {"favorite-child"},            .type = GF_OPTION_TYPE_XLATOR          }, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index a1a30562bf1..c8e01fcb841 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -156,6 +156,8 @@ typedef struct _afr_private {          char                   vol_uuid[UUID_SIZE + 1];          int32_t                *last_event;          afr_self_heald_t       shd; +        gf_boolean_t           choose_local; +        gf_boolean_t           did_discovery;  } afr_private_t;  typedef struct { @@ -697,6 +699,7 @@ typedef struct _afr_local {          mode_t          umask;          int             xflag; +        gf_boolean_t    do_discovery;  } afr_local_t;  typedef enum { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 515bff359bc..219352c6147 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ 
b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -136,6 +136,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {          {"cluster.eager-lock",                   "cluster/replicate",  NULL, NULL, NO_DOC, 0     },          {"cluster.quorum-type",                  "cluster/replicate",  "quorum-type", NULL, NO_DOC, 0},          {"cluster.quorum-count",                 "cluster/replicate",  "quorum-count", NULL, NO_DOC, 0}, +        {"cluster.choose-local",                 "cluster/replicate",  NULL, NULL, DOC, 0},          {"cluster.stripe-block-size",            "cluster/stripe",     "block-size", NULL, DOC, 0}, diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 266f84a1dfb..d44eb86c46c 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -4688,7 +4688,7 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this,          }          args = data; -        if (!(args->loc && args->loc->inode)) { +        if (!args->loc) {                  op_errno = EINVAL;                  goto unwind;          } @@ -4729,7 +4729,7 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this,          rsp_iobuf = NULL;          rsp_iobref = NULL; -        if (!uuid_is_null (args->loc->inode->gfid)) +        if (args->loc->inode && !uuid_is_null (args->loc->inode->gfid))                  memcpy (req.gfid,  args->loc->inode->gfid, 16);          else                  memcpy (req.gfid, args->loc->gfid, 16);  | 
