From 0baa65b651036ada96d9fc190232e4f100dc12e8 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Fri, 23 Mar 2012 16:44:38 -0400 Subject: replicate: default read_child to a local brick if there is one. Controlled by the "choose-local" option (on by default). Change-Id: I560f27c81703f2c9c62fdb51532c8eb763826df7 BUG: 806462 Signed-off-by: Jeff Darcy Reviewed-on: http://review.gluster.com/3005 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-common.c | 88 +++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 2 deletions(-) (limited to 'xlators/cluster/afr/src/afr-common.c') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 21a2be3d..f24bd8b7 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2035,12 +2035,79 @@ afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this, afr_set_root_inode_on_first_lookup (local, this, inode); } +static int32_t +afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int ret = 0; + char *pathinfo = NULL; + gf_boolean_t is_local = _gf_false; + afr_private_t *priv = NULL; + int32_t child_index = -1; + + if (op_ret != 0) { + goto out; + } + + ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo); + if (ret != 0) { + goto out; + } + + ret = afr_local_pathinfo (pathinfo, &is_local); + if (ret) { + goto out; + } + + priv = this->private; + /* + * Note that one local subvolume will override another here. The only + * way to avoid that would be to retain extra information about whether + * the previous read_child is local, and it's just not worth it. Even + * the slowest local subvolume is far preferable to a remote one. + */ + if (is_local) { + child_index = (int32_t)(long)cookie; + gf_log (this->name, GF_LOG_INFO, + "selecting local read_child %s", + priv->children[child_index]->name); + priv->read_child = child_index; + } + +out: + STACK_DESTROY(frame->root); + return 0; +} + +static void +afr_attempt_local_discovery (xlator_t *this, int32_t child_index) +{ + call_frame_t *newframe = NULL; + loc_t tmploc = {0,}; + afr_private_t *priv = this->private; + + newframe = create_frame(this,this->ctx->pool); + if (!newframe) { + return; + } + + tmploc.gfid[sizeof(tmploc.gfid)-1] = 1; + STACK_WIND_COOKIE (newframe, afr_discovery_cbk, + (void *)(long)child_index, + priv->children[child_index], + priv->children[child_index]->fops->getxattr, + &tmploc, GF_XATTR_PATHINFO_KEY, NULL); +} + static void afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xattr, struct iatt *postparent) { + afr_private_t *priv = this->private; + if (local->success_count == 0) { if (local->op_errno != ESTALE) { local->op_ret = op_ret; @@ -2053,6 +2120,11 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind afr_lookup_cache_args (local, child_index, xattr, buf, postparent); + + if (local->do_discovery && (priv->read_child == (-1))) { + afr_attempt_local_discovery(this,child_index); + } + local->cont.lookup.success_children[local->success_count] = child_index; local->success_count++; } @@ -2214,8 +2286,6 @@ afr_lookup (call_frame_t *frame, xlator_t *this, /* By default assume ENOTCONN. On success it will be set to 0. */ local->op_errno = ENOTCONN; - local->call_count = afr_up_children_count (local->child_up, - priv->child_count); ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc, &gfid_req); if (ret) { @@ -2225,6 +2295,12 @@ afr_lookup (call_frame_t *frame, xlator_t *this, afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req, &local->loc); local->fop = GF_FOP_LOOKUP; + if (priv->choose_local && !priv->did_discovery) { + if (__is_root_gfid(gfid_req)) { + local->do_discovery = _gf_true; + priv->did_discovery = _gf_true; + } + } for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND_COOKIE (frame, afr_lookup_cbk, @@ -3626,6 +3702,14 @@ afr_notify (xlator_t *this, int32_t event, if (!priv) return 0; + /* + * We need to reset this in case children come up in "staggered" + * fashion, so that we discover a late-arriving local subvolume. Note + * that we could end up issuing N lookups to the first subvolume, and + * O(N^2) overall, but N is small for AFR so it shouldn't be an issue. + */ + priv->did_discovery = _gf_false; + had_heard_from_all = 1; for (i = 0; i < priv->child_count; i++) { if (!priv->last_event[i]) { -- cgit