From 0a50167c0a8f950f5a1c76442b6c9abea466200d Mon Sep 17 00:00:00 2001 From: karthik-us Date: Wed, 19 Apr 2017 18:04:46 +0530 Subject: cluster/afr: Return the list of node_uuids for the subvolume Problem: AFR was returning the node uuid of the first node for every file if the replica set was healthy, which was resulting in only one node migrating all the files. Fix: With this patch AFR returns the list of node_uuids to the upper layer, so that they can decide on which node to migrate which files, resulting in improved performance. Ordering of node uuids will be maintained based on the ordering of the bricks. If a brick is down, then the node uuid for that will be set to all zeros. Change-Id: I73ee0f9898ae473584fdf487a2980d7a6db22f31 BUG: 1366817 Signed-off-by: karthik-us Reviewed-on: https://review.gluster.org/17084 Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System --- xlators/cluster/afr/src/afr-common.c | 49 +++++++++++ xlators/cluster/afr/src/afr-inode-read.c | 141 ++++++++++++++++++++----------- xlators/cluster/afr/src/afr.h | 5 ++ 3 files changed, 145 insertions(+), 50 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 34d9e56911e..2377419f01c 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -6291,3 +6291,52 @@ mdata_unlock: return ret; } + +/* + * Concatenates the xattrs in local->replies separated by a delimiter. + */ +int +afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this, + char *buf, const char *default_str, + int32_t *serz_len, char delimiter) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + char *xattr = NULL; + int i = 0; + int len = 0; + int ret = -1; + + priv = this->private; + local = frame->local; + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid || local->replies[i].op_ret) { + buf = strncat (buf, default_str, strlen (default_str)); + len += strlen (default_str); + buf[len++] = delimiter; + buf[len] = '\0'; + } else { + ret = dict_get_str (local->replies[i].xattr, + local->cont.getxattr.name, &xattr); + if (ret) { + gf_msg ("TEST", GF_LOG_ERROR, -ret, + AFR_MSG_DICT_GET_FAILED, + "Failed to get the node_uuid of brick " + "%d", i); + goto out; + } + buf = strncat (buf, xattr, strlen (xattr)); + len += strlen (xattr); + buf[len++] = delimiter; + buf[len] = '\0'; + } + } + buf[--len] = '\0'; /*remove the last delimiter*/ + if (serz_len) + *serz_len = ++len; + ret = 0; + +out: + return ret; +} diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 2b369ca3c68..20446d88c99 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -713,57 +713,110 @@ unwind: return ret; } + /** - * node-uuid cbk uses next child querying mechanism + * node-uuid cbk returns the list of node_uuids for the subvolume. */ int32_t -afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) +afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - xlator_t **children = NULL; - int unwind = 1; - int curr_call_child = 0; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int32_t callcnt = 0; + int ret = 0; + char *xattr_serz = NULL; + long cky = 0; + int32_t tlen = 0; + local = frame->local; priv = this->private; - children = priv->children; + cky = (long) cookie; - local = frame->local; + LOCK (&frame->lock); + { + callcnt = --local->call_count; + local->replies[cky].valid = 1; + local->replies[cky].op_ret = op_ret; + local->replies[cky].op_errno = op_errno; - if (op_ret == -1) { /** query the _next_ child */ + if (op_ret < 0) + goto unlock; - /** - * _current_ becomes _next_ - * If done with all childs and yet no success; give up ! - */ - curr_call_child = (int) ((long)cookie); - if (++curr_call_child == priv->child_count) - goto unwind; + local->op_ret = 0; - gf_msg_debug (this->name, op_errno, - "op_ret (-1): Re-querying afr-child (%d/%d)", - curr_call_child, priv->child_count); - - unwind = 0; - STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk, - (void *) (long) curr_call_child, - children[curr_call_child], - children[curr_call_child]->fops->getxattr, - &local->loc, - local->cont.getxattr.name, - NULL); + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref (xdata); + local->replies[cky].xattr = dict_ref (dict); } - unwind: - if (unwind) - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, - NULL); +unlock: + UNLOCK (&frame->lock); - return 0; + if (!callcnt) { + + if (local->op_ret != 0) { + /* All bricks gave an error. */ + local->op_errno = afr_final_errno (local, priv); + goto unwind; + } + + /*Since we store the UUID0_STR as node uuid for down bricks and + *for non zero op_ret, assigning length to priv->child_count + *number of uuids*/ + local->cont.getxattr.xattr_len = (strlen (UUID0_STR) + 2) * + priv->child_count; + + if (!local->dict) + local->dict = dict_new (); + if (!local->dict) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } + + xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len, + sizeof (char), gf_common_mt_char); + + if (!xattr_serz) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } + + ret = afr_serialize_xattrs_with_delimiter (frame, this, + xattr_serz, + UUID0_STR, &tlen, + ' '); + if (ret) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } + ret = dict_set_dynstr (local->dict, local->cont.getxattr.name, + xattr_serz); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, + -ret, AFR_MSG_DICT_SET_FAILED, + "Cannot set node_uuid key in dict"); + local->op_ret = -1; + local->op_errno = ENOMEM; + } else { + local->op_ret = local->cont.getxattr.xattr_len - 1; + local->op_errno = 0; + } + +unwind: + AFR_STACK_UNWIND (getxattr, frame, local->op_ret, + local->op_errno, local->dict, + local->xdata_rsp); + } + + return ret; } + int32_t afr_getxattr_quota_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @@ -1374,6 +1427,8 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk, *cbk = afr_common_getxattr_stime_cbk; } else if (strcmp (name, QUOTA_SIZE_KEY) == 0) { *cbk = afr_getxattr_quota_size_cbk; + } else if (!strcmp (name, GF_XATTR_NODE_UUID_KEY)) { + *cbk = afr_getxattr_node_uuid_cbk; } else { is_spl = _gf_false; } @@ -1489,9 +1544,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { afr_private_t *priv = NULL; - xlator_t **children = NULL; afr_local_t *local = NULL; - int i = 0; int32_t op_errno = 0; int ret = -1; fop_getxattr_cbk_t cbk = NULL; @@ -1503,8 +1556,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, priv = this->private; - children = priv->children; - loc_copy (&local->loc, loc); local->op = GF_FOP_GETXATTR; @@ -1545,16 +1596,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, return 0; } - if (XATTR_IS_NODE_UUID (name)) { - i = 0; - STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk, - (void *) (long) i, - children[i], - children[i]->fops->getxattr, - loc, name, xdata); - return 0; - } - no_name: afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 3be15175dc7..3a47ba47241 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1259,4 +1259,9 @@ __afr_fd_ctx_get (fd_t *fd, xlator_t *this); gf_boolean_t afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this, int event_gen1, int event_gen2); + +int +afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this, + char *buf, const char *default_str, + int32_t *serz_len, char delimiter); #endif /* __AFR_H__ */ -- cgit