From fce5f68bc72d448490a0d41be494ac54a9181b3c Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 7 Nov 2019 15:18:30 +0530 Subject: afr: make heal info lockless Changes in locks xlator: Added support for per-domain inodelk count requests. Caller needs to set GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS key in the dict and then set each key with name 'GLUSTERFS_INODELK_DOM_PREFIX:<domain name>'. In the response dict, the xlator will send the per-domain count as values for each of these keys. Changes in AFR: Replaced afr_selfheal_locked_inspect() with afr_lockless_inspect(). Logic has been added to make the latter behave the same as the former, thus not breaking the current heal info output behaviour. fixes: bz#1774011 Change-Id: I9ae08ce768b39aeb6ee230207b5b7fa744176952 Signed-off-by: Ravishankar N --- xlators/cluster/afr/src/afr-common.c | 203 +++++++++++++++++-------- xlators/cluster/afr/src/afr-self-heal-common.c | 40 ++++- xlators/cluster/afr/src/afr-self-heal.h | 3 +- 3 files changed, 178 insertions(+), 68 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index bc34a023d2f..7913dfe58bf 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -6882,68 +6882,6 @@ out: return ret; } -int -afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, - inode_t **inode, gf_boolean_t *entry_selfheal, - gf_boolean_t *data_selfheal, - gf_boolean_t *metadata_selfheal, - unsigned char *pending) - -{ - int ret = -1; - fd_t *fd = NULL; - gf_boolean_t dsh = _gf_false; - gf_boolean_t msh = _gf_false; - gf_boolean_t esh = _gf_false; - - ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh, - &esh); - if (ret) - goto out; - - /* For every heal type hold locks and check if it indeed needs heal */ - - /* Heal-info does an open() on the file being examined so that the - * current eager-lock holding client, if present, at some point sees - * 
open-fd count being > 1 and releases the eager-lock so that heal-info - * doesn't remain blocked forever until IO completes. - */ - if ((*inode)->ia_type == IA_IFREG) { - ret = afr_selfheal_data_open(this, *inode, &fd); - if (ret < 0) { - gf_msg_debug(this->name, -ret, "%s: Failed to open", - uuid_utoa((*inode)->gfid)); - goto out; - } - } - - if (msh) { - ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh, - pending); - if (ret == -EIO) - goto out; - } - - if (dsh) { - ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending); - if (ret == -EIO || (ret == -EAGAIN)) - goto out; - } - - if (esh) { - ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh, - pending); - } - -out: - *data_selfheal = dsh; - *entry_selfheal = esh; - *metadata_selfheal = msh; - if (fd) - fd_unref(fd); - return ret; -} - static dict_t * afr_set_heal_info(char *status) { @@ -6975,6 +6913,132 @@ out: return dict; } +static int +afr_update_heal_status(xlator_t *this, struct afr_reply *replies, + char *index_vgfid, gf_boolean_t *esh, gf_boolean_t *dsh, + gf_boolean_t *msh) +{ + int ret = -1; + int i = 0; + int io_domain_lk_count = 0; + int shd_domain_lk_count = 0; + afr_private_t *priv = NULL; + char *key1 = NULL; + char *key2 = NULL; + + priv = this->private; + key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + + strlen(priv->sh_domain)); + key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + + strlen(priv->sh_domain)); + sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name); + sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain); + + for (i = 0; i < priv->child_count; i++) { + if ((replies[i].valid != 1) || (replies[i].op_ret != 0)) + continue; + if (!io_domain_lk_count) { + ret = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count); + } + if (!shd_domain_lk_count) { + ret = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count); + } + } + + if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) { + if 
(shd_domain_lk_count) { + ret = -EAGAIN; /*For 'possibly-healing'. */ + } else { + ret = 0; /*needs heal. Just set a non -ve value so that it is + assumed as the source index.*/ + } + } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) { + if ((!shd_domain_lk_count && !io_domain_lk_count) || + (!io_domain_lk_count && shd_domain_lk_count)) { + /* Needs heal. */ + ret = 0; + } else { + GF_ASSERT(io_domain_lk_count && !shd_domain_lk_count); + /* No heal needed. */ + *dsh = *esh = *msh = 0; + } + } + return ret; +} + +/*return EIO, EAGAIN or pending*/ +int +afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + inode_t **inode, char *index_vgfid, + gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, unsigned char *pending) +{ + int ret = -1; + int i = 0; + afr_private_t *priv = NULL; + struct afr_reply *replies = NULL; + gf_boolean_t dsh = _gf_false; + gf_boolean_t msh = _gf_false; + gf_boolean_t esh = _gf_false; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *valid_on = NULL; + uint64_t *witness = NULL; + + priv = this->private; + replies = alloca0(sizeof(*replies) * priv->child_count); + sources = alloca0(sizeof(*sources) * priv->child_count); + sinks = alloca0(sizeof(*sinks) * priv->child_count); + witness = alloca0(sizeof(*witness) * priv->child_count); + valid_on = alloca0(sizeof(*valid_on) * priv->child_count); + + ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh, + &esh, replies); + if (ret) + goto out; + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid && replies[i].op_ret == 0) { + valid_on[i] = 1; + } + } + if (msh) { + ret = afr_selfheal_find_direction(frame, this, replies, + AFR_METADATA_TRANSACTION, valid_on, + sources, sinks, witness, pending); + if (*pending & PFLAG_SBRAIN) + ret = -EIO; + if (ret) + goto out; + } + if (dsh) { + ret = afr_selfheal_find_direction(frame, this, replies, + AFR_DATA_TRANSACTION, valid_on, + 
sources, sinks, witness, pending); + if (*pending & PFLAG_SBRAIN) + ret = -EIO; + if (ret) + goto out; + } + if (esh) { + ret = afr_selfheal_find_direction(frame, this, replies, + AFR_ENTRY_TRANSACTION, valid_on, + sources, sinks, witness, pending); + if (*pending & PFLAG_SBRAIN) + ret = -EIO; + if (ret) + goto out; + } + + ret = afr_update_heal_status(this, replies, index_vgfid, &esh, &dsh, &msh); +out: + *data_selfheal = dsh; + *entry_selfheal = esh; + *metadata_selfheal = msh; + if (replies) + afr_replies_wipe(replies, priv->child_count); + return ret; +} + int afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) { @@ -6990,6 +7054,14 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) char *status = NULL; call_frame_t *heal_frame = NULL; afr_local_t *heal_local = NULL; + afr_local_t *local = NULL; + char *index_vgfid = NULL; + + local = frame->local; + if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) { + ret = -1; + goto out; + } /*Use frame with lk-owner set*/ heal_frame = afr_frame_create(frame->this, &op_errno); @@ -6999,9 +7071,10 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) } heal_local = heal_frame->local; heal_frame->local = frame->local; - ret = afr_selfheal_locked_inspect(heal_frame, this, loc->gfid, &inode, - &entry_selfheal, &data_selfheal, - &metadata_selfheal, &pending); + + ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode, index_vgfid, + &entry_selfheal, &data_selfheal, + &metadata_selfheal, &pending); if (ret == -ENOMEM) { ret = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index f06f7e603a0..781437bf74e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1831,6 +1831,34 @@ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent, return inode; } +static int +afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t 
*dict) +{ + int ret = 0; + afr_private_t *priv = NULL; + char *key = NULL; + + priv = this->private; + key = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + + strlen(priv->sh_domain)); + + ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1); + if (ret) + return ret; + + sprintf(key, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name); + ret = dict_set_uint32(dict, key, 1); + if (ret) + return ret; + + sprintf(key, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain); + ret = dict_set_uint32(dict, key, 1); + if (ret) + return ret; + + return 0; +} + int afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, uuid_t gfid, struct afr_reply *replies, @@ -1857,6 +1885,11 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, return -ENOMEM; } + if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) { + dict_unref(xattr_req); + return -1; + } + loc.inode = inode_ref(inode); gf_uuid_copy(loc.gfid, gfid); @@ -2252,7 +2285,8 @@ int afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, inode_t **link_inode, gf_boolean_t *data_selfheal, gf_boolean_t *metadata_selfheal, - gf_boolean_t *entry_selfheal) + gf_boolean_t *entry_selfheal, + struct afr_reply *replies_dst) { afr_private_t *priv = NULL; inode_t *inode = NULL; @@ -2388,6 +2422,8 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, ret = 0; out: + if (replies && replies_dst) + afr_replies_copy(replies_dst, replies, priv->child_count); if (inode) inode_unref(inode); if (replies) @@ -2507,7 +2543,7 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid) ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode, &data_selfheal, &metadata_selfheal, - &entry_selfheal); + &entry_selfheal, NULL); if (ret) goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 55d94e20cea..5e7bde8689d 100644 --- 
a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -325,7 +325,8 @@ int afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, inode_t **link_inode, gf_boolean_t *data_selfheal, gf_boolean_t *metadata_selfheal, - gf_boolean_t *entry_selfheal); + gf_boolean_t *entry_selfheal, + struct afr_reply *replies); int afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid); -- cgit