From 59ba78ae1461651e290ce72013786d828545d4c1 Mon Sep 17 00:00:00 2001
From: Anuradha
Date: Mon, 5 Jan 2015 16:37:07 +0530
Subject: afr : glfs-heal implementation

Backport of http://review.gluster.org/6529
and http://review.gluster.org/9119

Change-Id: Ie420efcb399b5119c61f448b421979c228b27b15
BUG: 1173528
Signed-off-by: Anuradha
Reviewed-on: http://review.gluster.org/9335
Reviewed-by: Ravishankar N
Tested-by: Gluster Build System
Reviewed-by: Raghavendra Bhat
---
 xlators/cluster/afr/src/afr-common.c             | 351 ++++++++++++++++++++++-
 xlators/cluster/afr/src/afr-inode-read.c         |   5 +
 xlators/cluster/afr/src/afr-self-heal-common.c   |  21 +-
 xlators/cluster/afr/src/afr-self-heal-data.c     |  24 +-
 xlators/cluster/afr/src/afr-self-heal-entry.c    |  25 +-
 xlators/cluster/afr/src/afr-self-heal-metadata.c |   9 +-
 xlators/cluster/afr/src/afr-self-heal.h          |  29 +++
 xlators/cluster/afr/src/afr-self-heald.c         |   2 -
 xlators/cluster/afr/src/afr-self-heald.h         |   6 +
 xlators/cluster/afr/src/afr.h                    |   4 +
 10 files changed, 438 insertions(+), 38 deletions(-)

(limited to 'xlators')

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 1527a47f716..aefad8be959 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -494,7 +494,6 @@ afr_selfheal_enabled (xlator_t *this)
 }
 
 
-
 int
 afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
 {
@@ -4032,3 +4031,353 @@ afr_mark_pending_changelog (afr_private_t *priv, unsigned char *pending,
 out:
         return changelog;
 }
+
+/*
+ * Decide whether a heal is pending from the result of a *_prepare() call.
+ * Returns _gf_false only when @ret is 0 and AFR_COUNT (sources) equals
+ * priv->child_count; any other outcome is reported as _gf_true.
+ */
+gf_boolean_t
+afr_decide_heal_info (afr_private_t *priv, unsigned char *sources, int ret)
+{
+        int sources_count = 0;
+
+        if (ret)
+                goto out;
+
+        sources_count = AFR_COUNT (sources, priv->child_count);
+        if (sources_count == priv->child_count)
+                return _gf_false;
+out:
+        return _gf_true;
+}
+
+/*
+ * Take the inodelk on this->name, run __afr_selfheal_metadata_prepare() and
+ * store the verdict of afr_decide_heal_info() in *msh (left untouched when
+ * no lock could be taken). Returns the prepare result or a negative errno.
+ */
+int
+afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
+                                      inode_t *inode, gf_boolean_t *msh)
+{
+        int ret = -1;
+        unsigned char *locked_on = NULL;
+        unsigned char *sources = NULL;
+        unsigned char *sinks = NULL;
+        unsigned char *healed_sinks = NULL;
+        struct afr_reply *locked_replies = NULL;
+
+        afr_private_t *priv = this->private;
+
+        locked_on = alloca0 (priv->child_count);
+        sources = alloca0 (priv->child_count);
+        sinks = alloca0 (priv->child_count);
+        healed_sinks = alloca0 (priv->child_count);
+
+        locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+        ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+                                    LLONG_MAX - 1, 0, locked_on);
+        {
+                if (ret == 0) {
+                        /* Not a single lock */
+                        ret = -afr_final_errno (frame->local, priv);
+                        if (ret == 0)
+                                ret = -ENOTCONN;/* all invalid responses */
+                        goto out;
+                }
+                ret = __afr_selfheal_metadata_prepare (frame, this, inode,
+                                                       locked_on, sources,
+                                                       sinks, healed_sinks,
+                                                       locked_replies);
+                *msh = afr_decide_heal_info (priv, sources, ret);
+        }
+        afr_selfheal_uninodelk (frame, this, inode, this->name,
+                                LLONG_MAX - 1, 0, locked_on);
+out:
+        if (locked_replies)
+                afr_replies_wipe (locked_replies, priv->child_count);
+        return ret;
+}
+
+/*
+ * Data counterpart of the metadata inspection: try-lock priv->sh_domain, then
+ * lock this->name, run __afr_selfheal_data_prepare() and store the verdict in
+ * *dsh. Every lock that was taken is released before returning.
+ */
+int
+afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
+                                  inode_t *inode, gf_boolean_t *dsh)
+{
+        int ret = -1;
+        afr_private_t *priv = NULL;
+        unsigned char *locked_on = NULL;
+        unsigned char *data_lock = NULL;
+        unsigned char *sources = NULL;
+        unsigned char *sinks = NULL;
+        unsigned char *healed_sinks = NULL;
+        struct afr_reply *locked_replies = NULL;
+
+        priv = this->private;
+        locked_on = alloca0 (priv->child_count);
+        data_lock = alloca0 (priv->child_count);
+        sources = alloca0 (priv->child_count);
+        sinks = alloca0 (priv->child_count);
+        healed_sinks = alloca0 (priv->child_count);
+
+        locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+        ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
+                                       0, 0, locked_on);
+        {
+                if (ret == 0) {
+                        ret = -afr_final_errno (frame->local, priv);
+                        if (ret == 0)
+                                ret = -ENOTCONN;/* all invalid responses */
+                        goto out;
+                }
+                ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+                                            0, 0, data_lock);
+                {
+                        if (ret == 0) {
+                                ret = -afr_final_errno (frame->local, priv);
+                                if (ret == 0)
+                                        ret = -ENOTCONN;
+                                /* all invalid responses */
+                                goto unlock;
+                        }
+                        ret = __afr_selfheal_data_prepare (frame, this, inode,
+                                                           data_lock, sources,
+                                                           sinks, healed_sinks,
+                                                           locked_replies);
+                        *dsh = afr_decide_heal_info (priv, sources, ret);
+                }
+                afr_selfheal_uninodelk (frame, this, inode, this->name, 0, 0,
+                                        data_lock);
+        }
+unlock:
+        afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
+                                locked_on);
+out:
+        if (locked_replies)
+                afr_replies_wipe (locked_replies, priv->child_count);
+        return ret;
+}
+
+/*
+ * Entry counterpart: uses entrylk instead of inodelk and additionally maps a
+ * successful prepare that yields no source (source < 0) to -EIO before the
+ * verdict is stored in *esh.
+ */
+int
+afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
+                                   inode_t *inode,
+                                   gf_boolean_t *esh)
+{
+        int ret = -1;
+        int source = -1;
+        afr_private_t *priv = NULL;
+        unsigned char *locked_on = NULL;
+        unsigned char *data_lock = NULL;
+        unsigned char *sources = NULL;
+        unsigned char *sinks = NULL;
+        unsigned char *healed_sinks = NULL;
+        struct afr_reply *locked_replies = NULL;
+
+        priv = this->private;
+        locked_on = alloca0 (priv->child_count);
+        data_lock = alloca0 (priv->child_count);
+        sources = alloca0 (priv->child_count);
+        sinks = alloca0 (priv->child_count);
+        healed_sinks = alloca0 (priv->child_count);
+
+        locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+        ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain,
+                                       NULL, locked_on);
+        {
+                if (ret == 0) {
+                        ret = -afr_final_errno (frame->local, priv);
+                        if (ret == 0)
+                                ret = -ENOTCONN;/* all invalid responses */
+                        goto out;
+                }
+
+                ret = afr_selfheal_entrylk (frame, this, inode, this->name,
+                                            NULL, data_lock);
+                {
+                        if (ret == 0) {
+                                ret = -afr_final_errno (frame->local, priv);
+                                if (ret == 0)
+                                        ret = -ENOTCONN;
+                                /* all invalid responses */
+                                goto unlock;
+                        }
+                        ret = __afr_selfheal_entry_prepare (frame, this, inode,
+                                                            data_lock, sources,
+                                                            sinks, healed_sinks,
+                                                            locked_replies,
+                                                            &source);
+                        if ((ret == 0) && source < 0)
+                                ret = -EIO;
+                        *esh = afr_decide_heal_info (priv, sources, ret);
+                }
+                afr_selfheal_unentrylk (frame, this, inode, this->name, NULL,
+                                        data_lock);
+        }
+unlock:
+        afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,
+                                locked_on);
+out:
+        if (locked_replies)
+                afr_replies_wipe (locked_replies, priv->child_count);
+        return ret;
+}
+
+/*
+ * Run the unlocked inspection first, then re-check every heal type it flagged
+ * while holding locks. -EIO from the metadata or data check, or -EAGAIN from
+ * the data check, stops further checks. The three out-flags are always set.
+ */
+int
+afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid,
+                             inode_t **inode,
+                             gf_boolean_t *entry_selfheal,
+                             gf_boolean_t *data_selfheal,
+                             gf_boolean_t *metadata_selfheal)
+
+{
+        int ret = -1;
+        gf_boolean_t dsh = _gf_false;
+        gf_boolean_t msh = _gf_false;
+        gf_boolean_t esh = _gf_false;
+
+        ret = afr_selfheal_unlocked_inspect (frame, this, gfid, inode,
+                                             &dsh, &msh, &esh);
+        if (ret)
+                goto out;
+
+        /* For every heal type hold locks and check if it indeed needs heal */
+
+        if (msh) {
+                ret = afr_selfheal_locked_metadata_inspect (frame, this,
+                                                            *inode, &msh);
+                if (ret == -EIO)
+                        goto out;
+        }
+
+        if (dsh) {
+                ret = afr_selfheal_locked_data_inspect (frame, this, *inode,
+                                                        &dsh);
+                if (ret == -EIO || (ret == -EAGAIN))
+                        goto out;
+        }
+
+        if (esh) {
+                ret = afr_selfheal_locked_entry_inspect (frame, this, *inode,
+                                                         &esh);
+        }
+
+out:
+        *data_selfheal = dsh;
+        *entry_selfheal = esh;
+        *metadata_selfheal = msh;
+        return ret;
+}
+
+/*
+ * Build a new dict whose "heal-info" key is set to @status. Only "heal",
+ * "split-brain" and "possibly-healing" are stored; any other status yields an
+ * empty dict. Returns NULL if the dict cannot be allocated.
+ */
+dict_t*
+afr_set_heal_info (char *status)
+{
+        dict_t *dict = NULL;
+        int ret = -1;
+
+        dict = dict_new ();
+        if (!dict) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        if (!strcmp (status, "heal")) {
+                ret = dict_set_str (dict, "heal-info", "heal");
+                if (ret)
+                        gf_log ("", GF_LOG_WARNING,
+                                "Failed to set heal-info key to "
+                                "heal");
+        } else if (!strcmp (status, "split-brain")) {
+                ret = dict_set_str (dict, "heal-info", "split-brain");
+                if (ret)
+                        gf_log ("", GF_LOG_WARNING,
+                                "Failed to set heal-info key to "
+                                "split-brain");
+        } else if (!strcmp (status, "possibly-healing")) {
+                ret = dict_set_str (dict, "heal-info", "possibly-healing");
+                if (ret)
+                        gf_log ("", GF_LOG_WARNING,
+                                "Failed to set heal-info key to "
+                                "possibly-healing");
+        }
+out:
+        return dict;
+}
+
+/*
+ * getxattr handler for GF_AFR_HEAL_INFO: inspect loc->gfid under locks and
+ * unwind the frame with a dict describing the heal status. -EIO maps to
+ * "split-brain" and -EAGAIN to "possibly-healing"; -ENOMEM unwinds with -1.
+ */
+int
+afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   dict_t *xdata)
+{
+        gf_boolean_t data_selfheal = _gf_false;
+        gf_boolean_t metadata_selfheal = _gf_false;
+        gf_boolean_t entry_selfheal = _gf_false;
+        dict_t *dict = NULL;
+        int ret = -1;
+        int op_errno = 0;
+        inode_t *inode = NULL;
+
+        ret = afr_selfheal_locked_inspect (frame, this, loc->gfid, &inode,
+                                           &entry_selfheal,
+                                           &data_selfheal, &metadata_selfheal);
+
+        if (ret == -ENOMEM) {
+                op_errno = -ret;
+                ret = -1;
+                goto out;
+        }
+
+        if (ret == -EIO) {
+                dict = afr_set_heal_info ("split-brain");
+        } else if (ret == -EAGAIN) {
+                dict = afr_set_heal_info ("possibly-healing");
+        } else if (ret == 0) {
+                if (!data_selfheal && !entry_selfheal &&
+                    !metadata_selfheal) {
+                        dict = afr_set_heal_info ("no-heal");
+                } else {
+                        dict = afr_set_heal_info ("heal");
+                }
+        } else if (ret < 0) {
+                if (data_selfheal || entry_selfheal ||
+                    metadata_selfheal) {
+                        dict = afr_set_heal_info ("heal");
+                }
+        }
+        ret = 0;
+
+out:
+        AFR_STACK_UNWIND (getxattr, frame, ret, op_errno, dict, NULL);
+        if (dict)
+                dict_unref (dict);
+        /* Assumes the inspection hands back a referenced inode; drop it. */
+        if (inode)
+                inode_unref (inode);
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 4cb219246f7..210d710a2b3 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1373,6 +1373,11 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
                 return 0;
         }
 
+        if (!strcmp (name, GF_AFR_HEAL_INFO)) {
+                afr_get_heal_info (frame, this, loc, xdata);
+                return 0;
+        }
+
         /*
          * if we are doing getxattr with pathinfo as the key then we
          * collect information from all childs
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 14a514beffa..6198d4cf72c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++
b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -932,13 +932,14 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, if (replies[i].op_ret == -1) continue; - if (afr_is_data_set (this, replies[i].xdata)) + if (data_selfheal && afr_is_data_set (this, replies[i].xdata)) *data_selfheal = _gf_true; - if (afr_is_metadata_set (this, replies[i].xdata)) + if (metadata_selfheal && + afr_is_metadata_set (this, replies[i].xdata)) *metadata_selfheal = _gf_true; - if (afr_is_entry_set (this, replies[i].xdata)) + if (entry_selfheal && afr_is_entry_set (this, replies[i].xdata)) *entry_selfheal = _gf_true; valid_cnt ++; @@ -967,7 +968,8 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, priv->children[i]->name, uuid_utoa (replies[i].poststat.ia_gfid)); - *metadata_selfheal = _gf_true; + if (metadata_selfheal) + *metadata_selfheal = _gf_true; } if (!IA_EQUAL (first, replies[i].poststat, gid)) { @@ -978,7 +980,8 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, priv->children[i]->name, uuid_utoa (replies[i].poststat.ia_gfid)); - *metadata_selfheal = _gf_true; + if (metadata_selfheal) + *metadata_selfheal = _gf_true; } if (!IA_EQUAL (first, replies[i].poststat, prot)) { @@ -989,7 +992,8 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, priv->children[i]->name, uuid_utoa (replies[i].poststat.ia_gfid)); - *metadata_selfheal = _gf_true; + if (metadata_selfheal) + *metadata_selfheal = _gf_true; } if (IA_ISREG(first.ia_type) && @@ -1001,11 +1005,12 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, priv->children[i]->name, uuid_utoa (replies[i].poststat.ia_gfid)); - *data_selfheal = _gf_true; + if (data_selfheal) + *data_selfheal = _gf_true; } } - if (valid_cnt > 0) { + if (valid_cnt > 0 && link_inode) { *link_inode = afr_inode_link (inode, &first); if (!*link_inode) { ret = -EINVAL; diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c 
index f7503faa719..a434b9e6ba1 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -569,10 +569,11 @@ out: * The return value is the index of the subvolume to be used as the source * for self-healing, or -1 if no healing is necessary/split brain. */ -static int -__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, - unsigned char *locked_on, unsigned char *sources, - unsigned char *sinks, unsigned char *healed_sinks, +int +__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, struct afr_reply *replies) { int ret = -1; @@ -582,10 +583,11 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; - ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid, + ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid, replies); - if (ret) - return ret; + + if (ret) + return ret; witness = alloca0(priv->child_count * sizeof (*witness)); ret = afr_selfheal_find_direction (frame, this, replies, @@ -650,8 +652,9 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, goto unlock; } - ret = __afr_selfheal_data_prepare (frame, this, fd, data_lock, - sources, sinks, healed_sinks, + ret = __afr_selfheal_data_prepare (frame, this, fd->inode, + data_lock, sources, sinks, + healed_sinks, locked_replies); if (ret < 0) goto unlock; @@ -678,7 +681,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, unlock: afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0, data_lock); - if (ret < 0) + if (ret < 0) goto out; ret = afr_selfheal_data_do (frame, this, fd, source, healed_sinks, @@ -731,7 +734,6 @@ afr_selfheal_data_open (xlator_t *this, inode_t *inode) return fd; } - int afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) { diff --git 
a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 4b513ffc73d..6af9488f9a4 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -366,11 +366,11 @@ __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources, return source; } - -static int -__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, - unsigned char *locked_on, unsigned char *sources, - unsigned char *sinks, unsigned char *healed_sinks, +int +__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, struct afr_reply *replies, int *source_p) { int ret = -1; @@ -380,10 +380,10 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; - ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid, + ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid, replies); - if (ret) - return ret; + if (ret) + return ret; witness = alloca0 (sizeof (*witness) * priv->child_count); ret = afr_selfheal_find_direction (frame, this, replies, @@ -416,7 +416,6 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, return ret; } - static int afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, char *name) @@ -454,7 +453,8 @@ afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, goto unlock; } - ret = __afr_selfheal_entry_prepare (frame, this, fd, locked_on, + ret = __afr_selfheal_entry_prepare (frame, this, fd->inode, + locked_on, sources, sinks, healed_sinks, par_replies, &source); @@ -602,8 +602,9 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, goto unlock; } - ret = __afr_selfheal_entry_prepare (frame, this, fd, data_lock, - sources, sinks, healed_sinks, + ret = __afr_selfheal_entry_prepare (frame, 
this, fd->inode, + data_lock, sources, sinks, + healed_sinks, locked_replies, &source); } unlock: diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 87600df3bad..7c0d5cb08c6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -294,7 +294,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, return source; } -static int + +int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, @@ -310,11 +311,11 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i ret = afr_selfheal_unlocked_discover (frame, inode, inode->gfid, replies); - if (ret) - return ret; + if (ret) + return ret; witness = alloca0 (sizeof (*witness) * priv->child_count); - ret = afr_selfheal_find_direction (frame, this, replies, + ret = afr_selfheal_find_direction (frame, this, replies, AFR_METADATA_TRANSACTION, locked_on, sources, sinks, witness); if (ret) diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index f208e6bc813..50cff91ccb3 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -198,4 +198,33 @@ afr_mark_active_sinks (xlator_t *this, unsigned char *sources, gf_boolean_t afr_does_witness_exist (xlator_t *this, uint64_t *witness); + +int +__afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, + unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, + struct afr_reply *replies); + +int +__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, + unsigned char *sources, + unsigned char *sinks, + unsigned char *healed_sinks, + struct afr_reply *replies); 
+int +__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, + inode_t *inode, unsigned char *locked_on, + unsigned char *sources, + unsigned char *sinks, + unsigned char *healed_sinks, + struct afr_reply *replies, int *source_p); + +int +afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, + uuid_t gfid, inode_t **link_inode, + gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, + gf_boolean_t *entry_selfheal); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 6ce52aad1c4..29f34107481 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -40,8 +40,6 @@ #define NTH_INDEX_HEALER(this, n) &((((afr_private_t *)this->private))->shd.index_healers[n]) #define NTH_FULL_HEALER(this, n) &((((afr_private_t *)this->private))->shd.full_healers[n]) -int afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); - char * afr_subvol_name (xlator_t *this, int subvol) { diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 59f06b79cff..02b26b8061f 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -67,4 +67,10 @@ afr_selfheal_daemon_init (xlator_t *this); int afr_xl_op (xlator_t *this, dict_t *input, dict_t *output); +int +afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, + char **path_p); + +int +afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name); #endif /* !_AFR_SELF_HEALD_H */ diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 51e57e8207f..7e138c54ec0 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1011,4 +1011,8 @@ afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2); gf_boolean_t afr_is_xattr_ignorable (char *key); + +int +afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t 
*loc, + dict_t *xdata); #endif /* __AFR_H__ */ -- cgit