summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ashish Pandey <aspandey@redhat.com> 2018-10-18 17:15:58 +0530
committer: Shyamsundar Ranganathan <srangana@redhat.com> 2018-11-08 19:05:46 +0000
commit: d1b0de23c1fdc44a4ef661a8bb03c27228045634 (patch)
tree: e89afd63d44d1a74ffbf1a1a7fbc67f24b97d90b
parent: b651cb3e612214c8fdb434af2b95ca0c018b5c36 (diff)
cluster/afr : Check for UP bricks before starting heal
Problem: Currently, for a replica volume, SHD keeps crawling index entries even when only one brick is UP and nothing can be healed. In a thin-arbiter volume, which is also a replica 2 volume, this crawl causes inode lock contention, which in turn sends upcalls to all the clients asking them to release their notify locks, even though no healing is possible. This slows down client performance and defeats the purpose of keeping in-memory information about the bad brick. Solution: Before starting heal, or even crawling, check whether a sufficient number of children are UP and available to check and heal entries. (cherry picked from commit f73b4476b15f9d6d3dc3c8e20c9742aacd857f9f) Change-Id: I011c9da3b37cae275f791affd56b8f1c1ac9255d updates: bz#1644645 Signed-off-by: Ashish Pandey <aspandey@redhat.com>
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.c 15
-rw-r--r-- xlators/cluster/afr/src/afr.h 3
3 files changed, 19 insertions, 1 deletion
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 51d611d503a..0971104cc10 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4919,7 +4919,7 @@ find_child_index(xlator_t *this, xlator_t *child)
return i;
}
-static int
+int
__afr_get_up_children_count(afr_private_t *priv)
{
int up_children = 0;
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 0cf01a041b4..72b7d0d56ba 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -716,6 +716,18 @@ out:
return;
}
+gf_boolean_t
+afr_bricks_available_for_heal(afr_private_t *priv)
+{
+ int up_children = 0;
+
+ up_children = __afr_get_up_children_count(priv);
+ if (up_children < 2) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -735,6 +747,9 @@ afr_shd_index_healer(void *data)
for (;;) {
afr_shd_healer_wait(healer);
+ if (!afr_bricks_available_for_heal(priv))
+ continue;
+
ASSERT_LOCAL(this, healer);
priv->local[healer->subvol] = healer->local;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index cc4bceef521..3d2c1950571 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1284,6 +1284,9 @@ afr_ta_post_op_unlock(xlator_t *this, loc_t *loc);
gf_boolean_t
afr_is_pending_set(xlator_t *this, dict_t *xdata, int type);
+int
+__afr_get_up_children_count(afr_private_t *priv);
+
call_frame_t *
afr_ta_frame_create(xlator_t *this);
#endif /* __AFR_H__ */