cluster/ec: Don't trigger data/metadata heal on Lookups

Problem-1: If a Lookup, which doesn't take any locks, observes a version mismatch, that observation can't be trusted. If we launch a heal based on this information, it will lead to self-heals which will affect I/O performance in the cases where the Lookup is wrong. Considering that the self-heal daemon, and operations on the inode from the client which take locks, can still trigger a heal, we can choose not to attempt a heal on Lookup.

Problem-2: Fixed spurious failure of tests/bitrot/bug-1373520.t. For the issues above, what was happening was that ec_heal_inspect() was preventing the 'name' heal from happening.

Problem-3: tests/basic/ec/ec-background-heals.t. To be honest, I don't know what the problem was; while fixing the two problems above, I made some changes to ec_heal_inspect() and ec_need_heal(), after which, when I tried to recreate the spurious failure, it just didn't happen even after a long time.

BUG: 1414287
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Change-Id: Ife2535e1d0b267712973673f6d474e288f3c6834
Reviewed-on: https://review.gluster.org/16468
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
author: Pranith Kumar K <pkarampu@redhat.com> 2017-01-25 15:31:44 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2017-02-26 22:06:55 -0500
commit: c1fc1fc9cb5a13e6ddf8c9270deb0c7609333540 (patch)
tree: a3876aa8a0c1b087429ba916c9380b90bcda6b72 /xlators/cluster/ec/src/ec-common.c
parent: 4638dfc1fee80f9338f2941f3cccb17bec63989a (diff)
1 files changed, 52 insertions, 14 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 5e1efe381d4..823922542a0 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -82,10 +82,50 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
     return 0;
 }
 
+static uintptr_t
+ec_fop_needs_name_heal (ec_fop_data_t *fop)
+{
+        ec_t          *ec         = NULL;
+        ec_cbk_data_t *cbk        = NULL;
+        ec_cbk_data_t *enoent_cbk = NULL;
+
+        ec = fop->xl->private;
+        if (fop->id != GF_FOP_LOOKUP)
+                return 0;
+
+        if (!fop->loc[0].name || strlen (fop->loc[0].name) == 0)
+                return 0;
+
+        list_for_each_entry(cbk, &fop->cbk_list, list)
+        {
+                if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) {
+                        enoent_cbk = cbk;
+                        break;
+                }
+        }
+
+        if (!enoent_cbk)
+                return 0;
+
+        return ec->xl_up & ~enoent_cbk->mask;
+}
+
 int32_t ec_fop_needs_heal(ec_fop_data_t *fop)
 {
     ec_t *ec = fop->xl->private;
 
+    if (fop->lock_count == 0) {
+    /*
+     * if fop->lock_count is zero that means it saw version mismatch
+     * without any locks so it can't be trusted. If we launch a heal
+     * based on this it will lead to INODELKs which will affect I/O
+     * performance. Considering self-heal-daemon and operations on
+     * the inode from client which take locks can still trigger the
+     * heal we can choose to not attempt a heal when fop->lock_count
+     * is zero.
+     */
+            return 0;
+    }
     return (ec->xl_up & ~(fop->remaining | fop->good)) != 0;
 }
 
@@ -95,7 +135,7 @@ void ec_check_status(ec_fop_data_t * fop)
     int32_t partial = 0;
     char str1[32], str2[32], str3[32], str4[32], str5[32];
 
-    if (!ec_fop_needs_heal(fop)) {
+    if (!ec_fop_needs_name_heal (fop) && !ec_fop_needs_heal(fop)) {
         return;
     }
 
@@ -108,19 +148,17 @@ void ec_check_status(ec_fop_data_t * fop)
         }
     }
 
-    if (fop->lock_count > 0) {
-            gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
-                    EC_MSG_OP_FAIL_ON_SUBVOLS,
-                    "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
-                    "remaining=%s, good=%s, bad=%s)",
-                    gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
-                    ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
-                    ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
-                    ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
-                    ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
-                    ec_bin(str5, sizeof(str5),
-                    ec->xl_up & ~(fop->remaining | fop->good), ec->nodes));
-    }
+    gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
+            EC_MSG_OP_FAIL_ON_SUBVOLS,
+            "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+            "remaining=%s, good=%s, bad=%s)",
+            gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+            ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+            ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+            ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+            ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+            ec_bin(str5, sizeof(str5),
+            ec->xl_up & ~(fop->remaining | fop->good), ec->nodes));
     if (fop->use_fd)
     {
         if (fop->fd != NULL) {
author	Pranith Kumar K <pkarampu@redhat.com>	2017-01-25 15:31:44 +0530
committer	Pranith Kumar Karampuri <pkarampu@redhat.com>	2017-02-26 22:06:55 -0500
commit	c1fc1fc9cb5a13e6ddf8c9270deb0c7609333540 (patch)
tree	a3876aa8a0c1b087429ba916c9380b90bcda6b72 /xlators/cluster/ec/src/ec-common.c
parent	4638dfc1fee80f9338f2941f3cccb17bec63989a (diff)