From b8a079b17f5aeb659ca35b9c7ec4b0ac7dc80406 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 13 Jul 2015 00:53:20 +0530
Subject: cluster/ec: Prevent data corruptions

- On lock reuse, preserve the 'healing' bits
- Don't set ctx->size outside locks in healing code
- Let internal xattrop/fxattrop fops keep their own fop->mask instead of
  narrowing it to the parent's non-healing subvolumes.

 >Change-Id: I6b76da5d7ebe367d8f3552cbf9fd18e556f2a171
 >BUG: 1232678
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/11640
 >Tested-by: NetBSD Build System <jenkins@build.gluster.org>
 >Tested-by: Gluster Build System <jenkins@build.gluster.com>
 >Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>

BUG: 1243647
Change-Id: I1b3828e4d4a863b84b2c4e732e7965d1302cea47
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/11686
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
---
 xlators/cluster/ec/src/ec-common.c | 23 ++++++++++++++++++++---
 xlators/cluster/ec/src/ec-data.h   |  1 +
 xlators/cluster/ec/src/ec-heal.c   | 22 +++++++++++-----------
 3 files changed, 32 insertions(+), 14 deletions(-)

(limited to 'xlators/cluster/ec/src')

diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 26c62deea77..18770f259a4 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -401,6 +401,18 @@ ec_must_wind (ec_fop_data_t *fop)
         return _gf_false;
 }
 
+static gf_boolean_t
+ec_internal_op (ec_fop_data_t *fop)
+{
+        if (ec_must_wind (fop))
+                return _gf_true;
+        if (fop->id == GF_FOP_XATTROP)
+                return _gf_true;
+        if (fop->id == GF_FOP_FXATTROP)
+                return _gf_true;
+        return _gf_false;
+}
+
 int32_t ec_child_select(ec_fop_data_t * fop)
 {
     ec_t * ec = fop->xl->private;
@@ -413,8 +425,9 @@ int32_t ec_child_select(ec_fop_data_t * fop)
     /* Wind the fop on same subvols as parent for any internal extra fops like
      * head/tail read in case of writev fop. Unlocks shouldn't do this because
      * unlock should go on all subvols where lock is performed*/
-    if (fop->parent && !ec_must_wind (fop))
+    if (fop->parent && !ec_internal_op (fop)) {
             fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+    }
 
     mask = ec->xl_up;
     if (fop->parent == NULL)
@@ -975,6 +988,7 @@ out:
         parent->mask &= fop->good;
 
         /*As of now only data healing marks bricks as healing*/
+        lock->healing |= fop->healing;
         if (ec_is_data_fop (parent->id)) {
             parent->healing |= fop->healing;
         }
@@ -999,9 +1013,13 @@ void ec_get_size_version(ec_lock_link_t *link)
 
     lock = link->lock;
     ctx = lock->ctx;
+    fop = link->fop;
 
     /* If ec metadata has already been retrieved, do not try again. */
     if (ctx->have_info) {
+        if (ec_is_data_fop (fop->id)) {
+            fop->healing |= lock->healing;
+        }
         return;
     }
 
@@ -1011,8 +1029,6 @@ void ec_get_size_version(ec_lock_link_t *link)
         return;
     }
 
-    fop = link->fop;
-
     uid = fop->frame->root->uid;
     gid = fop->frame->root->gid;
 
@@ -1277,6 +1293,7 @@ int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this,
         link = fop->data;
         lock = link->lock;
         lock->mask = lock->good_mask = fop->good;
+        lock->healing = 0;
 
         ec_lock_acquired(link);
         ec_lock(fop->parent);
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index e409e5b0e77..18da06cea04 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -146,6 +146,7 @@ struct _ec_lock
                                    the next unlock/lock cycle. */
     uintptr_t          mask;
     uintptr_t          good_mask;
+    uintptr_t          healing;
     int32_t            refs;
     int32_t            refs_frozen;
     int32_t            inserted;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 6f82203b66c..a7c97a54ce7 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -256,10 +256,13 @@ int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
     }
     else
     {
-            gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "
-                    "to heal block at %"PRIu64,
-                    uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),
-                    heal->offset);
+        if (op_ret < 0) {
+                gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "
+                        "to heal block at %"PRIu64,
+                        uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),
+                        heal->offset);
+                heal->bad = 0;
+        }
         heal->done = 1;
     }
 
@@ -1709,10 +1712,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)
     case EC_STATE_HEAL_DATA_UNLOCK:
         ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);
 
-        if (state < 0)
-                return -EC_STATE_REPORT;
-        else
-                return EC_STATE_REPORT;
+         return EC_STATE_REPORT;
 
     case EC_STATE_REPORT:
         if (fop->cbks.heal) {
@@ -1728,7 +1728,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)
                             EIO, 0, 0, 0, NULL);
         }
 
-        return -EC_STATE_END;
+        return EC_STATE_END;
     default:
         gf_msg (fop->xl->name, GF_LOG_ERROR, 0,
                 EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
@@ -1759,8 +1759,6 @@ ec_heal_block (call_frame_t *frame, xlator_t *this, uintptr_t target,
     if (fop == NULL)
         goto out;
 
-    GF_ASSERT(ec_set_inode_size(fop, heal->fd->inode, heal->total_size));
-
     error = 0;
 
 out:
@@ -1834,6 +1832,8 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
                         break;
 
         }
+        memset (healed_sinks, 0, ec->nodes);
+        ec_mask_to_char_array (heal->bad, healed_sinks, ec->nodes);
         fd_unref (heal->fd);
         LOCK_DESTROY (&heal->lock);
         syncbarrier_destroy (heal->data);
-- 
cgit