summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2015-07-13 00:53:20 +0530
committerXavier Hernandez <xhernandez@datalab.es>2015-07-14 00:23:46 -0700
commit34e65c4b3aac3cbe80ec336c367b78b01376a7a3 (patch)
tree241aae56a057f404524d75bbda664abfe9eefd31
parentc4009f895818a84ed4c3bb8f826e78d17a6a5b3e (diff)
cluster/ec: Prevent data corruptions
- On lock reuse preserve 'healing' bits - Don't set ctx->size outside locks in healing code - Allow xattrop internal fops also on the fop->mask. Change-Id: I6b76da5d7ebe367d8f3552cbf9fd18e556f2a171 BUG: 1232678 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/11640 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
-rw-r--r--xlators/cluster/ec/src/ec-common.c23
-rw-r--r--xlators/cluster/ec/src/ec-data.h1
-rw-r--r--xlators/cluster/ec/src/ec-heal.c22
3 files changed, 32 insertions, 14 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index af5a30fe3de..1c49eba7735 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -401,6 +401,18 @@ ec_must_wind (ec_fop_data_t *fop)
return _gf_false;
}
+static gf_boolean_t
+ec_internal_op (ec_fop_data_t *fop)
+{
+ if (ec_must_wind (fop))
+ return _gf_true;
+ if (fop->id == GF_FOP_XATTROP)
+ return _gf_true;
+ if (fop->id == GF_FOP_FXATTROP)
+ return _gf_true;
+ return _gf_false;
+}
+
int32_t ec_child_select(ec_fop_data_t * fop)
{
ec_t * ec = fop->xl->private;
@@ -413,8 +425,9 @@ int32_t ec_child_select(ec_fop_data_t * fop)
/* Wind the fop on same subvols as parent for any internal extra fops like
* head/tail read in case of writev fop. Unlocks shouldn't do this because
* unlock should go on all subvols where lock is performed*/
- if (fop->parent && !ec_must_wind (fop))
+ if (fop->parent && !ec_internal_op (fop)) {
fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ }
mask = ec->xl_up;
if (fop->parent == NULL)
@@ -972,6 +985,7 @@ out:
parent->mask &= fop->good;
/*As of now only data healing marks bricks as healing*/
+ lock->healing |= fop->healing;
if (ec_is_data_fop (parent->id)) {
parent->healing |= fop->healing;
}
@@ -996,9 +1010,13 @@ void ec_get_size_version(ec_lock_link_t *link)
lock = link->lock;
ctx = lock->ctx;
+ fop = link->fop;
/* If ec metadata has already been retrieved, do not try again. */
if (ctx->have_info) {
+ if (ec_is_data_fop (fop->id)) {
+ fop->healing |= lock->healing;
+ }
return;
}
@@ -1008,8 +1026,6 @@ void ec_get_size_version(ec_lock_link_t *link)
return;
}
- fop = link->fop;
-
uid = fop->frame->root->uid;
gid = fop->frame->root->gid;
@@ -1274,6 +1290,7 @@ int32_t ec_locked(call_frame_t *frame, void *cookie, xlator_t *this,
link = fop->data;
lock = link->lock;
lock->mask = lock->good_mask = fop->good;
+ lock->healing = 0;
ec_lock_acquired(link);
ec_lock(fop->parent);
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 135ccdf5f53..ec470e9e8c1 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -146,6 +146,7 @@ struct _ec_lock
the next unlock/lock cycle. */
uintptr_t mask;
uintptr_t good_mask;
+ uintptr_t healing;
int32_t refs;
int32_t refs_frozen;
int32_t inserted;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 6f82203b66c..a7c97a54ce7 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -256,10 +256,13 @@ int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
else
{
- gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "
- "to heal block at %"PRIu64,
- uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),
- heal->offset);
+ if (op_ret < 0) {
+ gf_msg_debug (fop->xl->name, 0, "%s: read failed %s, failing "
+ "to heal block at %"PRIu64,
+ uuid_utoa (heal->fd->inode->gfid), strerror (op_errno),
+ heal->offset);
+ heal->bad = 0;
+ }
heal->done = 1;
}
@@ -1709,10 +1712,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)
case EC_STATE_HEAL_DATA_UNLOCK:
ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);
- if (state < 0)
- return -EC_STATE_REPORT;
- else
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
case EC_STATE_REPORT:
if (fop->cbks.heal) {
@@ -1728,7 +1728,7 @@ ec_manager_heal_block (ec_fop_data_t *fop, int32_t state)
EIO, 0, 0, 0, NULL);
}
- return -EC_STATE_END;
+ return EC_STATE_END;
default:
gf_msg (fop->xl->name, GF_LOG_ERROR, 0,
EC_MSG_UNHANDLED_STATE, "Unhandled state %d for %s",
@@ -1759,8 +1759,6 @@ ec_heal_block (call_frame_t *frame, xlator_t *this, uintptr_t target,
if (fop == NULL)
goto out;
- GF_ASSERT(ec_set_inode_size(fop, heal->fd->inode, heal->total_size));
-
error = 0;
out:
@@ -1834,6 +1832,8 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
break;
}
+ memset (healed_sinks, 0, ec->nodes);
+ ec_mask_to_char_array (heal->bad, healed_sinks, ec->nodes);
fd_unref (heal->fd);
LOCK_DESTROY (&heal->lock);
syncbarrier_destroy (heal->data);