summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-heal.c
diff options
context:
space:
mode:
author: Xavier Hernandez <xhernandez@datalab.es> 2015-01-07 12:29:48 +0100
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-05-09 17:29:46 -0700
commit: b46e65db722c14985db62a1679e0388d217b713b (patch)
tree: fac847af8f529505bf364ed54df25658552fbae3 /xlators/cluster/ec/src/ec-heal.c
parent: 73b61d709712692b48a1c9b18b4547410fb9078c (diff)
ec: Fix failures with missing files
When a file does not exist on a brick but does exist on others, there could be problems trying to access it because there were some loc_t structures with a null 'pargfid' but with 'name' set. This forced inode resolution based on <pargfid>/name instead of <gfid>, which would be the correct one. To solve this problem, 'name' is always set to NULL when 'pargfid' is not present. Another problem was caused by incorrect handling of errors during incremental locking. The only allowed error during incremental locking was ENOTCONN, but missing files on a brick can be returned as ESTALE. This caused an EIO on the operation. This patch ignores errors during incremental locking. At the end of the operation it checks whether enough bricks were successfully locked to continue. Change-Id: I9360ebf8d819d219cea2d173c09bd37679a6f15a BUG: 1176062 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/9407 Tested-by: NetBSD Build System Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-heal.c')
-rw-r--r-- xlators/cluster/ec/src/ec-heal.c 64
1 files changed, 32 insertions, 32 deletions
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 315de8765ad..ceddfeb6ac7 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -486,16 +486,6 @@ ec_heal_init (ec_fop_data_t * fop)
ec_heal_t * heal = NULL;
int32_t error = 0;
- inode = fop->loc[0].inode;
- if (inode == NULL)
- {
- gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to start inode healing "
- "because there is not enough "
- "information");
-
- return ENODATA;
- }
-
heal = GF_MALLOC(sizeof(ec_heal_t), ec_mt_ec_heal_t);
if (heal == NULL)
{
@@ -509,6 +499,16 @@ ec_heal_init (ec_fop_data_t * fop)
goto out;
}
+ inode = heal->loc.inode;
+ if (inode == NULL) {
+ gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to start inode healing "
+ "because there is not enough "
+ "information");
+
+ error = ENODATA;
+ goto out;
+ }
+
LOCK_INIT(&heal->lock);
heal->xl = fop->xl;
@@ -532,26 +532,20 @@ ec_heal_init (ec_fop_data_t * fop)
gf_log("ec", GF_LOG_INFO, "Healing '%s', gfid %s", heal->loc.path,
uuid_utoa(heal->loc.gfid));
} else {
- error = EEXIST;
- }
-
- list_add_tail(&heal->list, &ctx->heal);
- heal = NULL;
-
-unlock:
- UNLOCK(&inode->lock);
-
- if (error == EEXIST) {
LOCK(&fop->lock);
fop->jobs++;
fop->refs++;
UNLOCK(&fop->lock);
-
- error = 0;
}
+ list_add_tail(&heal->list, &ctx->heal);
+ heal = NULL;
+
+unlock:
+ UNLOCK(&inode->lock);
+
out:
GF_FREE(heal);
@@ -563,6 +557,7 @@ void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd)
loc_t loc;
if (ec_loc_parent(heal->xl, &heal->loc, &loc) != 0) {
+ gf_log("ec", GF_LOG_NOTICE, "ec_loc_parent() failed");
ec_fop_set_error(heal->fop, EIO);
return;
@@ -1164,10 +1159,11 @@ void ec_heal_dispatch(ec_heal_t *heal)
LOCK(&inode->lock);
- /* A heal object not belonging to any list means that it has not been fully
- * executed. It got its information from a previous heal that was executing
- * when this heal started. */
- if (!list_empty(&heal->list)) {
+ /* done == 0 means that self-heal is still running (it shouldn't happen)
+ * done == 1 means that self-heal has just completed
+ * done == 2 means that self-heal has completed and reported */
+ if (heal->done == 1) {
+ heal->done = 2;
list_del_init(&heal->list);
ctx = __ec_inode_get(inode, heal->xl);
if (ctx != NULL) {
@@ -1182,6 +1178,11 @@ void ec_heal_dispatch(ec_heal_t *heal)
if (!next->partial) {
break;
}
+
+ /* Setting 'done' to 2 avoids executing all heal logic and
+ * directly reports the result to the caller. */
+ next->done = 2;
+
list_move_tail(&next->list, &list);
}
if (list_empty(&ctx->heal)) {
@@ -1241,10 +1242,6 @@ void ec_heal_dispatch(ec_heal_t *heal)
heal->good = cbk->uintptr[1];
heal->fixed = cbk->uintptr[2];
- /* Setting 'done' to 1 avoids executing all heal logic and directly
- * reports the result to the caller. */
- heal->done = 1;
-
ec_resume(heal->fop, error);
}
@@ -1304,11 +1301,14 @@ ec_manager_heal (ec_fop_data_t * fop, int32_t state)
}
case EC_STATE_DISPATCH:
- if (heal->done) {
+ if (heal->done != 0) {
+ gf_log("ec", GF_LOG_NOTICE, "heal already done");
return EC_STATE_HEAL_DISPATCH;
}
+ gf_log("ec", GF_LOG_NOTICE, "heal before entrylk");
ec_heal_entrylk(heal, ENTRYLK_LOCK);
+ gf_log("ec", GF_LOG_NOTICE, "heal after entrylk");
return EC_STATE_HEAL_ENTRY_LOOKUP;
@@ -1403,7 +1403,7 @@ ec_manager_heal (ec_fop_data_t * fop, int32_t state)
return EC_STATE_HEAL_DISPATCH;
case EC_STATE_HEAL_DATA_LOCK:
- if (heal->done)
+ if (heal->done != 0)
{
return EC_STATE_HEAL_POST_INODELK_LOCK;
}