summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-heal.c
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2014-10-08 09:20:11 +0200
committerVijay Bellur <vbellur@redhat.com>2014-10-22 01:05:19 -0700
commit4522acc20bdd1ca17c053969ef7edce1bb6ede76 (patch)
tree5eaecfac3d913a0662de21f13344a2e037846621 /xlators/cluster/ec/src/ec-heal.c
parentd01b00ae2b124dfdd6905e463533a715f1cedc5b (diff)
ec: Fix self-heal issues
Problem: Doing an 'ls' of a directory that has been modified while one of the bricks was down sometimes returns the old directory contents. Cause: Directories are not marked when they are modified, as files are. The ec xlator balances requests amongst available and healthy bricks. Since there is no way to detect that a directory is out of date in one of the bricks, it is used from time to time to return the directory contents. Solution: Basically, the solution consists in using versioning information for directories as well; however, some additional changes have been necessary. Changes: * Use directory versioning: This requires locking the full directory instead of a single entry for all requests that add or remove entries from it. This is needed to allow atomic version update. This affects the following fops: create, mkdir, mknod, link, symlink, rename, unlink, rmdir. Another side effect is that opendir must do a prior lookup to get versioning information and discard out-of-date bricks for subsequent readdir(p) calls. * Restrict directory self-heal: Till now, when a discrepancy was found in lookup, a self-heal was automatically started. This caused the versioning information of a bad directory to be healed instantly, making the original problem reappear. To solve this, when a missing directory is detected in one or more bricks on lookup or opendir fops, only a partial self-heal is performed on it. A partial self-heal basically creates the directory but does not restore any additional information. This prevents an 'ls' from repairing the directory and causing the problem to happen again. With this change, the output of 'ls' is always consistent. However, since the directory has been created in the brick, this allows any other operation on it (creating new files, for example) to succeed on all bricks and not add additional work to the self-heal process. To force a self-heal of a directory, any other operation must be done on it, for example a getxattr. 
With these changes, the correct healing procedure that would avoid inconsistent directory browsing consists of a post-order traversal of the directories being healed. This way, the directory contents will be healed before healing the directory itself. * Additional changes to fix self-heal errors - Don't use fop->fd to decide between fd/loc. open, opendir and create have an fd, but the correct data is in loc. - Fix incorrect management of bad bricks per inode/fd. - Fix incorrect selection of fop's target bricks when there are bad bricks involved. - Improved ec_loc_parent() to always return a parent loc as complete as possible. This is a backport of http://review.gluster.org/8916/ Change-Id: Iaf3df174d7857da57d4a87b4a8740a7048b366ad BUG: 1149727 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/8946 Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-heal.c')
-rw-r--r--xlators/cluster/ec/src/ec-heal.c48
1 files changed, 31 insertions, 17 deletions
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index dc11e6d2e87..a208330e68c 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -310,8 +310,7 @@ int32_t ec_heal_reopen_cbk(call_frame_t * frame, void * cookie,
LOCK(&fd->lock);
ctx = __ec_fd_get(fd, fop->xl);
- if ((ctx != NULL) && (ctx->loc.inode != NULL))
- {
+ if (ctx != NULL) {
ctx->bad &= ~good;
ctx->open |= good;
}
@@ -482,6 +481,7 @@ int32_t ec_heal_init(ec_fop_data_t * fop)
heal->fop = fop;
pool = fop->xl->ctx->iobuf_pool;
heal->size = iobpool_default_pagesize(pool) * ec->fragments;
+ heal->partial = fop->int32;
LOCK(&inode->lock);
@@ -516,10 +516,9 @@ out:
void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd)
{
loc_t loc;
- char * name;
int32_t error;
- error = ec_loc_parent(heal->xl, &heal->loc, &loc, &name);
+ error = ec_loc_parent(heal->xl, &heal->loc, &loc);
if (error != 0)
{
ec_fop_set_error(heal->fop, error);
@@ -528,10 +527,9 @@ void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd)
}
ec_entrylk(heal->fop->frame, heal->xl, -1, EC_MINIMUM_ALL, NULL, NULL,
- heal->xl->name, &loc, name, cmd, ENTRYLK_WRLCK, NULL);
+ heal->xl->name, &loc, NULL, cmd, ENTRYLK_WRLCK, NULL);
loc_wipe(&loc);
- GF_FREE(name);
}
void ec_heal_inodelk(ec_heal_t * heal, int32_t type, int32_t use_fd,
@@ -970,7 +968,8 @@ void ec_heal_reopen_fd(ec_heal_t * heal)
{
inode_t * inode;
fd_t * fd;
- ec_fd_t * ctx;
+ ec_fd_t *ctx_fd;
+ ec_inode_t *ctx_inode;
uintptr_t mask;
int32_t flags;
@@ -978,12 +977,16 @@ void ec_heal_reopen_fd(ec_heal_t * heal)
LOCK(&inode->lock);
+ ctx_inode = __ec_inode_get(inode, heal->xl);
+ if (ctx_inode != NULL) {
+ ctx_inode->bad &= ~(heal->good | heal->bad);
+ }
+
list_for_each_entry(fd, &inode->fd_list, inode_list)
{
- ctx = ec_fd_get(fd, heal->xl);
- if ((ctx != NULL) && (ctx->loc.inode != NULL))
- {
- mask = heal->bad & ~ctx->open;
+ ctx_fd = ec_fd_get(fd, heal->xl);
+ if (ctx_fd != NULL) {
+ mask = heal->bad & ~ctx_fd->open;
if (mask != 0)
{
UNLOCK(&inode->lock);
@@ -996,7 +999,7 @@ void ec_heal_reopen_fd(ec_heal_t * heal)
}
else
{
- flags = ctx->flags & ~O_TRUNC;
+ flags = ctx_fd->flags & ~O_TRUNC;
if ((flags & O_ACCMODE) == O_WRONLY)
{
flags &= ~O_ACCMODE;
@@ -1179,7 +1182,13 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state)
return EC_STATE_HEAL_ENTRY_PREPARE;
case EC_STATE_HEAL_ENTRY_PREPARE:
- ec_heal_prepare(heal);
+ if (!heal->partial || (heal->iatt.ia_type == IA_IFDIR)) {
+ ec_heal_prepare(heal);
+ }
+
+ if (heal->partial) {
+ return EC_STATE_HEAL_UNLOCK_ENTRY;
+ }
return EC_STATE_HEAL_PRE_INODELK_LOCK;
@@ -1240,6 +1249,8 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_HEAL_ENTRY_PREPARE:
case -EC_STATE_HEAL_PRE_INODELK_LOCK:
case -EC_STATE_HEAL_PRE_INODE_LOOKUP:
+ case -EC_STATE_HEAL_UNLOCK_ENTRY:
+ case EC_STATE_HEAL_UNLOCK_ENTRY:
ec_heal_entrylk(heal, ENTRYLK_UNLOCK);
if (ec_heal_needs_data_rebuild(heal))
@@ -1395,7 +1406,7 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state)
void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_heal_cbk_t func, void * data, loc_t * loc,
- dict_t * xdata)
+ int32_t partial, dict_t *xdata)
{
ec_cbk_t callback = { .heal = func };
ec_fop_data_t * fop = NULL;
@@ -1415,6 +1426,8 @@ void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
+ fop->int32 = partial;
+
if (loc != NULL)
{
if (loc_copy(&fop->loc[0], loc) != 0)
@@ -1474,14 +1487,15 @@ void ec_wind_fheal(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fheal_cbk_t func, void * data, fd_t * fd,
- dict_t * xdata)
+ int32_t partial, dict_t *xdata)
{
ec_fd_t * ctx = ec_fd_get(fd, this);
- if ((ctx != NULL) && (ctx->loc.inode != NULL))
+ if (ctx != NULL)
{
gf_log("ec", GF_LOG_DEBUG, "FHEAL ctx: flags=%X, open=%lX, bad=%lX",
ctx->flags, ctx->open, ctx->bad);
- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, xdata);
+ ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
+ xdata);
}
}