diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 43 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 8 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 104 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 1 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 1 |
5 files changed, 127 insertions, 30 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 8d656702e12..518368073e8 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -313,14 +313,15 @@ ec_check_status(ec_fop_data_t *fop) gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " - "remaining=%s, good=%s, bad=%s)", + "remaining=%s, good=%s, bad=%s, %s)", gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), ec_bin(str4, sizeof(str4), fop->good, ec->nodes), ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), - ec->nodes)); + ec->nodes), + ec_msg_str(fop)); if (fop->use_fd) { if (fop->fd != NULL) { ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, @@ -2371,37 +2372,47 @@ ec_update_info(ec_lock_link_t *link) uint64_t dirty[2] = {0, 0}; uint64_t size; ec_t *ec = NULL; + uintptr_t mask; lock = link->lock; ctx = lock->ctx; ec = link->fop->xl->private; /* pre_version[*] will be 0 if have_version is false */ - version[0] = ctx->post_version[0] - ctx->pre_version[0]; - version[1] = ctx->post_version[1] - ctx->pre_version[1]; + version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] - + ctx->pre_version[EC_DATA_TXN]; + version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] - + ctx->pre_version[EC_METADATA_TXN]; size = ctx->post_size - ctx->pre_size; /* If we set the dirty flag for update fop, we have to unset it. * If fop has failed on some bricks, leave the dirty as marked. */ + if (lock->unlock_now) { + if (version[EC_DATA_TXN]) { + /*A data fop will have difference in post and pre version + *and for data fop we send writes on healing bricks also */ + mask = lock->good_mask | lock->healing; + } else { + mask = lock->good_mask; + } /* Ensure that nodes are up while doing final * metadata update.*/ - if (!(ec->node_mask & ~lock->good_mask) && - !(ec->node_mask & ~ec->xl_up)) { - if (ctx->dirty[0] != 0) { - dirty[0] = -1; + if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) { + if (ctx->dirty[EC_DATA_TXN] != 0) { + dirty[EC_DATA_TXN] = -1; } - if (ctx->dirty[1] != 0) { - dirty[1] = -1; + if (ctx->dirty[EC_METADATA_TXN] != 0) { + dirty[EC_METADATA_TXN] = -1; } /*If everything is fine and we already *have version xattr set on entry, there *is no need to update version again*/ - if (ctx->pre_version[0]) { - version[0] = 0; + if (ctx->pre_version[EC_DATA_TXN]) { + version[EC_DATA_TXN] = 0; } - if (ctx->pre_version[1]) { - version[1] = 0; + if (ctx->pre_version[EC_METADATA_TXN]) { + version[EC_METADATA_TXN] = 0; } } else { link->optimistic_changelog = _gf_false; @@ -2410,8 +2421,8 @@ ec_update_info(ec_lock_link_t *link) memset(ctx->dirty, 0, sizeof(ctx->dirty)); } - if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) || - (dirty[1] != 0)) { + if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) || + (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) { ec_update_size_version(link, version, size, dirty); return _gf_true; } diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 115e1475b50..54aaa779443 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -190,4 +190,12 @@ ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, void ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index, int32_t ret_status); +gf_boolean_t +ec_is_entry_healing(ec_fop_data_t *fop); +void +ec_set_entry_healing(ec_fop_data_t *fop); +void +ec_reset_entry_healing(ec_fop_data_t *fop); +char * +ec_msg_str(ec_fop_data_t *fop); #endif /* __EC_COMMON_H__ */ diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 3928a898105..9321a0088a6 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -103,6 +103,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata) } /* FOP: heal */ +void +ec_set_entry_healing(ec_fop_data_t *fop) +{ + ec_inode_t *ctx = NULL; + loc_t *loc = NULL; + + if (!fop) + return; + + loc = &fop->loc[0]; + LOCK(&loc->inode->lock); + { + ctx = __ec_inode_get(loc->inode, fop->xl); + if (ctx) { + ctx->heal_count += 1; + } + } + UNLOCK(&loc->inode->lock); +} + +void +ec_reset_entry_healing(ec_fop_data_t *fop) +{ + ec_inode_t *ctx = NULL; + loc_t *loc = NULL; + int32_t heal_count = 0; + if (!fop) + return; + + loc = &fop->loc[0]; + LOCK(&loc->inode->lock); + { + ctx = __ec_inode_get(loc->inode, fop->xl); + if (ctx) { + ctx->heal_count += -1; + heal_count = ctx->heal_count; + } + } + UNLOCK(&loc->inode->lock); + GF_ASSERT(heal_count >= 0); +} + uintptr_t ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood) { @@ -2509,17 +2551,6 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) goto out; } } - - msources = alloca0(ec->nodes); - mhealed_sinks = alloca0(ec->nodes); - ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks); - if (ret == 0) { - mgood = ec_char_array_to_mask(msources, ec->nodes); - mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes); - } else { - op_ret = -1; - op_errno = -ret; - } sources = alloca0(ec->nodes); healed_sinks = alloca0(ec->nodes); if (IA_ISREG(loc->inode->ia_type)) { @@ -2540,8 +2571,19 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) op_ret = -1; op_errno = -ret; } + msources = alloca0(ec->nodes); + mhealed_sinks = alloca0(ec->nodes); + ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks); + if (ret == 0) { + mgood = ec_char_array_to_mask(msources, ec->nodes); + mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes); + } else { + op_ret = -1; + op_errno = -ret; + } out: + ec_reset_entry_healing(fop); if (fop->cbks.heal) { fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, ec_char_array_to_mask(participants, ec->nodes), @@ -2652,11 +2694,33 @@ ec_handle_healers_done(ec_fop_data_t *fop) ec_launch_heal(ec, heal_fop); } +gf_boolean_t +ec_is_entry_healing(ec_fop_data_t *fop) +{ + ec_inode_t *ctx = NULL; + int32_t heal_count = 0; + loc_t *loc = NULL; + + loc = &fop->loc[0]; + + LOCK(&loc->inode->lock); + { + ctx = __ec_inode_get(loc->inode, fop->xl); + if (ctx) { + heal_count = ctx->heal_count; + } + } + UNLOCK(&loc->inode->lock); + GF_ASSERT(heal_count >= 0); + return heal_count; +} + void ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) { gf_boolean_t can_heal = _gf_true; ec_t *ec = this->private; + ec_fop_data_t *fop_rel = NULL; if (fop->req_frame == NULL) { LOCK(&ec->lock); @@ -2664,8 +2728,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) if ((ec->background_heals > 0) && (ec->heal_wait_qlen + ec->background_heals) > (ec->heal_waiters + ec->healers)) { - list_add_tail(&fop->healer, &ec->heal_waiting); - ec->heal_waiters++; + if (!ec_is_entry_healing(fop)) { + list_add_tail(&fop->healer, &ec->heal_waiting); + ec->heal_waiters++; + ec_set_entry_healing(fop); + } else { + fop_rel = fop; + } fop = __ec_dequeue_heals(ec); } else { can_heal = _gf_false; @@ -2675,8 +2744,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) } if (can_heal) { - if (fop) + if (fop) { + if (fop->req_frame != NULL) { + ec_set_entry_healing(fop); + } ec_launch_heal(ec, fop); + } } else { gf_msg_debug(this->name, 0, "Max number of heals are " @@ -2684,6 +2757,9 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) ec_fop_set_error(fop, EBUSY); ec_heal_fail(ec, fop); } + if (fop_rel) { + ec_heal_done(0, NULL, fop_rel); + } } void diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index e6b0359bd6f..43f6e3b69d2 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -717,6 +717,7 @@ __ec_inode_get(inode_t *inode, xlator_t *xl) memset(ctx, 0, sizeof(*ctx)); INIT_LIST_HEAD(&ctx->heal); INIT_LIST_HEAD(&ctx->stripe_cache.lru); + ctx->heal_count = 0; value = (uint64_t)(uintptr_t)ctx; if (__inode_ctx_set(inode, xl, &value) != 0) { GF_FREE(ctx); diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index f3d63ca09ce..6ae4a2b970e 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -171,6 +171,7 @@ struct _ec_inode { gf_boolean_t have_config; gf_boolean_t have_version; gf_boolean_t have_size; + int32_t heal_count; ec_config_t config; uint64_t pre_version[2]; uint64_t post_version[2]; |