diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2017-03-02 07:14:14 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2017-03-04 07:37:56 -0500 |
commit | 78c5c5637104cf79578d0fb9173647c9c3421177 (patch) | |
tree | 4d45320f5487f900dcc1a21b7087313162da9652 /xlators/cluster/ec/src/ec-common.c | |
parent | aaa5b2ec2f0ef1a62047c9ab91d957c7b0a1552a (diff) |
cluster/ec: Introduce optimistic changelog in EC
Problem: Fix to https://bugzilla.redhat.com/show_bug.cgi?id=1316873 has made
changes to set dirty flag before every update fop, data or metadata, and unset
it after successful operation. That makes some of the fops very slow such as
entry operations or metadata operations.
Solution: File data operations are the only operation which take some time and
setting dirty flag before a fop and unsetting it after serves the purpose as
probability of failure of a fop is high when the time duration is more. For all
the other operations, set dirty flag at the end of the fop, if any brick is
down and need heal.
Providing following option to choose between high performance or better heal
marking for metadata and entry fops.
Set/Unset dirty flag for every update fop at the start of the fop. If ON, this
option impacts performance of entry operations or metadata operations as it
will set dirty flag at the start and unset it at the end of ALL update fop. If
OFF and all the bricks are good, dirty flag will be set at the start only for
file fops For metadata and entry fops dirty flag will not be set at the start,
if all the bricks are good. This does not impact performance for metadata
operations and entry operation but has a very small window to miss marking
entry as dirty in case it is required to be healed.
Thanks to Xavi and Ashish for the design
Picked the .t file from Ashish' patch https://review.gluster.org/16298
BUG: 1408809
Change-Id: I3ce860063f0e2901e50754dcfc3e4ed22daf819f
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://review.gluster.org/16821
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Tested-by: Xavier Hernandez <xhernandez@datalab.es>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/ec/src/ec-common.c')
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 49 |
1 files changed, 48 insertions, 1 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 823922542a0..3ae7f110d99 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -932,16 +932,19 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config) } gf_boolean_t -ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty) +ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, + uint64_t *dirty) { gf_boolean_t set_dirty = _gf_false; if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) { + if (!link->optimistic_changelog) dirty[EC_DATA_TXN] = 1; } if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) { + if (!link->optimistic_changelog) dirty[EC_METADATA_TXN] = 1; } @@ -962,6 +965,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, ec_lock_link_t *link = fop->data; ec_lock_t *lock = NULL; ec_inode_t *ctx; + gf_boolean_t release = _gf_false; lock = link->lock; parent = link->fop; @@ -1055,6 +1059,26 @@ unlock: UNLOCK(&lock->loc.inode->lock); if (op_errno == 0) { + /* If the fop fails on any of the good bricks, it is important to mark + * it dirty and update versions right away if dirty was not set before. + */ + if (lock->good_mask & ~(fop->good | fop->remaining)) { + release = _gf_true; + } + + /* lock->release is a critical field that is checked and modified most + * of the time inside a locked region. This use here is safe because we + * are in a modifying fop and we currently don't allow two modifying + * fops to be processed concurrently, so no one else could be checking + * or modifying it.*/ + if (link->update[0] && !link->dirty[0]) { + lock->release |= release; + } + + if (link->update[1] && !link->dirty[1]) { + lock->release |= release; + } + /* We don't allow the main fop to be executed on bricks that have not * succeeded the initial xattrop. */ parent->mask &= fop->good; @@ -1097,6 +1121,7 @@ void ec_get_size_version(ec_lock_link_t *link) ec_inode_t *ctx; ec_fop_data_t *fop; dict_t *dict = NULL; + ec_t *ec = NULL; int32_t error = 0; gf_boolean_t getting_xattr; gf_boolean_t set_dirty = _gf_false; @@ -1105,6 +1130,17 @@ void ec_get_size_version(ec_lock_link_t *link) lock = link->lock; ctx = lock->ctx; fop = link->fop; + ec = fop->xl->private; + + if (ec->optimistic_changelog && + !(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id)) + link->optimistic_changelog = _gf_true; + + /* If ctx->have_info is false and lock->query is true, it means that we'll + * send the xattrop anyway, so we can use it to update dirty counts, even + * if it's not necessary to do it right now. */ + if (!ctx->have_info && lock->query) + link->optimistic_changelog = _gf_false; set_dirty = ec_set_dirty_flag (link, ctx, dirty); @@ -1714,6 +1750,13 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk, if (link->update[1]) { ctx->post_version[1]++; } + /* If the fop fails on any of the good bricks, it is important to mark + * it dirty and update versions right away. */ + if (link->update[0] || link->update[1]) { + if (lock->good_mask & ~(fop->good | fop->remaining)) { + lock->release = _gf_true; + } + } } ec_lock_update_good(lock, fop); @@ -2028,9 +2071,13 @@ ec_update_info(ec_lock_link_t *link) if (ctx->dirty[1] != 0) { dirty[1] = -1; } + } else { + link->optimistic_changelog = _gf_false; + ec_set_dirty_flag (link, ctx, dirty); } memset(ctx->dirty, 0, sizeof(ctx->dirty)); } + if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) || (dirty[1] != 0)) { ec_update_size_version(link, version, size, dirty); |