| author | Ashish Pandey <aspandey@redhat.com> | 2017-09-18 14:07:31 +0530 |
|---|---|---|
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-11-10 22:15:37 +0000 |
| commit | a87abbd42e8b02deabbdfe290b16ed0d2f2e4c45 (patch) | |
| tree | ac36ba79b6e7a104cc49a9413a330d8499aee647 /xlators/cluster/ec/src/ec.c | |
| parent | 83558c69736878d2554ba77af3a6e27574da9447 (diff) | |
cluster/ec: Keep last written stripe in in-memory cache
Problem:
Consider an EC volume with a 4 + 2 configuration.
Its stripe size is 512 * 4 = 2048, which means one
stripe holds 2048 bytes of user data. Let's say
2048 + 512 = 2560 bytes have already been written
to this volume, so 512 bytes sit in the second
stripe. Now, if a sequential write of 1 byte
arrives at offset 2560, we have to read the whole
stripe, encode it with that 1 byte, and write the
whole stripe back. The next write, 1 byte at
offset 2561, will again READ-MODIFY-WRITE the
whole stripe. This causes bad performance because
of the many READ requests travelling over the
network.
Some tools and workloads generate exactly this
kind of load without users being aware of it, for
example fio and zip.
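
To make the arithmetic concrete, here is a minimal, self-contained C sketch (not part of the patch; the constants and the `needs_rmw` helper are illustrative) showing that all of these small sequential writes land in the same stripe and each one forces a read-modify-write:

```c
#include <inttypes.h>
#include <stdio.h>

#define CHUNK_SIZE  512                         /* bytes stored per data brick */
#define DATA_BRICKS 4                           /* the "4" in a 4 + 2 volume   */
#define STRIPE_SIZE (CHUNK_SIZE * DATA_BRICKS)  /* 512 * 4 = 2048 bytes        */

/* A write avoids read-modify-write only if it covers whole stripes. */
static int
needs_rmw (uint64_t offset, uint64_t size)
{
    return (offset % STRIPE_SIZE) != 0 || (size % STRIPE_SIZE) != 0;
}

int
main (void)
{
    uint64_t offsets[] = { 2560, 2561, 2562 };  /* sequential 1-byte writes */

    for (size_t i = 0; i < sizeof (offsets) / sizeof (offsets[0]); i++) {
        printf ("write(offset=%" PRIu64 ", size=1): stripe %" PRIu64 ", rmw=%s\n",
                offsets[i], offsets[i] / STRIPE_SIZE,
                needs_rmw (offsets[i], 1) ? "yes" : "no");
    }

    return 0;
}
```

All three writes fall in stripe 1, and without a cache each of them triggers a READ of the same 2048-byte stripe over the network.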
Solution:
One possible solution is to keep the last written
stripe in memory. That way we do not need to read
it again from the bricks, saving a READ fop from
going over the network. For the example above, we
have to keep at most the last 2048 bytes in memory
per file.
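
The shape of the fix, as a rough sketch (hypothetical names and layout, not the patch's actual data structures; the real patch hangs an LRU list of stripes off the inode context and counts hits and misses with atomic counters, as the diff below shows):

```c
#include <stdint.h>
#include <string.h>

#define STRIPE_SIZE 2048

/* Hypothetical per-file cache that remembers only the last written stripe. */
typedef struct {
    uint64_t stripe_idx;          /* index of the cached stripe       */
    int      valid;               /* zero until the first write lands */
    char     data[STRIPE_SIZE];   /* decoded contents of that stripe  */
} stripe_cache_t;

/* Returns 1 and copies the stripe out on a hit; on a miss the caller
 * still has to send a READ over the network. */
static int
stripe_cache_lookup (stripe_cache_t *c, uint64_t offset, char *out)
{
    if (c->valid && c->stripe_idx == offset / STRIPE_SIZE) {
        memcpy (out, c->data, STRIPE_SIZE);
        return 1;
    }
    return 0;
}

/* After every write, remember the stripe that was just encoded. */
static void
stripe_cache_update (stripe_cache_t *c, uint64_t offset, const char *stripe)
{
    c->stripe_idx = offset / STRIPE_SIZE;
    memcpy (c->data, stripe, STRIPE_SIZE);
    c->valid = 1;
}
```

With this in place, the second and later 1-byte writes from the example hit the cache and skip the READ entirely.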
Change-Id: I3f95e6fc3ff81953646d374c445a40c6886b0b85
BUG: 1471753
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec.c')
| -rw-r--r-- | xlators/cluster/ec/src/ec.c | 52 |
1 file changed, 52 insertions, 0 deletions
```diff
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 9f361a54aa3..275dd15a302 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -299,6 +299,8 @@ reconfigure (xlator_t *this, dict_t *options)
                       options, bool, failed);
     GF_OPTION_RECONF ("parallel-writes", ec->parallel_writes,
                       options, bool, failed);
+    GF_OPTION_RECONF ("stripe-cache", ec->stripe_cache, options, uint32,
+                      failed);
     ret = 0;
     if (ec_assign_read_policy (ec, read_policy)) {
         ret = -1;
@@ -581,6 +583,18 @@ notify (xlator_t *this, int32_t event, void *data, ...)
     return ret;
 }
 
+static void
+ec_statistics_init(ec_t *ec)
+{
+    GF_ATOMIC_INIT(ec->stats.stripe_cache.hits, 0);
+    GF_ATOMIC_INIT(ec->stats.stripe_cache.misses, 0);
+    GF_ATOMIC_INIT(ec->stats.stripe_cache.updates, 0);
+    GF_ATOMIC_INIT(ec->stats.stripe_cache.invals, 0);
+    GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
+    GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
+    GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+}
+
 int32_t
 init (xlator_t *this)
 {
@@ -671,6 +685,7 @@ init (xlator_t *this)
     GF_OPTION_INIT ("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
     GF_OPTION_INIT ("optimistic-change-log", ec->optimistic_changelog, bool, failed);
     GF_OPTION_INIT ("parallel-writes", ec->parallel_writes, bool, failed);
+    GF_OPTION_INIT ("stripe-cache", ec->stripe_cache, uint32, failed);
 
     this->itable = inode_table_new (EC_SHD_INODE_LRU_LIMIT, this);
     if (!this->itable)
@@ -697,6 +712,8 @@ init (xlator_t *this)
         goto failed;
     }
 
+    ec_statistics_init(ec);
+
     return 0;
 
 failed:
@@ -1252,6 +1269,9 @@ int32_t ec_gf_forget(xlator_t * this, inode_t * inode)
 
     if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
         ctx = (ec_inode_t *)(uintptr_t)value;
+        /* We can only forget an inode if it has been unlocked, so the stripe
+         * cache should also be empty. */
+        GF_ASSERT(list_empty(&ctx->stripe_cache.lru));
         GF_FREE(ctx);
     }
 
@@ -1313,6 +1333,25 @@ int32_t ec_dump_private(xlator_t *this)
     gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
     gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
 
+    snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
+             this->type, this->name);
+    gf_proc_dump_add_section(key_prefix);
+
+    gf_proc_dump_write("hits", "%llu",
+                       GF_ATOMIC_GET(ec->stats.stripe_cache.hits));
+    gf_proc_dump_write("misses", "%llu",
+                       GF_ATOMIC_GET(ec->stats.stripe_cache.misses));
+    gf_proc_dump_write("updates", "%llu",
+                       GF_ATOMIC_GET(ec->stats.stripe_cache.updates));
+    gf_proc_dump_write("invalidations", "%llu",
+                       GF_ATOMIC_GET(ec->stats.stripe_cache.invals));
+    gf_proc_dump_write("evicts", "%llu",
+                       GF_ATOMIC_GET(ec->stats.stripe_cache.evicts));
+    gf_proc_dump_write("allocations", "%llu",
+                       GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
+    gf_proc_dump_write("errors", "%llu",
+                       GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+
     return 0;
 }
 
@@ -1512,5 +1551,18 @@ struct volume_options options[] =
       .description = "This controls if writes can be wound in parallel as long"
                      "as it doesn't modify same stripes"
     },
+    { .key = {"stripe-cache"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 0,/*Disabling stripe_cache*/
+      .max = EC_STRIPE_CACHE_MAX_SIZE,
+      .default_value = "0",
+      .description = "This option will keep the last stripe of write fop"
+                     "in memory. If next write falls in this stripe, we need"
+                     "not to read it again from backend and we can save READ"
+                     "fop going over the network. This will improve performance,"
+                     "specially for sequential writes. However, this will also"
+                     "lead to extra memory consumption, maximum "
+                     "(cache size * stripe size) Bytes per open file."
+    },
     { .key = {NULL} }
 };
```
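Assuming the option is exposed through the usual volume-set path like the other disperse options (the exact CLI name is an assumption, so check `gluster volume set help` on your build), enabling the cache would look like `gluster volume set <volname> disperse.stripe-cache 1`, with 0 (the default here) disabling it and EC_STRIPE_CACHE_MAX_SIZE capping it. The hit/miss/eviction counters wired into `ec_dump_private` then appear in a statedump, which is how you would verify that the cache is actually absorbing the READs.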
