summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-common.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-common.c')
-rw-r--r--xlators/cluster/ec/src/ec-common.c115
1 files changed, 115 insertions, 0 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index ea5773c9879..29ab66f374c 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -1437,6 +1437,23 @@ ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
return found;
}
+static void
+ec_release_stripe_cache (ec_inode_t *ctx)
+{
+ ec_stripe_list_t *stripe_cache = NULL;
+ ec_stripe_t *stripe = NULL;
+
+ stripe_cache = &ctx->stripe_cache;
+ while (!list_empty (&stripe_cache->lru)) {
+ stripe = list_first_entry (&stripe_cache->lru, ec_stripe_t,
+ lru);
+ list_del (&stripe->lru);
+ GF_FREE (stripe);
+ }
+ stripe_cache->count = 0;
+ stripe_cache->max = 0;
+}
+
void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
{
ec_inode_t *ctx;
@@ -1448,6 +1465,7 @@ void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
goto unlock;
}
+ ec_release_stripe_cache (ctx);
ctx->have_info = _gf_false;
ctx->have_config = _gf_false;
ctx->have_version = _gf_false;
@@ -2465,6 +2483,102 @@ ec_use_eager_lock(ec_t *ec, ec_fop_data_t *fop)
return ec->other_eager_lock;
}
+static void
+ec_update_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache, ec_stripe_t *stripe,
+ ec_fop_data_t *fop)
+{
+ off_t base;
+
+ /* On write fops, we only update existing fragments if the write has
+ * succeeded. Otherwise, we remove them from the cache. */
+ if ((fop->id == GF_FOP_WRITE) && (fop->answer != NULL) &&
+ (fop->answer->op_ret >= 0)) {
+ base = stripe->frag_offset - fop->frag_range.first;
+ base *= ec->fragments;
+
+ /* We check if the stripe offset falls inside the real region
+ * modified by the write fop (a write request is allowed,
+ * though uncommon, to write less bytes than requested). The
+ * current write fop implementation doesn't allow partial
+ * writes of fragments, so if there's no error, we are sure
+ * that a full stripe has been completely modified or not
+ * touched at all. The value of op_ret may not be a multiple
+ * of the stripe size because it depends on the requested
+ * size by the user, so we update the stripe if the write has
+ * modified at least one byte (meaning ec has written the full
+ * stripe). */
+ if (base < fop->answer->op_ret) {
+ memcpy(stripe->data, fop->vector[0].iov_base + base,
+ ec->stripe_size);
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.updates);
+ }
+ } else {
+ stripe->frag_offset = -1;
+ list_move (&stripe->lru, &stripe_cache->lru);
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.invals);
+ }
+}
+
+static void
+ec_update_cached_stripes (ec_fop_data_t *fop)
+{
+ uint64_t first;
+ uint64_t last;
+ ec_stripe_t *stripe = NULL;
+ ec_inode_t *ctx = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+ inode_t *inode = NULL;
+ struct list_head *temp;
+ struct list_head sentinel;
+
+ first = fop->frag_range.first;
+ /* 'last' represents the first stripe not touched by the operation */
+ last = fop->frag_range.last;
+
+ /* If there are no modified stripes, we don't need to do anything
+ * else. */
+ if (last <= first) {
+ return;
+ }
+
+ if (!fop->use_fd) {
+ inode = fop->loc[0].inode;
+ } else {
+ inode = fop->fd->inode;
+ }
+
+ LOCK(&inode->lock);
+
+ ctx = __ec_inode_get (inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+ stripe_cache = &ctx->stripe_cache;
+
+ /* Since we'll be moving elements of the list to the tail, we might
+ * end in an infinite loop. To avoid it, we insert a sentinel element
+ * into the list, so that it will be used to detect when we have
+ * traversed all existing elements once. */
+ list_add_tail(&sentinel, &stripe_cache->lru);
+ temp = stripe_cache->lru.next;
+ while (temp != &sentinel) {
+ stripe = list_entry(temp, ec_stripe_t, lru);
+ temp = temp->next;
+ if ((first <= stripe->frag_offset) &&
+ (stripe->frag_offset < last)) {
+ ec_update_stripe (fop->xl->private, stripe_cache,
+ stripe, fop);
+ }
+ }
+ list_del(&sentinel);
+
+out:
+ UNLOCK(&inode->lock);
+}
+
void ec_lock_reuse(ec_fop_data_t *fop)
{
ec_cbk_data_t *cbk;
@@ -2491,6 +2605,7 @@ void ec_lock_reuse(ec_fop_data_t *fop)
* the lock. */
release = _gf_true;
}
+ ec_update_cached_stripes (fop);
for (i = 0; i < fop->lock_count; i++) {
ec_lock_next_owner(&fop->locks[i], cbk, release);