diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2017-05-24 22:30:29 +0530 |
---|---|---|
committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-05-29 14:12:20 +0000 |
commit | 1db7887771c748a63f3c46ce72918c98cb6dc208 (patch) | |
tree | 2d61937442f3f34b6e5dbd0f8b6eb62628df3214 /xlators | |
parent | 5b3e5f1c54fae28c0c89c1b8f769a5777e970956 (diff) |
features/shard: Handle offset in appending writes
When a file is opened with O_APPEND, every write is appended at the end of the
file, irrespective of the offset given in the write syscall. This needs to be
taken into account in the shard size-update function and also when choosing
which shard to write to.
At the moment, shard piggybacks on the queuing done by the write-behind
xlator for the ordering of operations. So if write-behind is disabled and
two parallel appending writes arrive, both of which can increase the file size
beyond the shard size, the file will be corrupted.
>BUG: 1455301
>Change-Id: I9007e6a39098ab0b5d5386367bd07eb5f89cb09e
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
>Reviewed-on: https://review.gluster.org/17387
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
BUG: 1456225
Change-Id: I9007e6a39098ab0b5d5386367bd07eb5f89cb09e
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://review.gluster.org/17404
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/features/shard/src/shard.c | 109 |
1 files changed, 67 insertions, 42 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index a48806c6a8b..65cf23c7bdd 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -3629,6 +3629,18 @@ shard_common_inode_write_post_update_size_handler (call_frame_t *frame, return 0; } +static gf_boolean_t +shard_is_appending_write (shard_local_t *local) +{ + if (local->fop != GF_FOP_WRITE) + return _gf_false; + if (local->flags & O_APPEND) + return _gf_true; + if (local->fd->flags & O_APPEND) + return _gf_true; + return _gf_false; +} + int __shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, xlator_t *this) @@ -3643,13 +3655,15 @@ __shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, ctx = (shard_inode_ctx_t *) ctx_uint; - if (local->offset + local->total_size > ctx->stat.ia_size) { + if (shard_is_appending_write (local)) { + local->delta_size = local->total_size; + } else if (local->offset + local->total_size > ctx->stat.ia_size) { local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; - ctx->stat.ia_size += (local->delta_size); } else { local->delta_size = 0; } + ctx->stat.ia_size += (local->delta_size); local->postbuf = ctx->stat; return 0; @@ -3955,8 +3969,11 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame, } int -shard_common_inode_write_post_lookup_handler (call_frame_t *frame, - xlator_t *this) +shard_mkdir_dot_shard (call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler); +int +shard_common_inode_write_post_resolve_handler (call_frame_t *frame, + xlator_t *this) { shard_local_t *local = NULL; @@ -3969,8 +3986,6 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, return 0; } - local->postbuf = local->prebuf; - if (local->call_count) { shard_common_lookup_shards (frame, this, local->resolver_base_inode, @@ -3983,12 +3998,11 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, } int 
-shard_common_inode_write_post_resolve_handler (call_frame_t *frame, - xlator_t *this) +shard_common_inode_write_post_lookup_handler (call_frame_t *frame, + xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; + shard_local_t *local = frame->local; + shard_priv_t *priv = this->private; if (local->op_ret < 0) { shard_common_inode_write_failure_unwind (local->fop, frame, @@ -3997,8 +4011,46 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame, return 0; } - shard_lookup_base_file (frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); + local->postbuf = local->prebuf; + + /*Adjust offset to EOF so that correct shard is chosen for append*/ + if (shard_is_appending_write (local)) + local->offset = local->prebuf.ia_size; + + local->first_block = get_lowest_block (local->offset, + local->block_size); + local->last_block = get_highest_block (local->offset, local->total_size, + local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; + local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) { + shard_common_inode_write_failure_unwind (local->fop, frame, + -1, ENOMEM); + return 0; + } + + gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" " + "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64 + " total_size=%zu flags=%"PRId32"", + gf_fop_list[local->fop], + uuid_utoa (local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, + local->offset, local->total_size, local->flags); + + local->dot_shard_loc.inode = inode_find (this->itable, + priv->dot_shard_gfid); + + if (!local->dot_shard_loc.inode) { + /*change handler*/ + shard_mkdir_dot_shard (frame, this, + shard_common_inode_write_post_resolve_handler); + } else { + /*change handler*/ + local->post_res_handler = + shard_common_inode_write_post_resolve_handler; + shard_refresh_dot_shard (frame, this); + } return 0; } 
@@ -4697,9 +4749,6 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this, int i = 0; uint64_t block_size = 0; shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); if (ret) { @@ -4775,37 +4824,13 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this, local->iobref = iobref_ref (iobref); local->fd = fd_ref (fd); local->block_size = block_size; - local->first_block = get_lowest_block (offset, local->block_size); - local->last_block = get_highest_block (offset, local->total_size, - local->block_size); - local->num_blocks = local->last_block - local->first_block + 1; local->resolver_base_inode = local->fd->inode; - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto out; local->loc.inode = inode_ref (fd->inode); gf_uuid_copy (local->loc.gfid, fd->inode->gfid); - gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" " - "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64"" - " total_size=%zu flags=%"PRId32"", gf_fop_list[fop], - uuid_utoa (fd->inode->gfid), local->first_block, - local->last_block, local->num_blocks, offset, - local->total_size, local->flags); - - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - - if (!local->dot_shard_loc.inode) { - shard_mkdir_dot_shard (frame, this, - shard_common_inode_write_post_resolve_handler); - } else { - local->post_res_handler = shard_common_inode_write_post_resolve_handler; - shard_refresh_dot_shard (frame, this); - } - + shard_lookup_base_file (frame, this, &local->loc, + shard_common_inode_write_post_lookup_handler); return 0; out: shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM); |