summary | refs | log | tree | commit | diff | stats
path: root/xlators/features/shard
diff options
context:
space:
mode:
author: Krutika Dhananjay <kdhananj@redhat.com> 2015-09-07 14:57:24 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-09-27 23:06:57 -0700
commit: 8e81a818010aa53e36649cb14d1363b273cbbedc (patch)
tree: ebd2d46443f14efbd8dbc1a34e145ca98b53e288 /xlators/features/shard
parent: 63d4382103573867b10efeb44acfb0bb6e4e1614 (diff)
features/shard: Performance improvements in IO path
Backport of: http://review.gluster.org/#/c/12126/

This is patch 1/2 of the performance improvement work for sharding in the IO path.

What this patch does:
Since the primary use-case where sharding is targeted - VM store - is a single-writer workload, instead of performing a lookup on the base file every time to gather the size and block count from the backend in reads, writes and truncate, the size and block count are now also cached in the inode ctx and kept up-to-date after every inode write.

TO-DO:
Make changes in rename, link, unlink, [f]setattr and [f]stat to keep the relevant iatt members up-to-date in the inode ctx.

Change-Id: Id4f5c33044411b87b55968083a70a0a11a335ab2
BUG: 1261716
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/12213
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/features/shard')
-rw-r--r-- xlators/features/shard/src/shard.c 260
-rw-r--r-- xlators/features/shard/src/shard.h 43
2 files changed, 233 insertions, 70 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 35a4f551e96..4ccd4b01a0b 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -74,10 +74,9 @@ __shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
return ret;
}
-
int
-__shard_inode_ctx_set (inode_t *inode, xlator_t *this,
- shard_inode_ctx_t *ctx_in)
+__shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
+ uint64_t block_size, int32_t valid)
{
int ret = -1;
shard_inode_ctx_t *ctx = NULL;
@@ -86,27 +85,65 @@ __shard_inode_ctx_set (inode_t *inode, xlator_t *this,
if (ret)
return ret;
- ctx->block_size = ctx_in->block_size;
- ctx->mode = ctx_in->mode;
- ctx->rdev = ctx_in->rdev;
+ if (valid & SHARD_MASK_BLOCK_SIZE)
+ ctx->block_size = block_size;
+
+ if (!stbuf)
+ return 0;
+
+ if (valid & SHARD_MASK_PROT)
+ ctx->stat.ia_prot = stbuf->ia_prot;
+
+ if (valid & SHARD_MASK_NLINK)
+ ctx->stat.ia_nlink = stbuf->ia_nlink;
+
+ if (valid & SHARD_MASK_UID)
+ ctx->stat.ia_uid = stbuf->ia_uid;
+
+ if (valid & SHARD_MASK_GID)
+ ctx->stat.ia_gid = stbuf->ia_gid;
+
+ if (valid & SHARD_MASK_SIZE)
+ ctx->stat.ia_size = stbuf->ia_size;
+
+ if (valid & SHARD_MASK_BLOCKS)
+ ctx->stat.ia_blocks = stbuf->ia_blocks;
+
+ if (valid & SHARD_MASK_TIMES) {
+ SHARD_TIME_UPDATE (ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
+ stbuf->ia_mtime, stbuf->ia_mtime_nsec);
+ SHARD_TIME_UPDATE (ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec);
+ SHARD_TIME_UPDATE (ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
+ stbuf->ia_atime, stbuf->ia_atime_nsec);
+ }
+
+ if (valid & SHARD_MASK_OTHERS) {
+ ctx->stat.ia_ino = stbuf->ia_ino;
+ gf_uuid_copy (ctx->stat.ia_gfid, stbuf->ia_gfid);
+ ctx->stat.ia_dev = stbuf->ia_dev;
+ ctx->stat.ia_type = stbuf->ia_type;
+ ctx->stat.ia_rdev = stbuf->ia_rdev;
+ ctx->stat.ia_blksize = stbuf->ia_blksize;
+ }
return 0;
}
int
-shard_inode_ctx_set_all (inode_t *inode, xlator_t *this,
- shard_inode_ctx_t *ctx_in)
+shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
+ uint64_t block_size, int32_t valid)
{
int ret = -1;
LOCK (&inode->lock);
{
- ret = __shard_inode_ctx_set (inode, this, ctx_in);
+ ret = __shard_inode_ctx_set (inode, this, stbuf, block_size,
+ valid);
}
UNLOCK (&inode->lock);
return ret;
-
}
int
@@ -158,10 +195,7 @@ __shard_inode_ctx_get_all (inode_t *inode, xlator_t *this,
ctx = (shard_inode_ctx_t *) ctx_uint;
- ctx_out->block_size = ctx->block_size;
- ctx_out->mode = ctx->mode;
- ctx_out->rdev = ctx->rdev;
-
+ memcpy (ctx_out, ctx, sizeof (shard_inode_ctx_t));
return 0;
}
@@ -341,6 +375,7 @@ shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata)
{
+ inode_t *inode = NULL;
shard_local_t *local = NULL;
local = frame->local;
@@ -351,6 +386,20 @@ shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
+ if (shard_modify_size_and_block_count (&local->postbuf, xdata)) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ if ((local->fd) && (local->fd->inode))
+ inode = local->fd->inode;
+ else if (local->loc.inode)
+ inode = local->loc.inode;
+
+ shard_inode_ctx_set (inode, this, &local->postbuf, 0,
+ SHARD_INODE_WRITE_MASK);
+
err:
local->post_update_size_handler (frame, this);
return 0;
@@ -402,7 +451,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!xattr_req) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
if (fd)
@@ -410,6 +459,13 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
else
inode = loc->inode;
+ /* If both size and block count have not changed, then skip the xattrop.
+ */
+ if ((local->delta_size + local->hole_size == 0) &&
+ (local->delta_blocks == 0)) {
+ goto out;
+ }
+
ret = shard_set_size_attrs (local->delta_size + local->hole_size,
local->delta_blocks, &size_attr);
if (ret) {
@@ -417,7 +473,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
" %s", uuid_utoa (inode->gfid));
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr,
@@ -429,7 +485,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
GF_FREE (size_attr);
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
if (fd)
@@ -446,7 +502,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
dict_unref (xattr_req);
return 0;
-err:
+out:
if (xattr_req)
dict_unref (xattr_req);
handler (frame, this);
@@ -551,27 +607,31 @@ err:
}
static void
-shard_inode_ctx_set_if_absent (inode_t *inode, xlator_t *this, dict_t *xdata,
- struct iatt *buf)
+shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata,
+ struct iatt *buf)
{
int ret = 0;
uint64_t size = 0;
void *bsize = NULL;
- shard_inode_ctx_t ctx_tmp = {0,};
if (shard_inode_ctx_get_block_size (inode, this, &size)) {
+ /* Fresh lookup */
ret = dict_get_ptr (xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
- if (!ret) {
- ctx_tmp.block_size = ntoh64 (*((uint64_t *)bsize));
- ctx_tmp.mode = st_mode_from_ia (buf->ia_prot,
- buf->ia_type);
- ctx_tmp.rdev = buf->ia_rdev;
- }
- ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING, "Failed to set "
- "inode ctx for %s", uuid_utoa (buf->ia_gfid));
+ if (!ret)
+ size = ntoh64 (*((uint64_t *)bsize));
+ /* If the file is sharded, set its block size, otherwise just
+ * set 0.
+ */
+
+ shard_inode_ctx_set (inode, this, buf, size,
+ SHARD_MASK_BLOCK_SIZE);
}
+ /* If the file is sharded, also set the remaining attributes,
+ * except for ia_size and ia_blocks.
+ */
+ if (size)
+ shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK);
+
}
int
@@ -594,7 +654,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* already initialised to all zeroes, nothing more needs to be done.
*/
- (void) shard_inode_ctx_set_if_absent (inode, this, xdata, buf);
+ (void) shard_inode_ctx_update (inode, this, xdata, buf);
/* Also, if the file is sharded, get the file size and block cnt xattr,
* and store them in the stbuf appropriately.
@@ -673,7 +733,10 @@ shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
+ int ret = -1;
+ int32_t mask = SHARD_INODE_WRITE_MASK;
shard_local_t *local = NULL;
+ shard_inode_ctx_t ctx = {0,};
local = frame->local;
@@ -690,6 +753,19 @@ shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
+ if (shard_inode_ctx_get_all (inode, this, &ctx))
+ mask = SHARD_ALL_MASK;
+
+ ret = shard_inode_ctx_set (inode, this, &local->prebuf, 0, mask);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set inode write "
+ "params into inode ctx for %s",
+ uuid_utoa (buf->ia_gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
unwind:
local->handler (frame, this);
return 0;
@@ -699,21 +775,32 @@ int
shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc,
shard_post_fop_handler_t handler)
{
- shard_local_t *local = NULL;
- dict_t *xattr_req = NULL;
+ int ret = -1;
+ shard_local_t *local = NULL;
+ shard_inode_ctx_t ctx = {0,};
+ dict_t *xattr_req = NULL;
local = frame->local;
local->handler = handler;
+ ret = shard_inode_ctx_get_all (loc->inode, this, &ctx);
+ /* By this time, inode ctx should have been created either in create,
+ * mknod, readdirp or lookup. If not it is a bug!
+ */
+ if ((ret == 0) && (ctx.stat.ia_size > 0)) {
+ local->prebuf = ctx.stat;
+ goto out;
+ }
+
xattr_req = dict_new ();
if (!xattr_req) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
SHARD_MD_READ_FOP_INIT_REQ_DICT (this, xattr_req, loc->gfid,
- local, err);
+ local, out);
STACK_WIND (frame, shard_lookup_base_file_cbk, FIRST_CHILD (this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -721,7 +808,7 @@ shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_unref (xattr_req);
return 0;
-err:
+out:
if (xattr_req)
dict_unref (xattr_req);
handler (frame, this);
@@ -1502,6 +1589,10 @@ shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this)
return 0;
}
+/* TO-DO:
+ * Fix updates to size and block count with racing write(s) and truncate(s).
+ */
+
int
shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
@@ -1608,17 +1699,14 @@ shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
int ret = -1;
shard_local_t *local = NULL;
- shard_inode_ctx_t ctx_tmp = {0,};
local = frame->local;
if (op_ret == -1)
goto unwind;
- ctx_tmp.block_size = ntoh64 (local->block_size);
- ctx_tmp.mode = st_mode_from_ia (buf->ia_prot, buf->ia_type);
- ctx_tmp.rdev = buf->ia_rdev;
- ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp);
+ ret = shard_inode_ctx_set (inode, this, buf, ntoh64 (local->block_size),
+ SHARD_ALL_MASK);
if (ret)
gf_log (this->name, GF_LOG_WARNING, "Failed to set inode ctx "
"for %s", uuid_utoa (inode->gfid));
@@ -2278,22 +2366,17 @@ shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
int ret = -1;
shard_local_t *local = NULL;
- shard_inode_ctx_t ctx_tmp = {0,};
local = frame->local;
if (op_ret == -1)
goto unwind;
- ctx_tmp.block_size = ntoh64 (local->block_size);
- ctx_tmp.mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
- ctx_tmp.rdev = stbuf->ia_rdev;
- ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "Failed to set block size "
- "for %s in inode ctx", uuid_utoa (inode->gfid));
- goto unwind;
- }
+ ret = shard_inode_ctx_set (inode, this, stbuf,
+ ntoh64 (local->block_size), SHARD_ALL_MASK);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "Failed to set inode ctx "
+ "for %s", uuid_utoa (inode->gfid));
unwind:
SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf,
@@ -2600,6 +2683,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
int ret = 0;
int call_count = 0;
char path[PATH_MAX] = {0,};
+ mode_t mode = 0;
char *bname = NULL;
shard_priv_t *priv = NULL;
shard_inode_ctx_t ctx_tmp = {0,};
@@ -2627,6 +2711,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
local->op_errno = ENOMEM;
goto err;
}
+ mode = st_mode_from_ia (ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
while (shard_idx_iter <= last_block) {
if (local->inode_list[i]) {
@@ -2686,7 +2771,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
(void *) (long) shard_idx_iter,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, &loc,
- ctx_tmp.mode, ctx_tmp.rdev, 0, xattr_req);
+ mode, ctx_tmp.stat.ia_rdev, 0, xattr_req);
loc_wipe (&loc);
dict_unref (xattr_req);
@@ -2929,10 +3014,52 @@ shard_post_update_size_writev_handler (call_frame_t *frame, xlator_t *this)
}
int
+__shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get (inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *) ctx_uint;
+
+ if (local->offset + local->total_size > ctx->stat.ia_size) {
+ local->delta_size = (local->offset + local->total_size) -
+ ctx->stat.ia_size;
+ ctx->stat.ia_size += (local->delta_size);
+ } else {
+ local->delta_size = 0;
+ }
+
+ return 0;
+}
+
+int
+shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __shard_get_delta_size_from_inode_ctx (local, inode,
+ this);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+int
shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
+ int ret = 0;
int call_count = 0;
fd_t *anon_fd = cookie;
shard_local_t *local = NULL;
@@ -2946,6 +3073,14 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->written_size += op_ret;
local->delta_blocks += (postbuf->ia_blocks - prebuf->ia_blocks);
local->delta_size += (postbuf->ia_size - prebuf->ia_size);
+ ret = shard_inode_ctx_set (local->fd->inode, this, postbuf, 0,
+ SHARD_MASK_TIMES);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to set "
+ "times in the inode ctx. Shard = %s, base file "
+ "gfid = %s", uuid_utoa (postbuf->ia_gfid),
+ uuid_utoa (local->fd->inode->gfid));
+ }
}
if (anon_fd)
@@ -2958,12 +3093,9 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
SHARD_STACK_UNWIND (writev, frame, local->op_ret,
local->op_errno, NULL, NULL, NULL);
} else {
- if (local->is_write_extending)
- local->delta_size = (local->offset +
- local->total_size) -
- local->prebuf.ia_size;
- else
- local->delta_size = 0;
+ shard_get_delta_size_from_inode_ctx (local,
+ local->fd->inode,
+ this);
local->hole_size = 0;
if (xdata)
local->xattr_rsp = dict_ref (xdata);
@@ -3138,12 +3270,6 @@ shard_post_lookup_writev_handler (call_frame_t *frame, xlator_t *this)
local->postbuf = local->prebuf;
- /* At this point, calculate the size of the hole if it is going to be
- * created as part of this write.
- */
- if (local->offset + local->total_size > local->prebuf.ia_size)
- local->is_write_extending = _gf_true;
-
if (local->create_count)
shard_common_resume_mknod (frame, this,
shard_post_mknod_writev_handler);
@@ -3462,8 +3588,8 @@ shard_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!entry->inode)
continue;
- shard_inode_ctx_set_if_absent (entry->inode, this, entry->dict,
- &entry->d_stat);
+ shard_inode_ctx_update (entry->inode, this, entry->dict,
+ &entry->d_stat);
}
local->op_ret = op_ret;
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index b57e2c4d7c9..982f3869a7a 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -29,6 +29,32 @@
#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
#define SHARD_INODE_LRU_LIMIT 4096
+/**
+ * Bit masks for the valid flag, which is used while updating ctx
+**/
+#define SHARD_MASK_BLOCK_SIZE (1 << 0)
+#define SHARD_MASK_PROT (1 << 1)
+#define SHARD_MASK_NLINK (1 << 2)
+#define SHARD_MASK_UID (1 << 3)
+#define SHARD_MASK_GID (1 << 4)
+#define SHARD_MASK_SIZE (1 << 6)
+#define SHARD_MASK_BLOCKS (1 << 7)
+#define SHARD_MASK_TIMES (1 << 8)
+#define SHARD_MASK_OTHERS (1 << 9)
+
+#define SHARD_INODE_WRITE_MASK (SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \
+ | SHARD_MASK_TIMES)
+
+#define SHARD_LOOKUP_MASK (SHARD_MASK_PROT | SHARD_MASK_NLINK | SHARD_MASK_UID \
+ | SHARD_MASK_GID | SHARD_MASK_TIMES \
+ | SHARD_MASK_OTHERS)
+
+#define SHARD_ALL_MASK (SHARD_MASK_BLOCK_SIZE | SHARD_MASK_PROT \
+ | SHARD_MASK_NLINK | SHARD_MASK_UID | SHARD_MASK_GID \
+ | SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \
+ | SHARD_MASK_TIMES | SHARD_MASK_OTHERS)
+
+
#define get_lowest_block(off, shard_size) ((off) / (shard_size))
#define get_highest_block(off, len, shard_size) \
(((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size))
@@ -135,6 +161,19 @@
} \
} while (0)
+#define SHARD_TIME_UPDATE(ctx_sec, ctx_nsec, new_sec, new_nsec) do { \
+ if (ctx_sec == new_sec) \
+ ctx_nsec = new_nsec = max (new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } else { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } while (0)
+
+
typedef struct shard_priv {
uint64_t block_size;
uuid_t dot_shard_gfid;
@@ -204,7 +243,6 @@ typedef struct shard_local {
gf_dirent_t entries_head;
gf_boolean_t is_set_fsid;
gf_boolean_t list_inited;
- gf_boolean_t is_write_extending;
shard_post_fop_handler_t handler;
shard_post_lookup_shards_fop_handler_t pls_fop_handler;
shard_post_resolve_fop_handler_t post_res_handler;
@@ -218,10 +256,9 @@ typedef struct shard_local {
} shard_local_t;
typedef struct shard_inode_ctx {
- uint32_t rdev;
uint64_t block_size; /* The block size with which this inode is
sharded */
- mode_t mode;
+ struct iatt stat;
} shard_inode_ctx_t;
#endif /* __SHARD_H__ */