diff options
-rw-r--r-- | extras/group-gluster-block | 1 | ||||
-rw-r--r-- | tests/bugs/shard/shard-append-test.c | 179 | ||||
-rw-r--r-- | tests/bugs/shard/shard-append-test.t | 32 | ||||
-rw-r--r-- | xlators/features/shard/src/shard.c | 109 |
4 files changed, 278 insertions, 43 deletions
diff --git a/extras/group-gluster-block b/extras/group-gluster-block index 0753d26b3ca..a4a6367920b 100644 --- a/extras/group-gluster-block +++ b/extras/group-gluster-block @@ -2,7 +2,6 @@ performance.quick-read=off performance.read-ahead=off performance.io-cache=off performance.stat-prefetch=off -performance.write-behind=off performance.open-behind=off performance.readdir-ahead=off network.remote-dio=enable diff --git a/tests/bugs/shard/shard-append-test.c b/tests/bugs/shard/shard-append-test.c new file mode 100644 index 00000000000..92dff3d078d --- /dev/null +++ b/tests/bugs/shard/shard-append-test.c @@ -0,0 +1,179 @@ +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <limits.h> +#include <string.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> + +#define LOG_ERR(msg) do { \ + fprintf (stderr, "%s : Error (%s)\n", msg, strerror (errno)); \ + } while (0) + +/*This test tests that shard xlator handles offset in appending writes + * correctly. This test performs writes of 1025 bytes 1025 times, in 5 threads + * with different threads. The buffer to be written is same character repeated + * 1025 times in the buffer for a thread. At the end it reads the buffer till + * end of file and tests that the read of 1025 bytes is always same character + * and the content read is 5*1025*1025 size. 1025 bytes is chosen because it + * will lead to write on more than one shard at some point when the size is + * going over the initial shard*/ +pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; +int thread_data = '1'; + +glfs_t * +init_glfs (const char *hostname, const char *volname, + const char *logfile) +{ + int ret = -1; + glfs_t *fs = NULL; + + fs = glfs_new (volname); + if (!fs) { + LOG_ERR ("glfs_new failed"); + return NULL; + } + + ret = glfs_set_volfile_server (fs, "tcp", hostname, 24007); + if (ret < 0) { + LOG_ERR ("glfs_set_volfile_server failed"); + goto out; + } + + ret = glfs_set_logging (fs, logfile, 7); + if (ret < 0) { + LOG_ERR ("glfs_set_logging failed"); + goto out; + } + + ret = glfs_init (fs); + if (ret < 0) { + LOG_ERR ("glfs_init failed"); + goto out; + } + + ret = 0; +out: + if (ret) { + glfs_fini (fs); + fs = NULL; + } + + return fs; +} + +void* +write_data (void *data) +{ + char buf[1025] = {0}; + glfs_fd_t *glfd = NULL; + glfs_t *fs = data; + int i = 0; + + pthread_mutex_lock (&lock); + { + memset(buf, thread_data, sizeof(buf)); + thread_data++; + } + pthread_mutex_unlock (&lock); + + for (i = 0; i < 1025; i++) { + glfd = glfs_creat(fs, "parallel-write.txt", O_WRONLY | O_APPEND, + S_IRUSR | S_IWUSR | O_SYNC); + if (!glfd) { + LOG_ERR ("Failed to create file"); + exit(1); + } + + if (glfs_write (glfd, buf, sizeof(buf), 0) < 0) { + LOG_ERR ("Failed to write to file"); + exit(1); + } + if (glfs_close(glfd) != 0) { + LOG_ERR ("Failed to close file"); + exit(1); + } + } + return NULL; +} + +int +main (int argc, char *argv[]) +{ + pthread_t tid[5] = {0}; + char buf[1025] = {0}; + char cmp_buf[1025] = {0}; + int ret = 0; + char *hostname = NULL; + char *volname = NULL; + char *logfile = NULL; + glfs_t *fs = NULL; + glfs_fd_t *glfd = NULL; + ssize_t bytes_read = 0; + ssize_t total_bytes_read = 0; + int i = 0; + + if (argc != 4) { + fprintf (stderr, "Invalid argument\n"); + exit(1); + } + + hostname = argv[1]; + volname = argv[2]; + logfile = argv[3]; + + fs = init_glfs (hostname, volname, logfile); + if (fs == NULL) { + LOG_ERR ("init_glfs failed"); + return -1; + } + + for (i = 0; i < 5; i++) { + pthread_create(&tid[i], NULL, write_data, fs); + } + + for (i = 0; i < 5; i++) { + pthread_join(tid[i], NULL); + } + glfd = glfs_open(fs, "parallel-write.txt", O_RDONLY); + if (!glfd) { + LOG_ERR ("Failed to open file for reading"); + exit(1); + } + + while ((bytes_read = glfs_read (glfd, buf, sizeof(buf), 0)) > 0) { + if (bytes_read != sizeof(buf)) { + fprintf (stderr, "Didn't read complete data read: %zd " + "expected: %lu", bytes_read, sizeof(buf)); + exit(1); + } + + total_bytes_read += bytes_read; + if (buf[0] < '1' || buf[0] >= thread_data) { + fprintf(stderr, "Invalid character found: %c", buf[0]); + exit(1); + } + memset(cmp_buf, buf[0], sizeof(cmp_buf)); + if (memcmp(cmp_buf, buf, sizeof(cmp_buf))) { + LOG_ERR ("Data corrupted"); + exit(1); + } + memset(cmp_buf, 0, sizeof(cmp_buf)); + } + + if (total_bytes_read != 5*1025*1025) { + fprintf(stderr, "Failed to read what is written, read; %zd, " + "expected %zu", total_bytes_read, 5*1025*1025); + exit(1); + } + + if (glfs_close(glfd) != 0) { + LOG_ERR ("Failed to close"); + exit(1); + } + return 0; +} diff --git a/tests/bugs/shard/shard-append-test.t b/tests/bugs/shard/shard-append-test.t new file mode 100644 index 00000000000..f8719f2a2c1 --- /dev/null +++ b/tests/bugs/shard/shard-append-test.t @@ -0,0 +1,32 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd + +TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3}; +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 4MB +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off + +#Uncomment the following line after shard-queuing is implemented +#TEST $CLI volume set $V0 performance.write-behind off + +TEST $CLI volume set $V0 performance.strict-o-direct on +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0; + +logdir=`gluster --print-logdir` + +TEST build_tester $(dirname $0)/shard-append-test.c -lgfapi -lpthread + +TEST ./$(dirname $0)/shard-append-test ${H0} $V0 $logdir/shard-append-test.log + +cleanup_tester $(dirname $0)/shard-append-test + +cleanup; diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index eaeb840e86d..d7526339591 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -3631,6 +3631,18 @@ shard_common_inode_write_post_update_size_handler (call_frame_t *frame, return 0; } +static gf_boolean_t +shard_is_appending_write (shard_local_t *local) +{ + if (local->fop != GF_FOP_WRITE) + return _gf_false; + if (local->flags & O_APPEND) + return _gf_true; + if (local->fd->flags & O_APPEND) + return _gf_true; + return _gf_false; +} + int __shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, xlator_t *this) @@ -3645,13 +3657,15 @@ __shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, ctx = (shard_inode_ctx_t *) ctx_uint; - if (local->offset + local->total_size > ctx->stat.ia_size) { + if (shard_is_appending_write (local)) { + local->delta_size = local->total_size; + } else if (local->offset + local->total_size > ctx->stat.ia_size) { local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; - ctx->stat.ia_size += (local->delta_size); } else { local->delta_size = 0; } + ctx->stat.ia_size += (local->delta_size); local->postbuf = ctx->stat; return 0; @@ -3957,8 +3971,11 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame, } int -shard_common_inode_write_post_lookup_handler (call_frame_t *frame, - xlator_t *this) +shard_mkdir_dot_shard (call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler); +int +shard_common_inode_write_post_resolve_handler (call_frame_t *frame, + xlator_t *this) { shard_local_t *local = NULL; @@ -3971,8 +3988,6 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, return 0; } - local->postbuf = local->prebuf; - if (local->call_count) { shard_common_lookup_shards (frame, this, local->resolver_base_inode, @@ -3985,12 +4000,11 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame, } int -shard_common_inode_write_post_resolve_handler (call_frame_t *frame, - xlator_t *this) +shard_common_inode_write_post_lookup_handler (call_frame_t *frame, + xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; + shard_local_t *local = frame->local; + shard_priv_t *priv = this->private; if (local->op_ret < 0) { shard_common_inode_write_failure_unwind (local->fop, frame, @@ -3999,8 +4013,46 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame, return 0; } - shard_lookup_base_file (frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); + local->postbuf = local->prebuf; + + /*Adjust offset to EOF so that correct shard is chosen for append*/ + if (shard_is_appending_write (local)) + local->offset = local->prebuf.ia_size; + + local->first_block = get_lowest_block (local->offset, + local->block_size); + local->last_block = get_highest_block (local->offset, local->total_size, + local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; + local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) { + shard_common_inode_write_failure_unwind (local->fop, frame, + -1, ENOMEM); + return 0; + } + + gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" " + "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64 + " total_size=%zu flags=%"PRId32"", + gf_fop_list[local->fop], + uuid_utoa (local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, + local->offset, local->total_size, local->flags); + + local->dot_shard_loc.inode = inode_find (this->itable, + priv->dot_shard_gfid); + + if (!local->dot_shard_loc.inode) { + /*change handler*/ + shard_mkdir_dot_shard (frame, this, + shard_common_inode_write_post_resolve_handler); + } else { + /*change handler*/ + local->post_res_handler = + shard_common_inode_write_post_resolve_handler; + shard_refresh_dot_shard (frame, this); + } return 0; } @@ -4699,9 +4751,6 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this, int i = 0; uint64_t block_size = 0; shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); if (ret) { @@ -4777,37 +4826,13 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this, local->iobref = iobref_ref (iobref); local->fd = fd_ref (fd); local->block_size = block_size; - local->first_block = get_lowest_block (offset, local->block_size); - local->last_block = get_highest_block (offset, local->total_size, - local->block_size); - local->num_blocks = local->last_block - local->first_block + 1; local->resolver_base_inode = local->fd->inode; - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto out; local->loc.inode = inode_ref (fd->inode); gf_uuid_copy (local->loc.gfid, fd->inode->gfid); - gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" " - "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64"" - " total_size=%zu flags=%"PRId32"", gf_fop_list[fop], - uuid_utoa (fd->inode->gfid), local->first_block, - local->last_block, local->num_blocks, offset, - local->total_size, local->flags); - - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - - if (!local->dot_shard_loc.inode) { - shard_mkdir_dot_shard (frame, this, - shard_common_inode_write_post_resolve_handler); - } else { - local->post_res_handler = shard_common_inode_write_post_resolve_handler; - shard_refresh_dot_shard (frame, this); - } - + shard_lookup_base_file (frame, this, &local->loc, + shard_common_inode_write_post_lookup_handler); return 0; out: shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM); |