From 8d55c25f158921b508bff0e7f25158991913f922 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 11 Dec 2013 09:33:53 +0530 Subject: syncop: Change return value of syncop Problem: We found a day-1 bug when syncop_xxx() infra is used inside a synctask with compilation optimization (CFLAGS -O2). Detailed explanation of the Root cause: We found the bug in 'gf_defrag_migrate_data' in rebalance operation: Lets look at interesting parts of the function: int gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *migrate_data) { ..... code section - [ Loop ] while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) != 0) { ..... code section - [ ERRNO-1 ] (errno of readdirp is stored in readdir_operrno by a thread) /* Need to keep track of ENOENT errno, that means, there is no need to send more readdirp() */ readdir_operrno = errno; ..... code section - [ SYNCOP-1 ] (syncop_getxattr is called by a thread) ret = syncop_getxattr (this, &entry_loc, &dict, GF_XATTR_LINKINFO_KEY); code section - [ ERRNO-2] (checking for failures of syncop_getxattr(). This may not always be executed in same thread which executed [SYNCOP-1]) if (ret < 0) { if (errno != ENODATA) { loglevel = GF_LOG_ERROR; defrag->total_failures += 1; ..... } the function above could be executed by thread(t1) till [SYNCOP-1] and code from [ERRNO-2] can be executed by a different thread(t2) because of the way syncop-infra schedules the tasks. when the code is compiled with -O2 optimization this is the assembly code that is generated: [ERRNO-1] 1165 readdir_operrno = errno; <<---- errno gets expanded as *(__errno_location()) 0x00007fd149d48b60 <+496>: callq 0x7fd149d410c0 0x00007fd149d48b72 <+514>: mov %rax,0x50(%rsp) <<------ Address returned by __errno_location() is stored in a special location in stack for later use. 0x00007fd149d48b77 <+519>: mov (%rax),%eax 0x00007fd149d48b79 <+521>: mov %eax,0x78(%rsp) .... [ERRNO-2] 1281 if (errno != ENODATA) { 0x00007fd149d492ae <+2366>: mov 0x50(%rsp),%rax <<----- Because it already stored the address returned by __errno_location(), it just dereferences the address to get the errno value. BUT THIS CODE NEED NOT BE EXECUTED BY SAME THREAD!!! 0x00007fd149d492b3 <+2371>: mov $0x9,%ebp 0x00007fd149d492b8 <+2376>: mov (%rax),%edi 0x00007fd149d492ba <+2378>: cmp $0x3d,%edi The problem is that __errno_location() value of t1 and t2 are different. So [ERRNO-2] ends up reading errno of t1 instead of errno of t2 even though t2 is executing [ERRNO-2] code section. When code is compiled without any optimization for [ERRNO-2]: 1281 if (errno != ENODATA) { 0x00007fd58e7a326f <+2237>: callq 0x7fd58e797300 <<--- As it is calling __errno_location() again it gets the location from t2 so it works as intended. 0x00007fd58e7a3274 <+2242>: mov (%rax),%eax 0x00007fd58e7a3276 <+2244>: cmp $0x3d,%eax 0x00007fd58e7a3279 <+2247>: je 0x7fd58e7a32a1 Fix: Make syncop_xxx() return (-errno) value as the return value in case of errors and all the functions which make syncop_xxx() will need to use (-ret) to figure out the reason for failure in case of syncop_xxx() failures. Change-Id: I314d20dabe55d3e62ff66f3b4adb1cac2eaebb57 BUG: 1040356 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6475 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/features/qemu-block/src/bdrv-xlator.c | 15 ++++----------- xlators/features/qemu-block/src/qb-coroutines.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'xlators/features/qemu-block') diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c index 106c5977535..aaf028cfea5 100644 --- a/xlators/features/qemu-block/src/bdrv-xlator.c +++ b/xlators/features/qemu-block/src/bdrv-xlator.c @@ -109,7 +109,7 @@ qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags) NULL); if (ret) { loc_wipe(&loc); - return -errno; + return ret; } s->inode = inode_ref(loc.inode); @@ -153,7 +153,7 @@ qemu_gluster_create (const char *filename, QEMUOptionParameter *options) ret = syncop_fstat (FIRST_CHILD(THIS), fd, &stat); if (ret) { fd_unref (fd); - return -errno; + return ret; } if (stat.ia_size) { @@ -166,7 +166,7 @@ qemu_gluster_create (const char *filename, QEMUOptionParameter *options) ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, total_size); if (ret) { fd_unref (fd); - return -errno; + return ret; } } @@ -196,10 +196,8 @@ qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors, ret = syncop_readv (FIRST_CHILD(THIS), fd, size, offset, 0, &iov, &count, &iobref); - if (ret < 0) { - ret = -errno; + if (ret < 0) goto out; - } iov_copy (qiov->iov, qiov->niov, iov, count); /* *choke!* */ @@ -249,8 +247,6 @@ qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors iov.iov_len = size; ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0); - if (ret < 0) - ret = -errno; out: if (iobuf) @@ -306,9 +302,6 @@ qemu_gluster_truncate (BlockDriverState *bs, int64_t offset) fd_unref (fd); - if (ret < 0) - return ret; - return ret; } diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c index 7c52adb21ed..974312f1268 100644 --- a/xlators/features/qemu-block/src/qb-coroutines.c +++ b/xlators/features/qemu-block/src/qb-coroutines.c @@ -86,7 +86,7 @@ qb_format_and_resume (void *opaque) GF_FREE(qb_inode->backing_fname); if (ret) { loc_wipe(&loc); - ret = errno; + ret = -ret; goto err; } @@ -150,11 +150,10 @@ qb_format_and_resume (void *opaque) ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0); if (ret) { - ret = errno; gf_log (frame->this->name, GF_LOG_ERROR, "failed to setxattr for %s", uuid_utoa (inode->gfid)); - QB_STUB_UNWIND (stub, -1, ret); + QB_STUB_UNWIND (stub, -1, -ret); fd_unref (fd); dict_unref (xattr); return 0; @@ -476,7 +475,10 @@ qb_co_truncate (void *opaque) } } - syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.prestat); + ret = syncop_fstat (FIRST_CHILD(this), local->fd, + &stub->args_cbk.prestat); + if (ret < 0) + goto out; stub->args_cbk.prestat.ia_size = qb_inode->size; ret = bdrv_truncate (qb_inode->bs, stub->args.offset); @@ -487,7 +489,10 @@ qb_co_truncate (void *opaque) qb_inode->size = offset; - syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.poststat); + ret = syncop_fstat (FIRST_CHILD(this), local->fd, + &stub->args_cbk.poststat); + if (ret < 0) + goto out; stub->args_cbk.poststat.ia_size = qb_inode->size; qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size); -- cgit