diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2013-12-11 09:33:53 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2014-01-19 23:05:15 -0800 | 
| commit | 8d55c25f158921b508bff0e7f25158991913f922 (patch) | |
| tree | 258500ed1f6b15d6f2f6848a5cbd883b8dc40ea2 /xlators/features/qemu-block | |
| parent | c2b09dc87e0763dfdff1e93a1dc6cc4c05f091bf (diff) | |
syncop: Change return value of syncop
Problem:
We found a day-1 bug when syncop_xxx() infra is used inside a synctask with
compilation optimization (CFLAGS -O2).
Detailed explanation of the Root cause:
We found the bug in 'gf_defrag_migrate_data' in rebalance operation:
Lets look at interesting parts of the function:
int
gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
                        dict_t *migrate_data)
{
.....
code section - [ Loop ]
        while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
                                       &entries)) != 0) {
.....
code section - [ ERRNO-1 ] (errno of readdirp is stored in readdir_operrno by a
thread)
                /* Need to keep track of ENOENT errno, that means, there is no
                   need to send more readdirp() */
                readdir_operrno = errno;
.....
code section - [ SYNCOP-1 ] (syncop_getxattr is called by a thread)
                        ret = syncop_getxattr (this, &entry_loc, &dict,
                                               GF_XATTR_LINKINFO_KEY);
code section - [ ERRNO-2]   (checking for failures of syncop_getxattr(). This
may not always be executed in same thread which executed [SYNCOP-1])
                        if (ret < 0) {
                                if (errno != ENODATA) {
                                        loglevel = GF_LOG_ERROR;
                                        defrag->total_failures += 1;
.....
}
the function above could be executed by thread(t1) till [SYNCOP-1] and code
from [ERRNO-2] can be executed by a different thread(t2) because of the way
syncop-infra schedules the tasks.
when the code is compiled with -O2 optimization this is the assembly code that
is generated:
 [ERRNO-1]
1165                        readdir_operrno = errno; <<---- errno gets expanded
as *(__errno_location())
   0x00007fd149d48b60 <+496>:        callq  0x7fd149d410c0 <address@hidden>
   0x00007fd149d48b72 <+514>:        mov    %rax,0x50(%rsp) <<------ Address
returned by __errno_location() is stored in a special location in stack for
later use.
   0x00007fd149d48b77 <+519>:        mov    (%rax),%eax
   0x00007fd149d48b79 <+521>:        mov    %eax,0x78(%rsp)
....
 [ERRNO-2]
1281                                        if (errno != ENODATA) {
   0x00007fd149d492ae <+2366>:        mov    0x50(%rsp),%rax <<-----  Because
it already stored the address returned by __errno_location(), it just
dereferences the address to get the errno value. BUT THIS CODE NEED NOT BE
EXECUTED BY SAME THREAD!!!
   0x00007fd149d492b3 <+2371>:        mov    $0x9,%ebp
   0x00007fd149d492b8 <+2376>:        mov    (%rax),%edi
   0x00007fd149d492ba <+2378>:        cmp    $0x3d,%edi
The problem is that __errno_location() value of t1 and t2 are different. So
[ERRNO-2] ends up reading errno of t1 instead of errno of t2 even though t2 is
executing [ERRNO-2] code section.
When code is compiled without any optimization for [ERRNO-2]:
1281                                        if (errno != ENODATA) {
   0x00007fd58e7a326f <+2237>:        callq  0x7fd58e797300
<address@hidden><<--- As it is calling __errno_location() again it gets the
location from t2 so it works as intended.
   0x00007fd58e7a3274 <+2242>:        mov    (%rax),%eax
   0x00007fd58e7a3276 <+2244>:        cmp    $0x3d,%eax
   0x00007fd58e7a3279 <+2247>:        je     0x7fd58e7a32a1
<gf_defrag_migrate_data+2287>
Fix:
Make syncop_xxx() return (-errno) value as the return value in
case of errors and all the functions which make syncop_xxx() will need to use
(-ret) to figure out the reason for failure in case of syncop_xxx() failures.
Change-Id: I314d20dabe55d3e62ff66f3b4adb1cac2eaebb57
BUG: 1040356
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/6475
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/features/qemu-block')
| -rw-r--r-- | xlators/features/qemu-block/src/bdrv-xlator.c | 15 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/qb-coroutines.c | 15 | 
2 files changed, 14 insertions, 16 deletions
| diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c index 106c5977535..aaf028cfea5 100644 --- a/xlators/features/qemu-block/src/bdrv-xlator.c +++ b/xlators/features/qemu-block/src/bdrv-xlator.c @@ -109,7 +109,7 @@ qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags)  				    NULL);  		if (ret) {  			loc_wipe(&loc); -			return -errno; +			return ret;  		}  		s->inode = inode_ref(loc.inode); @@ -153,7 +153,7 @@ qemu_gluster_create (const char *filename, QEMUOptionParameter *options)  	ret = syncop_fstat (FIRST_CHILD(THIS), fd, &stat);  	if (ret) {  		fd_unref (fd); -		return -errno; +		return ret;  	}  	if (stat.ia_size) { @@ -166,7 +166,7 @@ qemu_gluster_create (const char *filename, QEMUOptionParameter *options)  		ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, total_size);  		if (ret) {  			fd_unref (fd); -			return -errno; +			return ret;  		}  	} @@ -196,10 +196,8 @@ qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors,  	ret = syncop_readv (FIRST_CHILD(THIS), fd, size, offset, 0,  			    &iov, &count, &iobref); -	if (ret < 0) { -		ret = -errno; +	if (ret < 0)  		goto out; -	}  	iov_copy (qiov->iov, qiov->niov, iov, count); /* *choke!* */ @@ -249,8 +247,6 @@ qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors  	iov.iov_len = size;  	ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0); -	if (ret < 0) -		ret = -errno;  out:  	if (iobuf) @@ -306,9 +302,6 @@ qemu_gluster_truncate (BlockDriverState *bs, int64_t offset)  	fd_unref (fd); -	if (ret < 0) -		return ret; -  	return ret;  } diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c index 7c52adb21ed..974312f1268 100644 --- a/xlators/features/qemu-block/src/qb-coroutines.c +++ b/xlators/features/qemu-block/src/qb-coroutines.c @@ -86,7 +86,7 @@ qb_format_and_resume (void *opaque)  		GF_FREE(qb_inode->backing_fname);  		if (ret) {  			loc_wipe(&loc); -			ret = errno; +			ret = -ret;  			goto err;  		} @@ -150,11 +150,10 @@ qb_format_and_resume (void *opaque)  	ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0);  	if (ret) { -		ret = errno;  		gf_log (frame->this->name, GF_LOG_ERROR,  			"failed to setxattr for %s",  			uuid_utoa (inode->gfid)); -		QB_STUB_UNWIND (stub, -1, ret); +		QB_STUB_UNWIND (stub, -1, -ret);  		fd_unref (fd);  		dict_unref (xattr);  		return 0; @@ -476,7 +475,10 @@ qb_co_truncate (void *opaque)  		}  	} -	syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.prestat); +	ret = syncop_fstat (FIRST_CHILD(this), local->fd, +                            &stub->args_cbk.prestat); +        if (ret < 0) +                goto out;  	stub->args_cbk.prestat.ia_size = qb_inode->size;  	ret = bdrv_truncate (qb_inode->bs, stub->args.offset); @@ -487,7 +489,10 @@ qb_co_truncate (void *opaque)  	qb_inode->size = offset; -	syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.poststat); +	ret = syncop_fstat (FIRST_CHILD(this), local->fd, +                            &stub->args_cbk.poststat); +        if (ret < 0) +                goto out;  	stub->args_cbk.poststat.ia_size = qb_inode->size;  	qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size); | 
