summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kaleb S KEITHLEY <kkeithle@redhat.com> 2016-02-18 11:21:12 -0500
committer: Jeff Darcy <jdarcy@redhat.com> 2016-03-07 03:34:59 -0800
commit: 6860968c3adaf2e8c3cb51124bbdfccef74beeb9 (patch)
tree: 0bb3721f3ae438e7ea7891e0a179cfa63b697a7c
parent: 459d0a5e173f9d9f597aec89f81e5377425eb8fb (diff)
qemu-block: deprecated/defunct, remove from tree
qemu-block xlator is not used by anyone, or so I'm told. It's also substantially out of date. There's little reason to keep it in our sources. (And FedoraProject doesn't like bundled software either.)

Change-Id: I4aeb2fdfd962ec6d93de6bae126874121272220a
Signed-off-by: Kaleb S KEITHLEY <kkeithle@redhat.com>
Reviewed-on: http://review.gluster.org/13473
Smoke: Gluster Build System <jenkins@build.gluster.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r--configure.ac33
-rw-r--r--contrib/qemu/block.c4604
-rw-r--r--contrib/qemu/block/qcow.c914
-rw-r--r--contrib/qemu/block/qcow2-cache.c323
-rw-r--r--contrib/qemu/block/qcow2-cluster.c1478
-rw-r--r--contrib/qemu/block/qcow2-refcount.c1374
-rw-r--r--contrib/qemu/block/qcow2-snapshot.c660
-rw-r--r--contrib/qemu/block/qcow2.c1825
-rw-r--r--contrib/qemu/block/qcow2.h437
-rw-r--r--contrib/qemu/block/qed-check.c248
-rw-r--r--contrib/qemu/block/qed-cluster.c165
-rw-r--r--contrib/qemu/block/qed-gencb.c32
-rw-r--r--contrib/qemu/block/qed-l2-cache.c187
-rw-r--r--contrib/qemu/block/qed-table.c296
-rw-r--r--contrib/qemu/block/qed.c1596
-rw-r--r--contrib/qemu/block/qed.h344
-rw-r--r--contrib/qemu/block/snapshot.c157
-rw-r--r--contrib/qemu/config-host.h72
-rw-r--r--contrib/qemu/coroutine-ucontext.c225
-rw-r--r--contrib/qemu/include/block/aio.h247
-rw-r--r--contrib/qemu/include/block/block.h443
-rw-r--r--contrib/qemu/include/block/block_int.h421
-rw-r--r--contrib/qemu/include/block/blockjob.h278
-rw-r--r--contrib/qemu/include/block/coroutine.h218
-rw-r--r--contrib/qemu/include/block/coroutine_int.h53
-rw-r--r--contrib/qemu/include/block/snapshot.h53
-rw-r--r--contrib/qemu/include/config.h2
-rw-r--r--contrib/qemu/include/exec/cpu-common.h124
-rw-r--r--contrib/qemu/include/exec/hwaddr.h20
-rw-r--r--contrib/qemu/include/exec/poison.h63
-rw-r--r--contrib/qemu/include/fpu/softfloat.h641
-rw-r--r--contrib/qemu/include/glib-compat.h27
-rw-r--r--contrib/qemu/include/migration/migration.h157
-rw-r--r--contrib/qemu/include/migration/qemu-file.h266
-rw-r--r--contrib/qemu/include/migration/vmstate.h740
-rw-r--r--contrib/qemu/include/monitor/monitor.h104
-rw-r--r--contrib/qemu/include/monitor/readline.h55
-rw-r--r--contrib/qemu/include/qapi/error.h85
-rw-r--r--contrib/qemu/include/qapi/qmp/json-lexer.h51
-rw-r--r--contrib/qemu/include/qapi/qmp/json-parser.h24
-rw-r--r--contrib/qemu/include/qapi/qmp/json-streamer.h40
-rw-r--r--contrib/qemu/include/qapi/qmp/qbool.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qdict.h69
-rw-r--r--contrib/qemu/include/qapi/qmp/qerror.h249
-rw-r--r--contrib/qemu/include/qapi/qmp/qfloat.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qint.h28
-rw-r--r--contrib/qemu/include/qapi/qmp/qjson.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qlist.h63
-rw-r--r--contrib/qemu/include/qapi/qmp/qobject.h112
-rw-r--r--contrib/qemu/include/qapi/qmp/qstring.h36
-rw-r--r--contrib/qemu/include/qapi/qmp/types.h25
-rw-r--r--contrib/qemu/include/qemu-common.h478
-rw-r--r--contrib/qemu/include/qemu/aes.h45
-rw-r--r--contrib/qemu/include/qemu/atomic.h202
-rw-r--r--contrib/qemu/include/qemu/bitmap.h222
-rw-r--r--contrib/qemu/include/qemu/bitops.h276
-rw-r--r--contrib/qemu/include/qemu/bswap.h487
-rw-r--r--contrib/qemu/include/qemu/compiler.h55
-rw-r--r--contrib/qemu/include/qemu/error-report.h46
-rw-r--r--contrib/qemu/include/qemu/event_notifier.h46
-rw-r--r--contrib/qemu/include/qemu/hbitmap.h209
-rw-r--r--contrib/qemu/include/qemu/host-utils.h322
-rw-r--r--contrib/qemu/include/qemu/iov.h115
-rw-r--r--contrib/qemu/include/qemu/main-loop.h311
-rw-r--r--contrib/qemu/include/qemu/module.h40
-rw-r--r--contrib/qemu/include/qemu/notify.h72
-rw-r--r--contrib/qemu/include/qemu/option.h157
-rw-r--r--contrib/qemu/include/qemu/option_int.h54
-rw-r--r--contrib/qemu/include/qemu/osdep.h218
-rw-r--r--contrib/qemu/include/qemu/queue.h414
-rw-r--r--contrib/qemu/include/qemu/sockets.h83
-rw-r--r--contrib/qemu/include/qemu/thread-posix.h28
-rw-r--r--contrib/qemu/include/qemu/thread.h56
-rw-r--r--contrib/qemu/include/qemu/timer.h305
-rw-r--r--contrib/qemu/include/qemu/typedefs.h69
-rw-r--r--contrib/qemu/include/sysemu/os-posix.h52
-rw-r--r--contrib/qemu/include/sysemu/sysemu.h200
-rw-r--r--contrib/qemu/include/trace.h6
-rw-r--r--contrib/qemu/nop-symbols.c12
-rw-r--r--contrib/qemu/qapi-types.h2746
-rw-r--r--contrib/qemu/qemu-coroutine-lock.c178
-rw-r--r--contrib/qemu/qemu-coroutine-sleep.c39
-rw-r--r--contrib/qemu/qemu-coroutine.c135
-rw-r--r--contrib/qemu/qmp-commands.h204
-rw-r--r--contrib/qemu/qobject/json-lexer.c373
-rw-r--r--contrib/qemu/qobject/json-parser.c724
-rw-r--r--contrib/qemu/qobject/json-streamer.c122
-rw-r--r--contrib/qemu/qobject/qbool.c68
-rw-r--r--contrib/qemu/qobject/qdict.c478
-rw-r--r--contrib/qemu/qobject/qerror.c156
-rw-r--r--contrib/qemu/qobject/qfloat.c68
-rw-r--r--contrib/qemu/qobject/qint.c67
-rw-r--r--contrib/qemu/qobject/qjson.c282
-rw-r--r--contrib/qemu/qobject/qlist.c170
-rw-r--r--contrib/qemu/qobject/qstring.c149
-rw-r--r--contrib/qemu/trace/generated-tracers.h3759
-rw-r--r--contrib/qemu/util/aes.c1314
-rw-r--r--contrib/qemu/util/bitmap.c256
-rw-r--r--contrib/qemu/util/bitops.c158
-rw-r--r--contrib/qemu/util/cutils.c532
-rw-r--r--contrib/qemu/util/error.c120
-rw-r--r--contrib/qemu/util/hbitmap.c404
-rw-r--r--contrib/qemu/util/hexdump.c37
-rw-r--r--contrib/qemu/util/iov.c426
-rw-r--r--contrib/qemu/util/module.c81
-rw-r--r--contrib/qemu/util/oslib-posix.c255
-rw-r--r--contrib/qemu/util/qemu-error.c225
-rw-r--r--contrib/qemu/util/qemu-option.c1126
-rw-r--r--contrib/qemu/util/qemu-thread-posix.c327
-rw-r--r--contrib/qemu/util/unicode.c100
-rwxr-xr-xtests/basic/file-snapshot.t62
-rw-r--r--xlators/features/Makefile.am4
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am156
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c386
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c43
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c55
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c111
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c45
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1134
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
124 files changed, 2 insertions, 42231 deletions
diff --git a/configure.ac b/configure.ac
index 29e36648aac..56ec512badf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -177,8 +177,6 @@ AC_CONFIG_FILES([Makefile
xlators/encryption/rot-13/src/Makefile
xlators/encryption/crypt/Makefile
xlators/encryption/crypt/src/Makefile
- xlators/features/qemu-block/Makefile
- xlators/features/qemu-block/src/Makefile
xlators/system/Makefile
xlators/system/posix-acl/Makefile
xlators/system/posix-acl/src/Makefile
@@ -591,33 +589,6 @@ fi
AC_SUBST(FUSERMOUNT_SUBDIR)
#end FUSERMOUNT section
-# QEMU_BLOCK section
-
-AC_ARG_ENABLE([qemu-block],
- AC_HELP_STRING([--enable-qemu-block],
- [Build QEMU Block formats translator]))
-
-if test "x$enable_qemu_block" != "xno"; then
- PKG_CHECK_MODULES([GLIB], [glib-2.0],
- [HAVE_GLIB_2="yes"],
- [HAVE_GLIB_2="no"])
-fi
-
-if test "x$enable_qemu_block" = "xyes" -a "x$HAVE_GLIB_2" = "xno"; then
- echo "QEMU Block formats translator requires libglib-2.0, but missing."
- exit 1
-fi
-
-BUILD_QEMU_BLOCK=no
-if test "x${enable_qemu_block}" != "xno" -a "x${HAVE_GLIB_2}" = "xyes"; then
- BUILD_QEMU_BLOCK=yes
- AC_DEFINE(HAVE_QEMU_BLOCK, 1, [define if libglib-2.0 library found and QEMU
- Block translator enabled])
-fi
-
-
-# end QEMU_BLOCK section
-
# EPOLL section
AC_ARG_ENABLE([epoll],
AC_HELP_STRING([--disable-epoll],
@@ -1104,9 +1075,6 @@ if test "x${GF_HOST_OS}" != "xGF_LINUX_HOST_OS" ; then
fi
AC_SUBST(UMOUNTD_SUBDIR)
-# enable/disable QEMU
-AM_CONDITIONAL([ENABLE_QEMU_BLOCK], [test x$BUILD_QEMU_BLOCK = xyes])
-
# enable debug section
AC_ARG_ENABLE([debug],
@@ -1369,7 +1337,6 @@ echo "Block Device xlator : $BUILD_BD_XLATOR"
echo "glupy : $BUILD_GLUPY"
echo "Use syslog : $USE_SYSLOG"
echo "XML output : $BUILD_XML_OUTPUT"
-echo "QEMU Block formats : $BUILD_QEMU_BLOCK"
echo "Encryption xlator : $BUILD_CRYPT_XLATOR"
echo "Unit Tests : $BUILD_UNITTEST"
echo "POSIX ACLs : $BUILD_POSIX_ACLS"
diff --git a/contrib/qemu/block.c b/contrib/qemu/block.c
deleted file mode 100644
index b56024113b8..00000000000
--- a/contrib/qemu/block.c
+++ /dev/null
@@ -1,4604 +0,0 @@
-/*
- * QEMU System Emulator block driver
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "config-host.h"
-#include "qemu-common.h"
-#include "trace.h"
-#include "monitor/monitor.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-#include "qemu/module.h"
-#include "qapi/qmp/qjson.h"
-#include "sysemu/sysemu.h"
-#include "qemu/notify.h"
-#include "block/coroutine.h"
-#include "qmp-commands.h"
-#include "qemu/timer.h"
-
-#ifdef CONFIG_BSD
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <sys/queue.h>
-#ifndef __DragonFly__
-#include <sys/disk.h>
-#endif
-#endif
-
-#ifdef _WIN32
-#include <windows.h>
-#endif
-
-#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-
-typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
-} BdrvRequestFlags;
-
-static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
-static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque,
- bool is_write);
-static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
-
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
- double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, int64_t *wait);
-
-static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
- QTAILQ_HEAD_INITIALIZER(bdrv_states);
-
-static QLIST_HEAD(, BlockDriver) bdrv_drivers =
- QLIST_HEAD_INITIALIZER(bdrv_drivers);
-
-/* If non-zero, use only whitelisted block drivers */
-static int use_bdrv_whitelist;
-
-#ifdef _WIN32
-static int is_windows_drive_prefix(const char *filename)
-{
- return (((filename[0] >= 'a' && filename[0] <= 'z') ||
- (filename[0] >= 'A' && filename[0] <= 'Z')) &&
- filename[1] == ':');
-}
-
-int is_windows_drive(const char *filename)
-{
- if (is_windows_drive_prefix(filename) &&
- filename[2] == '\0')
- return 1;
- if (strstart(filename, "\\\\.\\", NULL) ||
- strstart(filename, "//./", NULL))
- return 1;
- return 0;
-}
-#endif
-
-/* throttling disk I/O limits */
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
- bs->io_limits_enabled = false;
-
- while (qemu_co_queue_next(&bs->throttled_reqs));
-
- if (bs->block_timer) {
- qemu_del_timer(bs->block_timer);
- qemu_free_timer(bs->block_timer);
- bs->block_timer = NULL;
- }
-
- bs->slice_start = 0;
- bs->slice_end = 0;
-}
-
-static void bdrv_block_timer(void *opaque)
-{
- BlockDriverState *bs = opaque;
-
- qemu_co_queue_next(&bs->throttled_reqs);
-}
-
-void bdrv_io_limits_enable(BlockDriverState *bs)
-{
- qemu_co_queue_init(&bs->throttled_reqs);
- bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
- bs->io_limits_enabled = true;
-}
-
-bool bdrv_io_limits_enabled(BlockDriverState *bs)
-{
- BlockIOLimit *io_limits = &bs->io_limits;
- return io_limits->bps[BLOCK_IO_LIMIT_READ]
- || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
- || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
- || io_limits->iops[BLOCK_IO_LIMIT_READ]
- || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
- || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
-}
-
-static void bdrv_io_limits_intercept(BlockDriverState *bs,
- bool is_write, int nb_sectors)
-{
- int64_t wait_time = -1;
-
- if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
- qemu_co_queue_wait(&bs->throttled_reqs);
- }
-
- /* In fact, we hope to keep each request's timing, in FIFO mode. The next
- * throttled requests will not be dequeued until the current request is
- * allowed to be serviced. So if the current request still exceeds the
- * limits, it will be inserted to the head. All requests followed it will
- * be still in throttled_reqs queue.
- */
-
- while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
- qemu_mod_timer(bs->block_timer,
- wait_time + qemu_get_clock_ns(vm_clock));
- qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
- }
-
- qemu_co_queue_next(&bs->throttled_reqs);
-}
-
-/* check if the path starts with "<protocol>:" */
-static int path_has_protocol(const char *path)
-{
- const char *p;
-
-#ifdef _WIN32
- if (is_windows_drive(path) ||
- is_windows_drive_prefix(path)) {
- return 0;
- }
- p = path + strcspn(path, ":/\\");
-#else
- p = path + strcspn(path, ":/");
-#endif
-
- return *p == ':';
-}
-
-int path_is_absolute(const char *path)
-{
-#ifdef _WIN32
- /* specific case for names like: "\\.\d:" */
- if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
- return 1;
- }
- return (*path == '/' || *path == '\\');
-#else
- return (*path == '/');
-#endif
-}
-
-/* if filename is absolute, just copy it to dest. Otherwise, build a
- path to it by considering it is relative to base_path. URL are
- supported. */
-void path_combine(char *dest, int dest_size,
- const char *base_path,
- const char *filename)
-{
- const char *p, *p1;
- int len;
-
- if (dest_size <= 0)
- return;
- if (path_is_absolute(filename)) {
- pstrcpy(dest, dest_size, filename);
- } else {
- p = strchr(base_path, ':');
- if (p)
- p++;
- else
- p = base_path;
- p1 = strrchr(base_path, '/');
-#ifdef _WIN32
- {
- const char *p2;
- p2 = strrchr(base_path, '\\');
- if (!p1 || p2 > p1)
- p1 = p2;
- }
-#endif
- if (p1)
- p1++;
- else
- p1 = base_path;
- if (p1 > p)
- p = p1;
- len = p - base_path;
- if (len > dest_size - 1)
- len = dest_size - 1;
- memcpy(dest, base_path, len);
- dest[len] = '\0';
- pstrcat(dest, dest_size, filename);
- }
-}
-
-void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
-{
- if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
- pstrcpy(dest, sz, bs->backing_file);
- } else {
- path_combine(dest, sz, bs->filename, bs->backing_file);
- }
-}
-
-void bdrv_register(BlockDriver *bdrv)
-{
- /* Block drivers without coroutine functions need emulation */
- if (!bdrv->bdrv_co_readv) {
- bdrv->bdrv_co_readv = bdrv_co_readv_em;
- bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
- /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
- * the block driver lacks aio we need to emulate that too.
- */
- if (!bdrv->bdrv_aio_readv) {
- /* add AIO emulation layer */
- bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
- bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
- }
- }
-
- QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
-}
-
-/* create a new block device (by default it is empty) */
-BlockDriverState *bdrv_new(const char *device_name)
-{
- BlockDriverState *bs;
-
- bs = g_malloc0(sizeof(BlockDriverState));
- pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
- if (device_name[0] != '\0') {
- QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
- }
- bdrv_iostatus_disable(bs);
- notifier_list_init(&bs->close_notifiers);
- notifier_with_return_list_init(&bs->before_write_notifiers);
-
- return bs;
-}
-
-void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
-{
- notifier_list_add(&bs->close_notifiers, notify);
-}
-
-BlockDriver *bdrv_find_format(const char *format_name)
-{
- BlockDriver *drv1;
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (!strcmp(drv1->format_name, format_name)) {
- return drv1;
- }
- }
- return NULL;
-}
-
-static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
-{
- static const char *whitelist_rw[] = {
- CONFIG_BDRV_RW_WHITELIST
- };
- static const char *whitelist_ro[] = {
- CONFIG_BDRV_RO_WHITELIST
- };
- const char **p;
-
- if (!whitelist_rw[0] && !whitelist_ro[0]) {
- return 1; /* no whitelist, anything goes */
- }
-
- for (p = whitelist_rw; *p; p++) {
- if (!strcmp(drv->format_name, *p)) {
- return 1;
- }
- }
- if (read_only) {
- for (p = whitelist_ro; *p; p++) {
- if (!strcmp(drv->format_name, *p)) {
- return 1;
- }
- }
- }
- return 0;
-}
-
-BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
- bool read_only)
-{
- BlockDriver *drv = bdrv_find_format(format_name);
- return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
-}
-
-typedef struct CreateCo {
- BlockDriver *drv;
- char *filename;
- QEMUOptionParameter *options;
- int ret;
-} CreateCo;
-
-static void coroutine_fn bdrv_create_co_entry(void *opaque)
-{
- CreateCo *cco = opaque;
- assert(cco->drv);
-
- cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
-}
-
-int bdrv_create(BlockDriver *drv, const char* filename,
- QEMUOptionParameter *options)
-{
- int ret;
-
- Coroutine *co;
- CreateCo cco = {
- .drv = drv,
- .filename = g_strdup(filename),
- .options = options,
- .ret = NOT_DONE,
- };
-
- if (!drv->bdrv_create) {
- ret = -ENOTSUP;
- goto out;
- }
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_create_co_entry(&cco);
- } else {
- co = qemu_coroutine_create(bdrv_create_co_entry);
- qemu_coroutine_enter(co, &cco);
- while (cco.ret == NOT_DONE) {
- qemu_aio_wait();
- }
- }
-
- ret = cco.ret;
-
-out:
- g_free(cco.filename);
- return ret;
-}
-
-int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
-{
- BlockDriver *drv;
-
- drv = bdrv_find_protocol(filename, true);
- if (drv == NULL) {
- return -ENOENT;
- }
-
- return bdrv_create(drv, filename, options);
-}
-
-/*
- * Create a uniquely-named empty temporary file.
- * Return 0 upon success, otherwise a negative errno value.
- */
-int get_tmp_filename(char *filename, int size)
-{
-#ifdef _WIN32
- char temp_dir[MAX_PATH];
- /* GetTempFileName requires that its output buffer (4th param)
- have length MAX_PATH or greater. */
- assert(size >= MAX_PATH);
- return (GetTempPath(MAX_PATH, temp_dir)
- && GetTempFileName(temp_dir, "qem", 0, filename)
- ? 0 : -GetLastError());
-#else
- int fd;
- const char *tmpdir;
- tmpdir = getenv("TMPDIR");
- if (!tmpdir)
- tmpdir = "/tmp";
- if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
- return -EOVERFLOW;
- }
- fd = mkstemp(filename);
- if (fd < 0) {
- return -errno;
- }
- if (close(fd) != 0) {
- unlink(filename);
- return -errno;
- }
- return 0;
-#endif
-}
-
-/*
- * Detect host devices. By convention, /dev/cdrom[N] is always
- * recognized as a host CDROM.
- */
-static BlockDriver *find_hdev_driver(const char *filename)
-{
- int score_max = 0, score;
- BlockDriver *drv = NULL, *d;
-
- QLIST_FOREACH(d, &bdrv_drivers, list) {
- if (d->bdrv_probe_device) {
- score = d->bdrv_probe_device(filename);
- if (score > score_max) {
- score_max = score;
- drv = d;
- }
- }
- }
-
- return drv;
-}
-
-BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix)
-{
- BlockDriver *drv1;
- char protocol[128];
- int len;
- const char *p;
-
- /* TODO Drivers without bdrv_file_open must be specified explicitly */
-
- /*
- * XXX(hch): we really should not let host device detection
- * override an explicit protocol specification, but moving this
- * later breaks access to device names with colons in them.
- * Thanks to the brain-dead persistent naming schemes on udev-
- * based Linux systems those actually are quite common.
- */
- drv1 = find_hdev_driver(filename);
- if (drv1) {
- return drv1;
- }
-
- if (!path_has_protocol(filename) || !allow_protocol_prefix) {
- return bdrv_find_format("file");
- }
-
- p = strchr(filename, ':');
- assert(p != NULL);
- len = p - filename;
- if (len > sizeof(protocol) - 1)
- len = sizeof(protocol) - 1;
- memcpy(protocol, filename, len);
- protocol[len] = '\0';
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (drv1->protocol_name &&
- !strcmp(drv1->protocol_name, protocol)) {
- return drv1;
- }
- }
- return NULL;
-}
-
-static int find_image_format(BlockDriverState *bs, const char *filename,
- BlockDriver **pdrv)
-{
- int score, score_max;
- BlockDriver *drv1, *drv;
- uint8_t buf[2048];
- int ret = 0;
-
- /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
- if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
- drv = bdrv_find_format("raw");
- if (!drv) {
- ret = -ENOENT;
- }
- *pdrv = drv;
- return ret;
- }
-
- ret = bdrv_pread(bs, 0, buf, sizeof(buf));
- if (ret < 0) {
- *pdrv = NULL;
- return ret;
- }
-
- score_max = 0;
- drv = NULL;
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (drv1->bdrv_probe) {
- score = drv1->bdrv_probe(buf, ret, filename);
- if (score > score_max) {
- score_max = score;
- drv = drv1;
- }
- }
- }
- if (!drv) {
- ret = -ENOENT;
- }
- *pdrv = drv;
- return ret;
-}
-
-/**
- * Set the current 'total_sectors' value
- */
-static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
-{
- BlockDriver *drv = bs->drv;
-
- /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
- if (bs->sg)
- return 0;
-
- /* query actual device if possible, otherwise just trust the hint */
- if (drv->bdrv_getlength) {
- int64_t length = drv->bdrv_getlength(bs);
- if (length < 0) {
- return length;
- }
- hint = length >> BDRV_SECTOR_BITS;
- }
-
- bs->total_sectors = hint;
- return 0;
-}
-
-/**
- * Set open flags for a given discard mode
- *
- * Return 0 on success, -1 if the discard mode was invalid.
- */
-int bdrv_parse_discard_flags(const char *mode, int *flags)
-{
- *flags &= ~BDRV_O_UNMAP;
-
- if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
- /* do nothing */
- } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
- *flags |= BDRV_O_UNMAP;
- } else {
- return -1;
- }
-
- return 0;
-}
-
-/**
- * Set open flags for a given cache mode
- *
- * Return 0 on success, -1 if the cache mode was invalid.
- */
-int bdrv_parse_cache_flags(const char *mode, int *flags)
-{
- *flags &= ~BDRV_O_CACHE_MASK;
-
- if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
- *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
- } else if (!strcmp(mode, "directsync")) {
- *flags |= BDRV_O_NOCACHE;
- } else if (!strcmp(mode, "writeback")) {
- *flags |= BDRV_O_CACHE_WB;
- } else if (!strcmp(mode, "unsafe")) {
- *flags |= BDRV_O_CACHE_WB;
- *flags |= BDRV_O_NO_FLUSH;
- } else if (!strcmp(mode, "writethrough")) {
- /* this is the default */
- } else {
- return -1;
- }
-
- return 0;
-}
-
-/**
- * The copy-on-read flag is actually a reference count so multiple users may
- * use the feature without worrying about clobbering its previous state.
- * Copy-on-read stays enabled until all users have called to disable it.
- */
-void bdrv_enable_copy_on_read(BlockDriverState *bs)
-{
- bs->copy_on_read++;
-}
-
-void bdrv_disable_copy_on_read(BlockDriverState *bs)
-{
- assert(bs->copy_on_read > 0);
- bs->copy_on_read--;
-}
-
-static int bdrv_open_flags(BlockDriverState *bs, int flags)
-{
- int open_flags = flags | BDRV_O_CACHE_WB;
-
- /*
- * Clear flags that are internal to the block layer before opening the
- * image.
- */
- open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
- /*
- * Snapshots should be writable.
- */
- if (bs->is_temporary) {
- open_flags |= BDRV_O_RDWR;
- }
-
- return open_flags;
-}
-
-/*
- * Common part for opening disk images and files
- *
- * Removes all processed options from *options.
- */
-static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
- QDict *options, int flags, BlockDriver *drv)
-{
- int ret, open_flags;
- const char *filename;
-
- assert(drv != NULL);
- assert(bs->file == NULL);
- assert(options != NULL && bs->options != options);
-
- if (file != NULL) {
- filename = file->filename;
- } else {
- filename = qdict_get_try_str(options, "filename");
- }
-
- trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
-
- /* bdrv_open() with directly using a protocol as drv. This layer is already
- * opened, so assign it to bs (while file becomes a closed BlockDriverState)
- * and return immediately. */
- if (file != NULL && drv->bdrv_file_open) {
- bdrv_swap(file, bs);
- return 0;
- }
-
- bs->open_flags = flags;
- bs->buffer_alignment = 512;
- open_flags = bdrv_open_flags(bs, flags);
- bs->read_only = !(open_flags & BDRV_O_RDWR);
-
- if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
- return -ENOTSUP;
- }
-
- assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
- if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
- bdrv_enable_copy_on_read(bs);
- }
-
- if (filename != NULL) {
- pstrcpy(bs->filename, sizeof(bs->filename), filename);
- } else {
- bs->filename[0] = '\0';
- }
-
- bs->drv = drv;
- bs->opaque = g_malloc0(drv->instance_size);
-
- bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
-
- /* Open the image, either directly or using a protocol */
- if (drv->bdrv_file_open) {
- assert(file == NULL);
- assert(drv->bdrv_parse_filename || filename != NULL);
- ret = drv->bdrv_file_open(bs, options, open_flags);
- } else {
- if (file == NULL) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
- "block driver for the protocol level",
- drv->format_name);
- ret = -EINVAL;
- goto free_and_fail;
- }
- assert(file != NULL);
- bs->file = file;
- ret = drv->bdrv_open(bs, options, open_flags);
- }
-
- if (ret < 0) {
- goto free_and_fail;
- }
-
- ret = refresh_total_sectors(bs, bs->total_sectors);
- if (ret < 0) {
- goto free_and_fail;
- }
-
-#ifndef _WIN32
- if (bs->is_temporary) {
- assert(filename != NULL);
- unlink(filename);
- }
-#endif
- return 0;
-
-free_and_fail:
- bs->file = NULL;
- g_free(bs->opaque);
- bs->opaque = NULL;
- bs->drv = NULL;
- return ret;
-}
-
-/*
- * Opens a file using a protocol (file, host_device, nbd, ...)
- *
- * options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict belongs to the block layer
- * after the call (even on failure), so if the caller intends to reuse the
- * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
- */
-int bdrv_file_open(BlockDriverState **pbs, const char *filename,
- QDict *options, int flags)
-{
- BlockDriverState *bs;
- BlockDriver *drv;
- const char *drvname;
- bool allow_protocol_prefix = false;
- int ret;
-
- /* NULL means an empty set of options */
- if (options == NULL) {
- options = qdict_new();
- }
-
- bs = bdrv_new("");
- bs->options = options;
- options = qdict_clone_shallow(options);
-
- /* Fetch the file name from the options QDict if necessary */
- if (!filename) {
- filename = qdict_get_try_str(options, "filename");
- } else if (filename && !qdict_haskey(options, "filename")) {
- qdict_put(options, "filename", qstring_from_str(filename));
- allow_protocol_prefix = true;
- } else {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
- "'filename' options at the same time");
- ret = -EINVAL;
- goto fail;
- }
-
- /* Find the right block driver */
- drvname = qdict_get_try_str(options, "driver");
- if (drvname) {
- drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
- qdict_del(options, "driver");
- } else if (filename) {
- drv = bdrv_find_protocol(filename, allow_protocol_prefix);
- if (!drv) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
- }
- } else {
- qerror_report(ERROR_CLASS_GENERIC_ERROR,
- "Must specify either driver or file");
- drv = NULL;
- }
-
- if (!drv) {
- ret = -ENOENT;
- goto fail;
- }
-
- /* Parse the filename and open it */
- if (drv->bdrv_parse_filename && filename) {
- Error *local_err = NULL;
- drv->bdrv_parse_filename(filename, options, &local_err);
- if (error_is_set(&local_err)) {
- qerror_report_err(local_err);
- error_free(local_err);
- ret = -EINVAL;
- goto fail;
- }
- qdict_del(options, "filename");
- } else if (!drv->bdrv_parse_filename && !filename) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR,
- "The '%s' block driver requires a file name",
- drv->format_name);
- ret = -EINVAL;
- goto fail;
- }
-
- ret = bdrv_open_common(bs, NULL, options, flags, drv);
- if (ret < 0) {
- goto fail;
- }
-
- /* Check if any unknown options were used */
- if (qdict_size(options) != 0) {
- const QDictEntry *entry = qdict_first(options);
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
- "support the option '%s'",
- drv->format_name, entry->key);
- ret = -EINVAL;
- goto fail;
- }
- QDECREF(options);
-
- bs->growable = 1;
- *pbs = bs;
- return 0;
-
-fail:
- QDECREF(options);
- if (!bs->drv) {
- QDECREF(bs->options);
- }
- bdrv_delete(bs);
- return ret;
-}
-
-/*
- * Opens the backing file for a BlockDriverState if not yet open
- *
- * options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict is transferred to this
- * function (even on failure), so if the caller intends to reuse the dictionary,
- * it needs to use QINCREF() before calling bdrv_file_open.
- */
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
-{
- char backing_filename[PATH_MAX];
- int back_flags, ret;
- BlockDriver *back_drv = NULL;
-
- if (bs->backing_hd != NULL) {
- QDECREF(options);
- return 0;
- }
-
- /* NULL means an empty set of options */
- if (options == NULL) {
- options = qdict_new();
- }
-
- bs->open_flags &= ~BDRV_O_NO_BACKING;
- if (qdict_haskey(options, "file.filename")) {
- backing_filename[0] = '\0';
- } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
- QDECREF(options);
- return 0;
- }
-
- bs->backing_hd = bdrv_new("");
- bdrv_get_full_backing_filename(bs, backing_filename,
- sizeof(backing_filename));
-
- if (bs->backing_format[0] != '\0') {
- back_drv = bdrv_find_format(bs->backing_format);
- }
-
- /* backing files always opened read-only */
- back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
-
- ret = bdrv_open(bs->backing_hd,
- *backing_filename ? backing_filename : NULL, options,
- back_flags, back_drv);
- if (ret < 0) {
- bdrv_delete(bs->backing_hd);
- bs->backing_hd = NULL;
- bs->open_flags |= BDRV_O_NO_BACKING;
- return ret;
- }
- return 0;
-}
-
-static void extract_subqdict(QDict *src, QDict **dst, const char *start)
-{
- const QDictEntry *entry, *next;
- const char *p;
-
- *dst = qdict_new();
- entry = qdict_first(src);
-
- while (entry != NULL) {
- next = qdict_next(src, entry);
- if (strstart(entry->key, start, &p)) {
- qobject_incref(entry->value);
- qdict_put_obj(*dst, p, entry->value);
- qdict_del(src, entry->key);
- }
- entry = next;
- }
-}
-
-/*
- * Opens a disk image (raw, qcow2, vmdk, ...)
- *
- * options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict belongs to the block layer
- * after the call (even on failure), so if the caller intends to reuse the
- * dictionary, it needs to use QINCREF() before calling bdrv_open.
- */
-int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
- int flags, BlockDriver *drv)
-{
- int ret;
- /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
- char tmp_filename[PATH_MAX + 1];
- BlockDriverState *file = NULL;
- QDict *file_options = NULL;
-
- /* NULL means an empty set of options */
- if (options == NULL) {
- options = qdict_new();
- }
-
- bs->options = options;
- options = qdict_clone_shallow(options);
-
- /* For snapshot=on, create a temporary qcow2 overlay */
- if (flags & BDRV_O_SNAPSHOT) {
- BlockDriverState *bs1;
- int64_t total_size;
- BlockDriver *bdrv_qcow2;
- QEMUOptionParameter *create_options;
- char backing_filename[PATH_MAX];
-
- if (qdict_size(options) != 0) {
- error_report("Can't use snapshot=on with driver-specific options");
- ret = -EINVAL;
- goto fail;
- }
- assert(filename != NULL);
-
- /* if snapshot, we create a temporary backing file and open it
- instead of opening 'filename' directly */
-
- /* if there is a backing file, use it */
- bs1 = bdrv_new("");
- ret = bdrv_open(bs1, filename, NULL, 0, drv);
- if (ret < 0) {
- bdrv_delete(bs1);
- goto fail;
- }
- total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
-
- bdrv_delete(bs1);
-
- ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
- if (ret < 0) {
- goto fail;
- }
-
- /* Real path is meaningless for protocols */
- if (path_has_protocol(filename)) {
- snprintf(backing_filename, sizeof(backing_filename),
- "%s", filename);
- } else if (!realpath(filename, backing_filename)) {
- ret = -errno;
- goto fail;
- }
-
- bdrv_qcow2 = bdrv_find_format("qcow2");
- create_options = parse_option_parameters("", bdrv_qcow2->create_options,
- NULL);
-
- set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
- set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
- backing_filename);
- if (drv) {
- set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
- drv->format_name);
- }
-
- ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
- free_option_parameters(create_options);
- if (ret < 0) {
- goto fail;
- }
-
- filename = tmp_filename;
- drv = bdrv_qcow2;
- bs->is_temporary = 1;
- }
-
- /* Open image file without format layer */
- if (flags & BDRV_O_RDWR) {
- flags |= BDRV_O_ALLOW_RDWR;
- }
-
- extract_subqdict(options, &file_options, "file.");
-
- ret = bdrv_file_open(&file, filename, file_options,
- bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
- if (ret < 0) {
- goto fail;
- }
-
- /* Find the right image format driver */
- if (!drv) {
- ret = find_image_format(file, filename, &drv);
- }
-
- if (!drv) {
- goto unlink_and_fail;
- }
-
- /* Open the image */
- ret = bdrv_open_common(bs, file, options, flags, drv);
- if (ret < 0) {
- goto unlink_and_fail;
- }
-
- if (bs->file != file) {
- bdrv_delete(file);
- file = NULL;
- }
-
- /* If there is a backing file, use it */
- if ((flags & BDRV_O_NO_BACKING) == 0) {
- QDict *backing_options;
-
- extract_subqdict(options, &backing_options, "backing.");
- ret = bdrv_open_backing_file(bs, backing_options);
- if (ret < 0) {
- goto close_and_fail;
- }
- }
-
- /* Check if any unknown options were used */
- if (qdict_size(options) != 0) {
- const QDictEntry *entry = qdict_first(options);
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
- "device '%s' doesn't support the option '%s'",
- drv->format_name, bs->device_name, entry->key);
-
- ret = -EINVAL;
- goto close_and_fail;
- }
- QDECREF(options);
-
- if (!bdrv_key_required(bs)) {
- bdrv_dev_change_media_cb(bs, true);
- }
-
- /* throttling disk I/O limits */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_enable(bs);
- }
-
- return 0;
-
-unlink_and_fail:
- if (file != NULL) {
- bdrv_delete(file);
- }
- if (bs->is_temporary) {
- unlink(filename);
- }
-fail:
- QDECREF(bs->options);
- QDECREF(options);
- bs->options = NULL;
- return ret;
-
-close_and_fail:
- bdrv_close(bs);
- QDECREF(options);
- return ret;
-}
-
-typedef struct BlockReopenQueueEntry {
- bool prepared;
- BDRVReopenState state;
- QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
-} BlockReopenQueueEntry;
-
-/*
- * Adds a BlockDriverState to a simple queue for an atomic, transactional
- * reopen of multiple devices.
- *
- * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
- * already performed, or alternatively may be NULL a new BlockReopenQueue will
- * be created and initialized. This newly created BlockReopenQueue should be
- * passed back in for subsequent calls that are intended to be of the same
- * atomic 'set'.
- *
- * bs is the BlockDriverState to add to the reopen queue.
- *
- * flags contains the open flags for the associated bs
- *
- * returns a pointer to bs_queue, which is either the newly allocated
- * bs_queue, or the existing bs_queue being used.
- *
- */
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, int flags)
-{
- assert(bs != NULL);
-
- BlockReopenQueueEntry *bs_entry;
- if (bs_queue == NULL) {
- bs_queue = g_new0(BlockReopenQueue, 1);
- QSIMPLEQ_INIT(bs_queue);
- }
-
- if (bs->file) {
- bdrv_reopen_queue(bs_queue, bs->file, flags);
- }
-
- bs_entry = g_new0(BlockReopenQueueEntry, 1);
- QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
-
- bs_entry->state.bs = bs;
- bs_entry->state.flags = flags;
-
- return bs_queue;
-}
-
-/*
- * Reopen multiple BlockDriverStates atomically & transactionally.
- *
- * The queue passed in (bs_queue) must have been built up previous
- * via bdrv_reopen_queue().
- *
- * Reopens all BDS specified in the queue, with the appropriate
- * flags. All devices are prepared for reopen, and failure of any
- * device will cause all device changes to be abandonded, and intermediate
- * data cleaned up.
- *
- * If all devices prepare successfully, then the changes are committed
- * to all devices.
- *
- */
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
-{
- int ret = -1;
- BlockReopenQueueEntry *bs_entry, *next;
- Error *local_err = NULL;
-
- assert(bs_queue != NULL);
-
- bdrv_drain_all();
-
- QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
- if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
- error_propagate(errp, local_err);
- goto cleanup;
- }
- bs_entry->prepared = true;
- }
-
- /* If we reach this point, we have success and just need to apply the
- * changes
- */
- QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
- bdrv_reopen_commit(&bs_entry->state);
- }
-
- ret = 0;
-
-cleanup:
- QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
- if (ret && bs_entry->prepared) {
- bdrv_reopen_abort(&bs_entry->state);
- }
- g_free(bs_entry);
- }
- g_free(bs_queue);
- return ret;
-}
-
-
-/* Reopen a single BlockDriverState with the specified flags. */
-int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
-{
- int ret = -1;
- Error *local_err = NULL;
- BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
-
- ret = bdrv_reopen_multiple(queue, &local_err);
- if (local_err != NULL) {
- error_propagate(errp, local_err);
- }
- return ret;
-}
-
-
-/*
- * Prepares a BlockDriverState for reopen. All changes are staged in the
- * 'opaque' field of the BDRVReopenState, which is used and allocated by
- * the block driver layer .bdrv_reopen_prepare()
- *
- * bs is the BlockDriverState to reopen
- * flags are the new open flags
- * queue is the reopen queue
- *
- * Returns 0 on success, non-zero on error. On error errp will be set
- * as well.
- *
- * On failure, bdrv_reopen_abort() will be called to clean up any data.
- * It is the responsibility of the caller to then call the abort() or
- * commit() for any other BDS that have been left in a prepare() state
- *
- */
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
- Error **errp)
-{
- int ret = -1;
- Error *local_err = NULL;
- BlockDriver *drv;
-
- assert(reopen_state != NULL);
- assert(reopen_state->bs->drv != NULL);
- drv = reopen_state->bs->drv;
-
- /* if we are to stay read-only, do not allow permission change
- * to r/w */
- if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
- reopen_state->flags & BDRV_O_RDWR) {
- error_set(errp, QERR_DEVICE_IS_READ_ONLY,
- reopen_state->bs->device_name);
- goto error;
- }
-
-
- ret = bdrv_flush(reopen_state->bs);
- if (ret) {
- error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
- strerror(-ret));
- goto error;
- }
-
- if (drv->bdrv_reopen_prepare) {
- ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
- if (ret) {
- if (local_err != NULL) {
- error_propagate(errp, local_err);
- } else {
- error_setg(errp, "failed while preparing to reopen image '%s'",
- reopen_state->bs->filename);
- }
- goto error;
- }
- } else {
- /* It is currently mandatory to have a bdrv_reopen_prepare()
- * handler for each supported drv. */
- error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- drv->format_name, reopen_state->bs->device_name,
- "reopening of file");
- ret = -1;
- goto error;
- }
-
- ret = 0;
-
-error:
- return ret;
-}
-
-/*
- * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
- * makes them final by swapping the staging BlockDriverState contents into
- * the active BlockDriverState contents.
- */
-void bdrv_reopen_commit(BDRVReopenState *reopen_state)
-{
- BlockDriver *drv;
-
- assert(reopen_state != NULL);
- drv = reopen_state->bs->drv;
- assert(drv != NULL);
-
- /* If there are any driver level actions to take */
- if (drv->bdrv_reopen_commit) {
- drv->bdrv_reopen_commit(reopen_state);
- }
-
- /* set BDS specific flags now */
- reopen_state->bs->open_flags = reopen_state->flags;
- reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
- BDRV_O_CACHE_WB);
- reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
-}
-
-/*
- * Abort the reopen, and delete and free the staged changes in
- * reopen_state
- */
-void bdrv_reopen_abort(BDRVReopenState *reopen_state)
-{
- BlockDriver *drv;
-
- assert(reopen_state != NULL);
- drv = reopen_state->bs->drv;
- assert(drv != NULL);
-
- if (drv->bdrv_reopen_abort) {
- drv->bdrv_reopen_abort(reopen_state);
- }
-}
-
-
-void bdrv_close(BlockDriverState *bs)
-{
- if (bs->job) {
- block_job_cancel_sync(bs->job);
- }
- bdrv_drain_all(); /* complete I/O */
- bdrv_flush(bs);
- bdrv_drain_all(); /* in case flush left pending I/O */
- notifier_list_notify(&bs->close_notifiers, bs);
-
- if (bs->drv) {
- if (bs->backing_hd) {
- bdrv_delete(bs->backing_hd);
- bs->backing_hd = NULL;
- }
- bs->drv->bdrv_close(bs);
- g_free(bs->opaque);
-#ifdef _WIN32
- if (bs->is_temporary) {
- unlink(bs->filename);
- }
-#endif
- bs->opaque = NULL;
- bs->drv = NULL;
- bs->copy_on_read = 0;
- bs->backing_file[0] = '\0';
- bs->backing_format[0] = '\0';
- bs->total_sectors = 0;
- bs->encrypted = 0;
- bs->valid_key = 0;
- bs->sg = 0;
- bs->growable = 0;
- QDECREF(bs->options);
- bs->options = NULL;
-
- if (bs->file != NULL) {
- bdrv_delete(bs->file);
- bs->file = NULL;
- }
- }
-
- bdrv_dev_change_media_cb(bs, false);
-
- /*throttling disk I/O limits*/
- if (bs->io_limits_enabled) {
- bdrv_io_limits_disable(bs);
- }
-}
-
-void bdrv_close_all(void)
-{
- BlockDriverState *bs;
-
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- bdrv_close(bs);
- }
-}
-
-/*
- * Wait for pending requests to complete across all BlockDriverStates
- *
- * This function does not flush data to disk, use bdrv_flush_all() for that
- * after calling this function.
- *
- * Note that completion of an asynchronous I/O operation can trigger any
- * number of other I/O operations on other devices---for example a coroutine
- * can be arbitrarily complex and a constant flow of I/O can come until the
- * coroutine is complete. Because of this, it is not possible to have a
- * function to drain a single device's I/O queue.
- */
-void bdrv_drain_all(void)
-{
- BlockDriverState *bs;
- bool busy;
-
- do {
- busy = qemu_aio_wait();
-
- /* FIXME: We do not have timer support here, so this is effectively
- * a busy wait.
- */
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
- qemu_co_queue_restart_all(&bs->throttled_reqs);
- busy = true;
- }
- }
- } while (busy);
-
- /* If requests are still pending there is a bug somewhere */
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- assert(QLIST_EMPTY(&bs->tracked_requests));
- assert(qemu_co_queue_empty(&bs->throttled_reqs));
- }
-}
-
-/* make a BlockDriverState anonymous by removing from bdrv_state list.
- Also, NULL terminate the device_name to prevent double remove */
-void bdrv_make_anon(BlockDriverState *bs)
-{
- if (bs->device_name[0] != '\0') {
- QTAILQ_REMOVE(&bdrv_states, bs, list);
- }
- bs->device_name[0] = '\0';
-}
-
-static void bdrv_rebind(BlockDriverState *bs)
-{
- if (bs->drv && bs->drv->bdrv_rebind) {
- bs->drv->bdrv_rebind(bs);
- }
-}
-
-static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
- BlockDriverState *bs_src)
-{
- /* move some fields that need to stay attached to the device */
- bs_dest->open_flags = bs_src->open_flags;
-
- /* dev info */
- bs_dest->dev_ops = bs_src->dev_ops;
- bs_dest->dev_opaque = bs_src->dev_opaque;
- bs_dest->dev = bs_src->dev;
- bs_dest->buffer_alignment = bs_src->buffer_alignment;
- bs_dest->copy_on_read = bs_src->copy_on_read;
-
- bs_dest->enable_write_cache = bs_src->enable_write_cache;
-
- /* i/o timing parameters */
- bs_dest->slice_start = bs_src->slice_start;
- bs_dest->slice_end = bs_src->slice_end;
- bs_dest->slice_submitted = bs_src->slice_submitted;
- bs_dest->io_limits = bs_src->io_limits;
- bs_dest->throttled_reqs = bs_src->throttled_reqs;
- bs_dest->block_timer = bs_src->block_timer;
- bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
-
- /* r/w error */
- bs_dest->on_read_error = bs_src->on_read_error;
- bs_dest->on_write_error = bs_src->on_write_error;
-
- /* i/o status */
- bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
- bs_dest->iostatus = bs_src->iostatus;
-
- /* dirty bitmap */
- bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
-
- /* job */
- bs_dest->in_use = bs_src->in_use;
- bs_dest->job = bs_src->job;
-
- /* keep the same entry in bdrv_states */
- pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
- bs_src->device_name);
- bs_dest->list = bs_src->list;
-}
-
-/*
- * Swap bs contents for two image chains while they are live,
- * while keeping required fields on the BlockDriverState that is
- * actually attached to a device.
- *
- * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_old. Both bs_new and bs_old are modified.
- *
- * bs_new is required to be anonymous.
- *
- * This function does not create any image files.
- */
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
-{
- BlockDriverState tmp;
-
- /* bs_new must be anonymous and shouldn't have anything fancy enabled */
- assert(bs_new->device_name[0] == '\0');
- assert(bs_new->dirty_bitmap == NULL);
- assert(bs_new->job == NULL);
- assert(bs_new->dev == NULL);
- assert(bs_new->in_use == 0);
- assert(bs_new->io_limits_enabled == false);
- assert(bs_new->block_timer == NULL);
-
- tmp = *bs_new;
- *bs_new = *bs_old;
- *bs_old = tmp;
-
- /* there are some fields that should not be swapped, move them back */
- bdrv_move_feature_fields(&tmp, bs_old);
- bdrv_move_feature_fields(bs_old, bs_new);
- bdrv_move_feature_fields(bs_new, &tmp);
-
- /* bs_new shouldn't be in bdrv_states even after the swap! */
- assert(bs_new->device_name[0] == '\0');
-
- /* Check a few fields that should remain attached to the device */
- assert(bs_new->dev == NULL);
- assert(bs_new->job == NULL);
- assert(bs_new->in_use == 0);
- assert(bs_new->io_limits_enabled == false);
- assert(bs_new->block_timer == NULL);
-
- bdrv_rebind(bs_new);
- bdrv_rebind(bs_old);
-}
-
-/*
- * Add new bs contents at the top of an image chain while the chain is
- * live, while keeping required fields on the top layer.
- *
- * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_top. Both bs_new and bs_top are modified.
- *
- * bs_new is required to be anonymous.
- *
- * This function does not create any image files.
- */
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
-{
- bdrv_swap(bs_new, bs_top);
-
- /* The contents of 'tmp' will become bs_top, as we are
- * swapping bs_new and bs_top contents. */
- bs_top->backing_hd = bs_new;
- bs_top->open_flags &= ~BDRV_O_NO_BACKING;
- pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
- bs_new->filename);
- pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
- bs_new->drv ? bs_new->drv->format_name : "");
-}
-
-void bdrv_delete(BlockDriverState *bs)
-{
- assert(!bs->dev);
- assert(!bs->job);
- assert(!bs->in_use);
-
- /* remove from list, if necessary */
- bdrv_make_anon(bs);
-
- bdrv_close(bs);
-
- g_free(bs);
-}
-
-int bdrv_attach_dev(BlockDriverState *bs, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
- if (bs->dev) {
- return -EBUSY;
- }
- bs->dev = dev;
- bdrv_iostatus_reset(bs);
- return 0;
-}
-
-/* TODO qdevified devices don't use this, remove when devices are qdevified */
-void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
-{
- if (bdrv_attach_dev(bs, dev) < 0) {
- abort();
- }
-}
-
-void bdrv_detach_dev(BlockDriverState *bs, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
- assert(bs->dev == dev);
- bs->dev = NULL;
- bs->dev_ops = NULL;
- bs->dev_opaque = NULL;
- bs->buffer_alignment = 512;
-}
-
-/* TODO change to return DeviceState * when all users are qdevified */
-void *bdrv_get_attached_dev(BlockDriverState *bs)
-{
- return bs->dev;
-}
-
-void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
- void *opaque)
-{
- bs->dev_ops = ops;
- bs->dev_opaque = opaque;
-}
-
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
- enum MonitorEvent ev,
- BlockErrorAction action, bool is_read)
-{
- QObject *data;
- const char *action_str;
-
- switch (action) {
- case BDRV_ACTION_REPORT:
- action_str = "report";
- break;
- case BDRV_ACTION_IGNORE:
- action_str = "ignore";
- break;
- case BDRV_ACTION_STOP:
- action_str = "stop";
- break;
- default:
- abort();
- }
-
- data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
- bdrv->device_name,
- action_str,
- is_read ? "read" : "write");
- monitor_protocol_event(ev, data);
-
- qobject_decref(data);
-}
-
-static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
-{
- QObject *data;
-
- data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
- bdrv_get_device_name(bs), ejected);
- monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
-
- qobject_decref(data);
-}
-
-static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
-{
- if (bs->dev_ops && bs->dev_ops->change_media_cb) {
- bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
- bs->dev_ops->change_media_cb(bs->dev_opaque, load);
- if (tray_was_closed) {
- /* tray open */
- bdrv_emit_qmp_eject_event(bs, true);
- }
- if (load) {
- /* tray close */
- bdrv_emit_qmp_eject_event(bs, false);
- }
- }
-}
-
-bool bdrv_dev_has_removable_media(BlockDriverState *bs)
-{
- return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
-}
-
-void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
-{
- if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
- bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
- }
-}
-
-bool bdrv_dev_is_tray_open(BlockDriverState *bs)
-{
- if (bs->dev_ops && bs->dev_ops->is_tray_open) {
- return bs->dev_ops->is_tray_open(bs->dev_opaque);
- }
- return false;
-}
-
-static void bdrv_dev_resize_cb(BlockDriverState *bs)
-{
- if (bs->dev_ops && bs->dev_ops->resize_cb) {
- bs->dev_ops->resize_cb(bs->dev_opaque);
- }
-}
-
-bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
-{
- if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
- return bs->dev_ops->is_medium_locked(bs->dev_opaque);
- }
- return false;
-}
-
-/*
- * Run consistency checks on an image
- *
- * Returns 0 if the check could be completed (it doesn't mean that the image is
- * free of errors) or -errno when an internal error occurred. The results of the
- * check are stored in res.
- */
-int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
-{
- if (bs->drv->bdrv_check == NULL) {
- return -ENOTSUP;
- }
-
- memset(res, 0, sizeof(*res));
- return bs->drv->bdrv_check(bs, res, fix);
-}
-
-#define COMMIT_BUF_SECTORS 2048
-
-/* commit COW file into the raw image */
-int bdrv_commit(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- int64_t sector, total_sectors;
- int n, ro, open_flags;
- int ret = 0;
- uint8_t *buf;
- char filename[PATH_MAX];
-
- if (!drv)
- return -ENOMEDIUM;
-
- if (!bs->backing_hd) {
- return -ENOTSUP;
- }
-
- if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
- return -EBUSY;
- }
-
- ro = bs->backing_hd->read_only;
- /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
- pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
- open_flags = bs->backing_hd->open_flags;
-
- if (ro) {
- if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
- return -EACCES;
- }
- }
-
- total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
- buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
-
- for (sector = 0; sector < total_sectors; sector += n) {
- if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
-
- if (bdrv_read(bs, sector, buf, n) != 0) {
- ret = -EIO;
- goto ro_cleanup;
- }
-
- if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
- ret = -EIO;
- goto ro_cleanup;
- }
- }
- }
-
- if (drv->bdrv_make_empty) {
- ret = drv->bdrv_make_empty(bs);
- bdrv_flush(bs);
- }
-
- /*
- * Make sure all data we wrote to the backing device is actually
- * stable on disk.
- */
- if (bs->backing_hd)
- bdrv_flush(bs->backing_hd);
-
-ro_cleanup:
- g_free(buf);
-
- if (ro) {
- /* ignoring error return here */
- bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
- }
-
- return ret;
-}
-
-int bdrv_commit_all(void)
-{
- BlockDriverState *bs;
-
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- if (bs->drv && bs->backing_hd) {
- int ret = bdrv_commit(bs);
- if (ret < 0) {
- return ret;
- }
- }
- }
- return 0;
-}
-
-/**
- * Remove an active request from the tracked requests list
- *
- * This function should be called when a tracked request is completing.
- */
-static void tracked_request_end(BdrvTrackedRequest *req)
-{
- QLIST_REMOVE(req, list);
- qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-/**
- * Add an active request to the tracked requests list
- */
-static void tracked_request_begin(BdrvTrackedRequest *req,
- BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, bool is_write)
-{
- *req = (BdrvTrackedRequest){
- .bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .is_write = is_write,
- .co = qemu_coroutine_self(),
- };
-
- qemu_co_queue_init(&req->wait_queue);
-
- QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-}
-
-/**
- * Round a region to cluster boundaries
- */
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
-{
- BlockDriverInfo bdi;
-
- if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
- *cluster_sector_num = sector_num;
- *cluster_nb_sectors = nb_sectors;
- } else {
- int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
- *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
- *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
- nb_sectors, c);
- }
-}
-
-static bool tracked_request_overlaps(BdrvTrackedRequest *req,
- int64_t sector_num, int nb_sectors) {
- /* aaaa bbbb */
- if (sector_num >= req->sector_num + req->nb_sectors) {
- return false;
- }
- /* bbbb aaaa */
- if (req->sector_num >= sector_num + nb_sectors) {
- return false;
- }
- return true;
-}
-
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- BdrvTrackedRequest *req;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
- bool retry;
-
- /* If we touch the same cluster it counts as an overlap. This guarantees
- * that allocating writes will be serialized and not race with each other
- * for the same cluster. For example, in copy-on-read it ensures that the
- * CoR read and write operations are atomic and guest writes cannot
- * interleave between them.
- */
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
-
- do {
- retry = false;
- QLIST_FOREACH(req, &bs->tracked_requests, list) {
- if (tracked_request_overlaps(req, cluster_sector_num,
- cluster_nb_sectors)) {
- /* Hitting this means there was a reentrant request, for
- * example, a block driver issuing nested requests. This must
- * never happen since it means deadlock.
- */
- assert(qemu_coroutine_self() != req->co);
-
- qemu_co_queue_wait(&req->wait_queue);
- retry = true;
- break;
- }
- }
- } while (retry);
-}
-
-/*
- * Return values:
- * 0 - success
- * -EINVAL - backing format specified, but no file
- * -ENOSPC - can't update the backing file because no space is left in the
- * image file header
- * -ENOTSUP - format driver doesn't support changing the backing file
- */
-int bdrv_change_backing_file(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt)
-{
- BlockDriver *drv = bs->drv;
- int ret;
-
- /* Backing file format doesn't make sense without a backing file */
- if (backing_fmt && !backing_file) {
- return -EINVAL;
- }
-
- if (drv->bdrv_change_backing_file != NULL) {
- ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
- } else {
- ret = -ENOTSUP;
- }
-
- if (ret == 0) {
- pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
- pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
- }
- return ret;
-}
-
-/*
- * Finds the image layer in the chain that has 'bs' as its backing file.
- *
- * active is the current topmost image.
- *
- * Returns NULL if bs is not found in active's image chain,
- * or if active == bs.
- */
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
- BlockDriverState *bs)
-{
- BlockDriverState *overlay = NULL;
- BlockDriverState *intermediate;
-
- assert(active != NULL);
- assert(bs != NULL);
-
- /* if bs is the same as active, then by definition it has no overlay
- */
- if (active == bs) {
- return NULL;
- }
-
- intermediate = active;
- while (intermediate->backing_hd) {
- if (intermediate->backing_hd == bs) {
- overlay = intermediate;
- break;
- }
- intermediate = intermediate->backing_hd;
- }
-
- return overlay;
-}
-
-typedef struct BlkIntermediateStates {
- BlockDriverState *bs;
- QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
-} BlkIntermediateStates;
-
-
-/*
- * Drops images above 'base' up to and including 'top', and sets the image
- * above 'top' to have base as its backing file.
- *
- * Requires that the overlay to 'top' is opened r/w, so that the backing file
- * information in 'bs' can be properly updated.
- *
- * E.g., this will convert the following chain:
- * bottom <- base <- intermediate <- top <- active
- *
- * to
- *
- * bottom <- base <- active
- *
- * It is allowed for bottom==base, in which case it converts:
- *
- * base <- intermediate <- top <- active
- *
- * to
- *
- * base <- active
- *
- * Error conditions:
- * if active == top, that is considered an error
- *
- */
-int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
- BlockDriverState *base)
-{
- BlockDriverState *intermediate;
- BlockDriverState *base_bs = NULL;
- BlockDriverState *new_top_bs = NULL;
- BlkIntermediateStates *intermediate_state, *next;
- int ret = -EIO;
-
- QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
- QSIMPLEQ_INIT(&states_to_delete);
-
- if (!top->drv || !base->drv) {
- goto exit;
- }
-
- new_top_bs = bdrv_find_overlay(active, top);
-
- if (new_top_bs == NULL) {
- /* we could not find the image above 'top', this is an error */
- goto exit;
- }
-
- /* special case of new_top_bs->backing_hd already pointing to base - nothing
- * to do, no intermediate images */
- if (new_top_bs->backing_hd == base) {
- ret = 0;
- goto exit;
- }
-
- intermediate = top;
-
- /* now we will go down through the list, and add each BDS we find
- * into our deletion queue, until we hit the 'base'
- */
- while (intermediate) {
- intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
- intermediate_state->bs = intermediate;
- QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
-
- if (intermediate->backing_hd == base) {
- base_bs = intermediate->backing_hd;
- break;
- }
- intermediate = intermediate->backing_hd;
- }
- if (base_bs == NULL) {
- /* something went wrong, we did not end at the base. safely
- * unravel everything, and exit with error */
- goto exit;
- }
-
- /* success - we can delete the intermediate states, and link top->base */
- ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
- base_bs->drv ? base_bs->drv->format_name : "");
- if (ret) {
- goto exit;
- }
- new_top_bs->backing_hd = base_bs;
-
-
- QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
- /* so that bdrv_close() does not recursively close the chain */
- intermediate_state->bs->backing_hd = NULL;
- bdrv_delete(intermediate_state->bs);
- }
- ret = 0;
-
-exit:
- QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
- g_free(intermediate_state);
- }
- return ret;
-}
-
-
-static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
- size_t size)
-{
- int64_t len;
-
- if (!bdrv_is_inserted(bs))
- return -ENOMEDIUM;
-
- if (bs->growable)
- return 0;
-
- len = bdrv_getlength(bs);
-
- if (offset < 0)
- return -EIO;
-
- if ((offset > len) || (len - offset < size))
- return -EIO;
-
- return 0;
-}
-
-static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
-{
- return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE);
-}
-
-typedef struct RwCo {
- BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
- QEMUIOVector *qiov;
- bool is_write;
- int ret;
-} RwCo;
-
-static void coroutine_fn bdrv_rw_co_entry(void *opaque)
-{
- RwCo *rwco = opaque;
-
- if (!rwco->is_write) {
- rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
- } else {
- rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov, 0);
- }
-}
-
-/*
- * Process a vectored synchronous request using coroutines
- */
-static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, bool is_write)
-{
- Coroutine *co;
- RwCo rwco = {
- .bs = bs,
- .sector_num = sector_num,
- .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
- .qiov = qiov,
- .is_write = is_write,
- .ret = NOT_DONE,
- };
- assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
-
- /**
- * In sync call context, when the vcpu is blocked, this throttling timer
- * will not fire; so the I/O throttling function has to be disabled here
- * if it has been enabled.
- */
- if (bs->io_limits_enabled) {
- fprintf(stderr, "Disabling I/O throttling on '%s' due "
- "to synchronous I/O.\n", bdrv_get_device_name(bs));
- bdrv_io_limits_disable(bs);
- }
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_rw_co_entry(&rwco);
- } else {
- co = qemu_coroutine_create(bdrv_rw_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
- qemu_aio_wait();
- }
- }
- return rwco.ret;
-}
-
-/*
- * Process a synchronous request using coroutines
- */
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors, bool is_write)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *)buf,
- .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
- };
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
-}
-
-/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
-}
-
-/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- bool enabled;
- int ret;
-
- enabled = bs->io_limits_enabled;
- bs->io_limits_enabled = false;
- ret = bdrv_read(bs, 0, buf, 1);
- bs->io_limits_enabled = enabled;
- return ret;
-}
-
-/* Return < 0 if error. Important errors are:
- -EIO generic I/O error (may happen for all errors)
- -ENOMEDIUM No media inserted.
- -EINVAL Invalid sector number or nb_sectors
- -EACCES Trying to write a read-only device
-*/
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
-}
-
-int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
-{
- return bdrv_rwv_co(bs, sector_num, qiov, true);
-}
-
-int bdrv_pread(BlockDriverState *bs, int64_t offset,
- void *buf, int count1)
-{
- uint8_t tmp_buf[BDRV_SECTOR_SIZE];
- int len, nb_sectors, count;
- int64_t sector_num;
- int ret;
-
- count = count1;
- /* first read to align to sector start */
- len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
- if (len > count)
- len = count;
- sector_num = offset >> BDRV_SECTOR_BITS;
- if (len > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
- count -= len;
- if (count == 0)
- return count1;
- sector_num++;
- buf += len;
- }
-
- /* read the sectors "in place" */
- nb_sectors = count >> BDRV_SECTOR_BITS;
- if (nb_sectors > 0) {
- if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
- return ret;
- sector_num += nb_sectors;
- len = nb_sectors << BDRV_SECTOR_BITS;
- buf += len;
- count -= len;
- }
-
- /* add data from the last sector */
- if (count > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- memcpy(buf, tmp_buf, count);
- }
- return count1;
-}
-
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
-{
- uint8_t tmp_buf[BDRV_SECTOR_SIZE];
- int len, nb_sectors, count;
- int64_t sector_num;
- int ret;
-
- count = qiov->size;
-
- /* first write to align to sector start */
- len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
- if (len > count)
- len = count;
- sector_num = offset >> BDRV_SECTOR_BITS;
- if (len > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
- len);
- if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- count -= len;
- if (count == 0)
- return qiov->size;
- sector_num++;
- }
-
- /* write the sectors "in place" */
- nb_sectors = count >> BDRV_SECTOR_BITS;
- if (nb_sectors > 0) {
- QEMUIOVector qiov_inplace;
-
- qemu_iovec_init(&qiov_inplace, qiov->niov);
- qemu_iovec_concat(&qiov_inplace, qiov, len,
- nb_sectors << BDRV_SECTOR_BITS);
- ret = bdrv_writev(bs, sector_num, &qiov_inplace);
- qemu_iovec_destroy(&qiov_inplace);
- if (ret < 0) {
- return ret;
- }
-
- sector_num += nb_sectors;
- len = nb_sectors << BDRV_SECTOR_BITS;
- count -= len;
- }
-
- /* add data from the last sector */
- if (count > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
- if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- }
- return qiov->size;
-}
-
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int count1)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = count1,
- };
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_pwritev(bs, offset, &qiov);
-}
-
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
- const void *buf, int count)
-{
- int ret;
-
- ret = bdrv_pwrite(bs, offset, buf, count);
- if (ret < 0) {
- return ret;
- }
-
- /* No flush needed for cache modes that already do it */
- if (bs->enable_write_cache) {
- bdrv_flush(bs);
- }
-
- return 0;
-}
-
-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- /* Perform I/O through a temporary buffer so that users who scribble over
- * their read buffer while the operation is in progress do not end up
- * modifying the image file. This is critical for zero-copy guest I/O
- * where anything might happen inside guest memory.
- */
- void *bounce_buffer;
-
- BlockDriver *drv = bs->drv;
- struct iovec iov;
- QEMUIOVector bounce_qiov;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
- size_t skip_bytes;
- int ret;
-
- /* Cover entire cluster so no additional backing file I/O is required when
- * allocating cluster in the image file.
- */
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
-
- trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
- cluster_sector_num, cluster_nb_sectors);
-
- iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
- ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
- if (ret < 0) {
- goto err;
- }
-
- if (drv->bdrv_co_write_zeroes &&
- buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
- cluster_nb_sectors);
- } else {
- /* This does not change the data on the disk, it is not necessary
- * to flush even in cache=writethrough mode.
- */
- ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
- }
-
- if (ret < 0) {
- /* It might be okay to ignore write errors for guest requests. If this
- * is a deliberate copy-on-read then we don't want to ignore the error.
- * Simply report it in all cases.
- */
- goto err;
- }
-
- skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
- nb_sectors * BDRV_SECTOR_SIZE);
-
-err:
- qemu_vfree(bounce_buffer);
- return ret;
-}
-
-/*
- * Handle a read request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- BlockDriver *drv = bs->drv;
- BdrvTrackedRequest req;
- int ret;
-
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- }
-
- /* throttling disk read I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, false, nb_sectors);
- }
-
- if (bs->copy_on_read) {
- flags |= BDRV_REQ_COPY_ON_READ;
- }
- if (flags & BDRV_REQ_COPY_ON_READ) {
- bs->copy_on_read_in_flight++;
- }
-
- if (bs->copy_on_read_in_flight) {
- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
- }
-
- tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
-
- if (flags & BDRV_REQ_COPY_ON_READ) {
- int pnum;
-
- ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
- if (ret < 0) {
- goto out;
- }
-
- if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
- goto out;
- }
- }
-
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
-
-out:
- tracked_request_end(&req);
-
- if (flags & BDRV_REQ_COPY_ON_READ) {
- bs->copy_on_read_in_flight--;
- }
-
- return ret;
-}
-
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_readv(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
- BDRV_REQ_COPY_ON_READ);
-}
-
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- BlockDriver *drv = bs->drv;
- QEMUIOVector qiov;
- struct iovec iov;
- int ret;
-
- /* TODO Emulate only part of misaligned requests instead of letting block
- * drivers return -ENOTSUP and emulate everything */
-
- /* First try the efficient write zeroes operation */
- if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
- if (ret != -ENOTSUP) {
- return ret;
- }
- }
-
- /* Fall back to bounce buffer if write zeroes is unsupported */
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
- memset(iov.iov_base, 0, iov.iov_len);
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
-
- qemu_vfree(iov.iov_base);
- return ret;
-}
-
-/*
- * Handle a write request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- BlockDriver *drv = bs->drv;
- BdrvTrackedRequest req;
- int ret;
-
- if (!bs->drv) {
- return -ENOMEDIUM;
- }
- if (bs->read_only) {
- return -EACCES;
- }
- if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- }
-
- /* throttling disk write I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, true, nb_sectors);
- }
-
- if (bs->copy_on_read_in_flight) {
- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
- }
-
- tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
-
- ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
-
- if (ret < 0) {
- /* Do nothing, write notifier decided to fail this request */
- } else if (flags & BDRV_REQ_ZERO_WRITE) {
- ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
- } else {
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
- }
-
- if (ret == 0 && !bs->enable_write_cache) {
- ret = bdrv_co_flush(bs);
- }
-
- if (bs->dirty_bitmap) {
- bdrv_set_dirty(bs, sector_num, nb_sectors);
- }
-
- if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
- bs->wr_highest_sector = sector_num + nb_sectors - 1;
- }
-
- tracked_request_end(&req);
-
- return ret;
-}
-
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_writev(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE);
-}
-
-/**
- * Truncate file to 'offset' bytes (needed only for file protocols)
- */
-int bdrv_truncate(BlockDriverState *bs, int64_t offset)
-{
- BlockDriver *drv = bs->drv;
- int ret;
- if (!drv)
- return -ENOMEDIUM;
- if (!drv->bdrv_truncate)
- return -ENOTSUP;
- if (bs->read_only)
- return -EACCES;
- if (bdrv_in_use(bs))
- return -EBUSY;
- ret = drv->bdrv_truncate(bs, offset);
- if (ret == 0) {
- ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
- bdrv_dev_resize_cb(bs);
- }
- return ret;
-}
-
-/**
- * Length of a allocated file in bytes. Sparse files are counted by actual
- * allocated space. Return < 0 if error or unknown.
- */
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_get_allocated_file_size) {
- return drv->bdrv_get_allocated_file_size(bs);
- }
- if (bs->file) {
- return bdrv_get_allocated_file_size(bs->file);
- }
- return -ENOTSUP;
-}
-
-/**
- * Length of a file in bytes. Return < 0 if error or unknown.
- */
-int64_t bdrv_getlength(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
-
- if (bs->growable || bdrv_dev_has_removable_media(bs)) {
- if (drv->bdrv_getlength) {
- return drv->bdrv_getlength(bs);
- }
- }
- return bs->total_sectors * BDRV_SECTOR_SIZE;
-}
-
-/* return 0 as number of sectors if no device present or error */
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
-{
- int64_t length;
- length = bdrv_getlength(bs);
- if (length < 0)
- length = 0;
- else
- length = length >> BDRV_SECTOR_BITS;
- *nb_sectors_ptr = length;
-}
-
-/* throttling disk io limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
- BlockIOLimit *io_limits)
-{
- bs->io_limits = *io_limits;
- bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
-}
-
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error)
-{
- bs->on_read_error = on_read_error;
- bs->on_write_error = on_write_error;
-}
-
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
-{
- return is_read ? bs->on_read_error : bs->on_write_error;
-}
-
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
-{
- BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
-
- switch (on_err) {
- case BLOCKDEV_ON_ERROR_ENOSPC:
- return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
- case BLOCKDEV_ON_ERROR_STOP:
- return BDRV_ACTION_STOP;
- case BLOCKDEV_ON_ERROR_REPORT:
- return BDRV_ACTION_REPORT;
- case BLOCKDEV_ON_ERROR_IGNORE:
- return BDRV_ACTION_IGNORE;
- default:
- abort();
- }
-}
-
-/* This is done by device models because, while the block layer knows
- * about the error, it does not know whether an operation comes from
- * the device or the block layer (from a job, for example).
- */
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
- bool is_read, int error)
-{
- assert(error >= 0);
- bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
- if (action == BDRV_ACTION_STOP) {
- vm_stop(RUN_STATE_IO_ERROR);
- bdrv_iostatus_set_err(bs, error);
- }
-}
-
-int bdrv_is_read_only(BlockDriverState *bs)
-{
- return bs->read_only;
-}
-
-int bdrv_is_sg(BlockDriverState *bs)
-{
- return bs->sg;
-}
-
-int bdrv_enable_write_cache(BlockDriverState *bs)
-{
- return bs->enable_write_cache;
-}
-
-void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
-{
- bs->enable_write_cache = wce;
-
- /* so a reopen() will preserve wce */
- if (wce) {
- bs->open_flags |= BDRV_O_CACHE_WB;
- } else {
- bs->open_flags &= ~BDRV_O_CACHE_WB;
- }
-}
-
-int bdrv_is_encrypted(BlockDriverState *bs)
-{
- if (bs->backing_hd && bs->backing_hd->encrypted)
- return 1;
- return bs->encrypted;
-}
-
-int bdrv_key_required(BlockDriverState *bs)
-{
- BlockDriverState *backing_hd = bs->backing_hd;
-
- if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
- return 1;
- return (bs->encrypted && !bs->valid_key);
-}
-
-int bdrv_set_key(BlockDriverState *bs, const char *key)
-{
- int ret;
- if (bs->backing_hd && bs->backing_hd->encrypted) {
- ret = bdrv_set_key(bs->backing_hd, key);
- if (ret < 0)
- return ret;
- if (!bs->encrypted)
- return 0;
- }
- if (!bs->encrypted) {
- return -EINVAL;
- } else if (!bs->drv || !bs->drv->bdrv_set_key) {
- return -ENOMEDIUM;
- }
- ret = bs->drv->bdrv_set_key(bs, key);
- if (ret < 0) {
- bs->valid_key = 0;
- } else if (!bs->valid_key) {
- bs->valid_key = 1;
- /* call the change callback now, we skipped it on open */
- bdrv_dev_change_media_cb(bs, true);
- }
- return ret;
-}
-
-const char *bdrv_get_format_name(BlockDriverState *bs)
-{
- return bs->drv ? bs->drv->format_name : NULL;
-}
-
-void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
- void *opaque)
-{
- BlockDriver *drv;
-
- QLIST_FOREACH(drv, &bdrv_drivers, list) {
- it(opaque, drv->format_name);
- }
-}
-
-BlockDriverState *bdrv_find(const char *name)
-{
- BlockDriverState *bs;
-
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- if (!strcmp(name, bs->device_name)) {
- return bs;
- }
- }
- return NULL;
-}
-
-BlockDriverState *bdrv_next(BlockDriverState *bs)
-{
- if (!bs) {
- return QTAILQ_FIRST(&bdrv_states);
- }
- return QTAILQ_NEXT(bs, list);
-}
-
-void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
-{
- BlockDriverState *bs;
-
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- it(opaque, bs);
- }
-}
-
-const char *bdrv_get_device_name(BlockDriverState *bs)
-{
- return bs->device_name;
-}
-
-int bdrv_get_flags(BlockDriverState *bs)
-{
- return bs->open_flags;
-}
-
-int bdrv_flush_all(void)
-{
- BlockDriverState *bs;
- int result = 0;
-
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- int ret = bdrv_flush(bs);
- if (ret < 0 && !result) {
- result = ret;
- }
- }
-
- return result;
-}
-
-int bdrv_has_zero_init_1(BlockDriverState *bs)
-{
- return 1;
-}
-
-int bdrv_has_zero_init(BlockDriverState *bs)
-{
- assert(bs->drv);
-
- if (bs->drv->bdrv_has_zero_init) {
- return bs->drv->bdrv_has_zero_init(bs);
- }
-
- /* safe default */
- return 0;
-}
-
-typedef struct BdrvCoIsAllocatedData {
- BlockDriverState *bs;
- BlockDriverState *base;
- int64_t sector_num;
- int nb_sectors;
- int *pnum;
- int ret;
- bool done;
-} BdrvCoIsAllocatedData;
-
-/*
- * Returns true iff the specified sector is present in the disk image. Drivers
- * not implementing the functionality are assumed to not support backing files,
- * hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- int64_t n;
-
- if (sector_num >= bs->total_sectors) {
- *pnum = 0;
- return 0;
- }
-
- n = bs->total_sectors - sector_num;
- if (n < nb_sectors) {
- nb_sectors = n;
- }
-
- if (!bs->drv->bdrv_co_is_allocated) {
- *pnum = nb_sectors;
- return 1;
- }
-
- return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
-}
-
-/* Coroutine wrapper for bdrv_is_allocated() */
-static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
-{
- BdrvCoIsAllocatedData *data = opaque;
- BlockDriverState *bs = data->bs;
-
- data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
- data->pnum);
- data->done = true;
-}
-
-/*
- * Synchronous wrapper around bdrv_co_is_allocated().
- *
- * See bdrv_co_is_allocated() for details.
- */
-int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum)
-{
- Coroutine *co;
- BdrvCoIsAllocatedData data = {
- .bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .pnum = pnum,
- .done = false,
- };
-
- co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
- qemu_coroutine_enter(co, &data);
- while (!data.done) {
- qemu_aio_wait();
- }
- return data.ret;
-}
-
-/*
- * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
- *
- * Return true if the given sector is allocated in any image between
- * BASE and TOP (inclusive). BASE can be NULL to check if the given
- * sector is allocated in any image of the chain. Return false otherwise.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- */
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- BlockDriverState *intermediate;
- int ret, n = nb_sectors;
-
- intermediate = top;
- while (intermediate && intermediate != base) {
- int pnum_inter;
- ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
- &pnum_inter);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- *pnum = pnum_inter;
- return 1;
- }
-
- /*
- * [sector_num, nb_sectors] is unallocated on top but intermediate
- * might have
- *
- * [sector_num+x, nr_sectors] allocated.
- */
- if (n > pnum_inter &&
- (intermediate == top ||
- sector_num + pnum_inter < intermediate->total_sectors)) {
- n = pnum_inter;
- }
-
- intermediate = intermediate->backing_hd;
- }
-
- *pnum = n;
- return 0;
-}
-
-/* Coroutine wrapper for bdrv_is_allocated_above() */
-static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
-{
- BdrvCoIsAllocatedData *data = opaque;
- BlockDriverState *top = data->bs;
- BlockDriverState *base = data->base;
-
- data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num,
- data->nb_sectors, data->pnum);
- data->done = true;
-}
-
-/*
- * Synchronous wrapper around bdrv_co_is_allocated_above().
- *
- * See bdrv_co_is_allocated_above() for details.
- */
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- Coroutine *co;
- BdrvCoIsAllocatedData data = {
- .bs = top,
- .base = base,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .pnum = pnum,
- .done = false,
- };
-
- co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
- qemu_coroutine_enter(co, &data);
- while (!data.done) {
- qemu_aio_wait();
- }
- return data.ret;
-}
-
-const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
-{
- if (bs->backing_hd && bs->backing_hd->encrypted)
- return bs->backing_file;
- else if (bs->encrypted)
- return bs->filename;
- else
- return NULL;
-}
-
-void bdrv_get_backing_filename(BlockDriverState *bs,
- char *filename, int filename_size)
-{
- pstrcpy(filename, filename_size, bs->backing_file);
-}
-
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (!drv->bdrv_write_compressed)
- return -ENOTSUP;
- if (bdrv_check_request(bs, sector_num, nb_sectors))
- return -EIO;
-
- assert(!bs->dirty_bitmap);
-
- return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
-}
-
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (!drv->bdrv_get_info)
- return -ENOTSUP;
- memset(bdi, 0, sizeof(*bdi));
- return drv->bdrv_get_info(bs, bdi);
-}
-
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = size,
- };
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_writev_vmstate(bs, &qiov, pos);
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
- BlockDriver *drv = bs->drv;
-
- if (!drv) {
- return -ENOMEDIUM;
- } else if (drv->bdrv_save_vmstate) {
- return drv->bdrv_save_vmstate(bs, qiov, pos);
- } else if (bs->file) {
- return bdrv_writev_vmstate(bs->file, qiov, pos);
- }
-
- return -ENOTSUP;
-}
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (drv->bdrv_load_vmstate)
- return drv->bdrv_load_vmstate(bs, buf, pos, size);
- if (bs->file)
- return bdrv_load_vmstate(bs->file, buf, pos, size);
- return -ENOTSUP;
-}
-
-void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
-{
- if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
- return;
- }
-
- bs->drv->bdrv_debug_event(bs, event);
-}
-
-int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
- const char *tag)
-{
- while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
- bs = bs->file;
- }
-
- if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
- return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
- }
-
- return -ENOTSUP;
-}
-
-int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
-{
- while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
- bs = bs->file;
- }
-
- if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
- return bs->drv->bdrv_debug_resume(bs, tag);
- }
-
- return -ENOTSUP;
-}
-
-bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
-{
- while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
- bs = bs->file;
- }
-
- if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
- return bs->drv->bdrv_debug_is_suspended(bs, tag);
- }
-
- return false;
-}
-
-int bdrv_is_snapshot(BlockDriverState *bs)
-{
- return !!(bs->open_flags & BDRV_O_SNAPSHOT);
-}
-
-/* backing_file can either be relative, or absolute, or a protocol. If it is
- * relative, it must be relative to the chain. So, passing in bs->filename
- * from a BDS as backing_file should not be done, as that may be relative to
- * the CWD rather than the chain. */
-BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
- const char *backing_file)
-{
- char *filename_full = NULL;
- char *backing_file_full = NULL;
- char *filename_tmp = NULL;
- int is_protocol = 0;
- BlockDriverState *curr_bs = NULL;
- BlockDriverState *retval = NULL;
-
- if (!bs || !bs->drv || !backing_file) {
- return NULL;
- }
-
- filename_full = g_malloc(PATH_MAX);
- backing_file_full = g_malloc(PATH_MAX);
- filename_tmp = g_malloc(PATH_MAX);
-
- is_protocol = path_has_protocol(backing_file);
-
- for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
-
- /* If either of the filename paths is actually a protocol, then
- * compare unmodified paths; otherwise make paths relative */
- if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
- if (strcmp(backing_file, curr_bs->backing_file) == 0) {
- retval = curr_bs->backing_hd;
- break;
- }
- } else {
- /* If not an absolute filename path, make it relative to the current
- * image's filename path */
- path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
- backing_file);
-
- /* We are going to compare absolute pathnames */
- if (!realpath(filename_tmp, filename_full)) {
- continue;
- }
-
- /* We need to make sure the backing filename we are comparing against
- * is relative to the current image filename (or absolute) */
- path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
- curr_bs->backing_file);
-
- if (!realpath(filename_tmp, backing_file_full)) {
- continue;
- }
-
- if (strcmp(backing_file_full, filename_full) == 0) {
- retval = curr_bs->backing_hd;
- break;
- }
- }
- }
-
- g_free(filename_full);
- g_free(backing_file_full);
- g_free(filename_tmp);
- return retval;
-}
-
-int bdrv_get_backing_file_depth(BlockDriverState *bs)
-{
- if (!bs->drv) {
- return 0;
- }
-
- if (!bs->backing_hd) {
- return 0;
- }
-
- return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
-}
-
-BlockDriverState *bdrv_find_base(BlockDriverState *bs)
-{
- BlockDriverState *curr_bs = NULL;
-
- if (!bs) {
- return NULL;
- }
-
- curr_bs = bs;
-
- while (curr_bs->backing_hd) {
- curr_bs = curr_bs->backing_hd;
- }
- return curr_bs;
-}
-
-/**************************************************************/
-/* async I/Os */
-
-BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
- cb, opaque, false);
-}
-
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
- cb, opaque, true);
-}
-
-
-typedef struct MultiwriteCB {
- int error;
- int num_requests;
- int num_callbacks;
- struct {
- BlockDriverCompletionFunc *cb;
- void *opaque;
- QEMUIOVector *free_qiov;
- } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
- int i;
-
- for (i = 0; i < mcb->num_callbacks; i++) {
- mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
- if (mcb->callbacks[i].free_qiov) {
- qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
- }
- g_free(mcb->callbacks[i].free_qiov);
- }
-}
-
-static void multiwrite_cb(void *opaque, int ret)
-{
- MultiwriteCB *mcb = opaque;
-
- trace_multiwrite_cb(mcb, ret);
-
- if (ret < 0 && !mcb->error) {
- mcb->error = ret;
- }
-
- mcb->num_requests--;
- if (mcb->num_requests == 0) {
- multiwrite_user_cb(mcb);
- g_free(mcb);
- }
-}
-
-static int multiwrite_req_compare(const void *a, const void *b)
-{
- const BlockRequest *req1 = a, *req2 = b;
-
- /*
- * Note that we can't simply subtract req2->sector from req1->sector
- * here as that could overflow the return value.
- */
- if (req1->sector > req2->sector) {
- return 1;
- } else if (req1->sector < req2->sector) {
- return -1;
- } else {
- return 0;
- }
-}
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- *
- * reqs is sorted in place by start sector and then compacted: the surviving
- * requests occupy reqs[0 .. return value - 1]. Every merged I/O vector that
- * is allocated here is recorded in mcb->callbacks[i].free_qiov.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
-                            int num_reqs, MultiwriteCB *mcb)
-{
-    int i, outidx;
-
-    // Sort requests by start sector
-    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
-    // Combine requests that are exactly sequential or that overlap. Requests
-    // separated by a gap are never merged, so no zero padding is needed.
-    outidx = 0;
-    for (i = 1; i < num_reqs; i++) {
-        int merge = 0;
-        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
-        // Handle exactly sequential writes and overlapping writes.
-        if (reqs[i].sector <= oldreq_last) {
-            merge = 1;
-        }
-
-        // The merged vector must not exceed IOV_MAX entries.
-        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
-            merge = 0;
-        }
-
-        if (merge) {
-            size_t size;
-            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
-            qemu_iovec_init(qiov,
-                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
-            // Add the first request to the merged one. If the requests are
-            // overlapping, only the part of the first request that precedes
-            // the second one is taken here.
-            size = (reqs[i].sector - reqs[outidx].sector) << 9;
-            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
-            // We shouldn't need to add any zeros between the two requests
-            assert (reqs[i].sector <= oldreq_last);
-
-            // Add the second request
-            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
-            // If the second request ends before the first one does, re-add
-            // the tail of the first request; otherwise that data would be
-            // dropped and the merged request would become shorter.
-            if (qiov->size < reqs[outidx].qiov->size) {
-                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
-                                  reqs[outidx].qiov->size - qiov->size);
-            }
-
-            reqs[outidx].nb_sectors = qiov->size >> 9;
-            reqs[outidx].qiov = qiov;
-
-            // Remember the allocated vector in the batch's callback table
-            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
-        } else {
-            // No merge: request i becomes the next output slot
-            outidx++;
-            reqs[outidx].sector     = reqs[i].sector;
-            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
-            reqs[outidx].qiov       = reqs[i].qiov;
-        }
-    }
-
-    return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * Returns 0 on success, in which case all requests in the reqs array have
- * been submitted. Returns -1 on error; any of the requests may or may not
- * have been submitted by then, so the callback will be called for some
- * requests and not for others. The caller must inspect the error field of
- * each BlockRequest to know which callbacks to wait for (if error != 0, no
- * callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to
- * merge requests. The fields opaque and error are left unmodified because
- * they are used to signal failure of a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
-    MultiwriteCB *batch;
-    int idx;
-
-    /* don't submit writes if we don't have a medium */
-    if (!bs->drv) {
-        for (idx = 0; idx < num_reqs; idx++) {
-            reqs[idx].error = -ENOMEDIUM;
-        }
-        return -1;
-    }
-
-    if (!num_reqs) {
-        return 0;
-    }
-
-    /* One callback slot per original request, allocated with the header. */
-    batch = g_malloc0(sizeof(*batch) + num_reqs * sizeof(*batch->callbacks));
-    batch->num_requests = 0;
-    batch->num_callbacks = num_reqs;
-
-    for (idx = 0; idx < num_reqs; idx++) {
-        batch->callbacks[idx].cb = reqs[idx].cb;
-        batch->callbacks[idx].opaque = reqs[idx].opaque;
-    }
-
-    /* Merging may reduce the number of requests actually submitted. */
-    num_reqs = multiwrite_merge(bs, reqs, num_reqs, batch);
-
-    trace_bdrv_aio_multiwrite(batch, batch->num_callbacks, num_reqs);
-
-    /* Run the aio requests. */
-    batch->num_requests = num_reqs;
-    for (idx = 0; idx < num_reqs; idx++) {
-        bdrv_aio_writev(bs, reqs[idx].sector, reqs[idx].qiov,
-                        reqs[idx].nb_sectors, multiwrite_cb, batch);
-    }
-
-    return 0;
-}
-
-/* Cancel a request by invoking the cancel hook of its AIOCBInfo. */
-void bdrv_aio_cancel(BlockDriverAIOCB *acb)
-{
-    const AIOCBInfo *info = acb->aiocb_info;
-
-    info->cancel(acb);
-}
-
-/* block I/O throttling */
-
-/*
- * Check whether a request of nb_sectors sectors would exceed the configured
- * bytes-per-second limit for the current slice.
- *
- * The total limit takes precedence over the per-direction limit. If neither
- * is set, or the request still fits into the slice, *wait (when non-NULL)
- * is set to 0 and false is returned. Otherwise bs->slice_end is extended,
- * *wait receives the estimated delay scaled by NANOSECONDS_PER_SECOND, and
- * true is returned.
- */
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
-                 bool is_write, double elapsed_time, uint64_t *wait)
-{
-    uint64_t limit = 0;
-    uint64_t extension;
-    double allowed, submitted, pending;
-    double slice_secs, delay;
-
-    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
-        limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
-    } else if (bs->io_limits.bps[is_write]) {
-        limit = bs->io_limits.bps[is_write];
-    } else {
-        /* No applicable limit configured. */
-        if (wait) {
-            *wait = 0;
-        }
-        return false;
-    }
-
-    slice_secs = bs->slice_end - bs->slice_start;
-    slice_secs /= (NANOSECONDS_PER_SECOND);
-    allowed = limit * slice_secs;
-
-    /* submitted: the bytes already read/written in this slice, taken from
-     * the slice statistics; with a total limit both directions count.
-     * pending: the bytes this request still needs to read/write.
-     * (submitted + pending) / limit is used to calculate the total time
-     * for completing reading/writing all data.
-     */
-    submitted = bs->slice_submitted.bytes[is_write];
-    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
-        submitted += bs->slice_submitted.bytes[!is_write];
-    }
-    pending = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
-
-    if (submitted + pending <= allowed) {
-        if (wait) {
-            *wait = 0;
-        }
-        return false;
-    }
-
-    /* Calc approx time to dispatch */
-    delay = (submitted + pending) / limit - elapsed_time;
-
-    /* When the I/O rate at runtime exceeds the limits, bs->slice_end needs
-     * to be extended so that the current statistics are kept until the
-     * timer fires. The extension is rounded up to a multiple of
-     * BLOCK_IO_SLICE_TIME; the original comment notes this was tuned based
-     * on experiment.
-     */
-    extension = delay * NANOSECONDS_PER_SECOND;
-    extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
-                BLOCK_IO_SLICE_TIME;
-    bs->slice_end += extension;
-
-    if (wait) {
-        *wait = delay * NANOSECONDS_PER_SECOND;
-    }
-    return true;
-}
-
-/*
- * Check whether one more I/O operation would exceed the configured
- * operations-per-second limit for the current slice.
- *
- * The total limit takes precedence over the per-direction limit. If neither
- * is set, or one more operation still fits, *wait (when non-NULL) is set to
- * 0 and false is returned. Otherwise bs->slice_end is extended by one
- * BLOCK_IO_SLICE_TIME, *wait receives the estimated delay scaled by
- * NANOSECONDS_PER_SECOND, and true is returned.
- */
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
-                 double elapsed_time, uint64_t *wait)
-{
-    uint64_t limit = 0;
-    double allowed, submitted;
-    double slice_secs, delay;
-
-    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
-        limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
-    } else if (bs->io_limits.iops[is_write]) {
-        limit = bs->io_limits.iops[is_write];
-    } else {
-        /* No applicable limit configured. */
-        if (wait) {
-            *wait = 0;
-        }
-        return false;
-    }
-
-    slice_secs = bs->slice_end - bs->slice_start;
-    slice_secs /= (NANOSECONDS_PER_SECOND);
-    allowed = limit * slice_secs;
-
-    /* With a total limit, operations of both directions are counted. */
-    submitted = bs->slice_submitted.ios[is_write];
-    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
-        submitted += bs->slice_submitted.ios[!is_write];
-    }
-
-    if (submitted + 1 <= allowed) {
-        if (wait) {
-            *wait = 0;
-        }
-        return false;
-    }
-
-    /* Calc approx time to dispatch, in seconds; never negative. */
-    delay = (submitted + 1) / limit;
-    if (delay > elapsed_time) {
-        delay = delay - elapsed_time;
-    } else {
-        delay = 0;
-    }
-
-    /* Exceeded current slice, extend it by another slice time */
-    bs->slice_end += BLOCK_IO_SLICE_TIME;
-
-    if (wait) {
-        *wait = delay * NANOSECONDS_PER_SECOND;
-    }
-    return true;
-}
-
-/*
- * Check a request against both the bps and the iops limits.
- *
- * Starts a new slice (and clears the slice statistics) when the current one
- * has ended. If either limit is exceeded, *wait (when non-NULL) receives
- * the larger of the two delays, bs->slice_end is pushed out to cover that
- * delay, and true is returned. Otherwise the request is added to the slice
- * statistics, *wait is set to 0 and false is returned.
- */
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
-                           bool is_write, int64_t *wait)
-{
-    int64_t now, max_wait;
-    uint64_t delay_bps = 0, delay_iops = 0;
-    double elapsed;
-    int over_bps, over_iops;
-
-    now = qemu_get_clock_ns(vm_clock);
-    if (now > bs->slice_end) {
-        /* The previous slice is over: begin a fresh one. */
-        bs->slice_start = now;
-        bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
-        memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
-    }
-
-    elapsed = now - bs->slice_start;
-    elapsed /= (NANOSECONDS_PER_SECOND);
-
-    /* Both checks always run; each may extend bs->slice_end. */
-    over_bps  = bdrv_exceed_bps_limits(bs, nb_sectors,
-                                       is_write, elapsed, &delay_bps);
-    over_iops = bdrv_exceed_iops_limits(bs, is_write,
-                                        elapsed, &delay_iops);
-
-    if (!over_bps && !over_iops) {
-        if (wait) {
-            *wait = 0;
-        }
-
-        /* Within limits: account for this request in the slice. */
-        bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
-                                               BDRV_SECTOR_SIZE;
-        bs->slice_submitted.ios[is_write]++;
-
-        return false;
-    }
-
-    max_wait = delay_bps > delay_iops ? delay_bps : delay_iops;
-    if (wait) {
-        *wait = max_wait;
-    }
-
-    /* Make sure the slice lasts at least until the wait is over. */
-    now = qemu_get_clock_ns(vm_clock);
-    if (bs->slice_end < now + max_wait) {
-        bs->slice_end = now + max_wait;
-    }
-
-    return true;
-}
-
-/**************************************************************/
-/* async block device emulation */
-
-/*
- * AIOCB used by bdrv_aio_rw_vector() to emulate AIO on top of a driver's
- * bdrv_read/bdrv_write callbacks.
- */
-typedef struct BlockDriverAIOCBSync {
- BlockDriverAIOCB common;
- QEMUBH *bh; /* bottom half that runs bdrv_aio_bh_cb() */
- int ret; /* result of the bdrv_read/bdrv_write call */
- /* vector translation state */
- QEMUIOVector *qiov; /* caller's I/O vector */
- uint8_t *bounce; /* linear buffer from qemu_blockalign() */
- int is_write; /* non-zero for writes */
-} BlockDriverAIOCBSync;
-
-/*
- * Cancel hook for emulated AIO requests: delete the pending bottom half and
- * release the AIOCB.
- */
-static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
-{
-    BlockDriverAIOCBSync *sync_acb;
-
-    sync_acb = container_of(blockacb, BlockDriverAIOCBSync, common);
-
-    qemu_bh_delete(sync_acb->bh);
-    sync_acb->bh = NULL;
-    qemu_aio_release(sync_acb);
-}
-
-/* AIOCB description passed to qemu_aio_get() by bdrv_aio_rw_vector(). */
-static const AIOCBInfo bdrv_em_aiocb_info = {
- .aiocb_size = sizeof(BlockDriverAIOCBSync),
- .cancel = bdrv_aio_cancel_em,
-};
-
-/*
- * Bottom half that completes an emulated AIO request.
- *
- * For reads the bounce buffer is copied into the caller's I/O vector first.
- * Then the bounce buffer is freed, the completion callback is invoked with
- * the stored result, and the bottom half and the AIOCB are released.
- */
-static void bdrv_aio_bh_cb(void *opaque)
-{
-    BlockDriverAIOCBSync *sync_acb = opaque;
-
-    if (!sync_acb->is_write) {
-        qemu_iovec_from_buf(sync_acb->qiov, 0, sync_acb->bounce,
-                            sync_acb->qiov->size);
-    }
-    qemu_vfree(sync_acb->bounce);
-
-    sync_acb->common.cb(sync_acb->common.opaque, sync_acb->ret);
-
-    qemu_bh_delete(sync_acb->bh);
-    sync_acb->bh = NULL;
-    qemu_aio_release(sync_acb);
-}
-
-/*
- * Emulate a vectored AIO read or write using the driver's bdrv_read or
- * bdrv_write callback.
- *
- * The transfer goes through a bounce buffer obtained from qemu_blockalign().
- * The driver call is made before this function returns; its result is
- * stored in the AIOCB and reported to cb from the bottom half
- * bdrv_aio_bh_cb().
- */
-static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            QEMUIOVector *qiov,
-                                            int nb_sectors,
-                                            BlockDriverCompletionFunc *cb,
-                                            void *opaque,
-                                            int is_write)
-
-{
-    BlockDriverAIOCBSync *sync_acb;
-
-    sync_acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
-    sync_acb->is_write = is_write;
-    sync_acb->qiov = qiov;
-    sync_acb->bounce = qemu_blockalign(bs, qiov->size);
-    sync_acb->bh = qemu_bh_new(bdrv_aio_bh_cb, sync_acb);
-
-    if (!is_write) {
-        sync_acb->ret = bs->drv->bdrv_read(bs, sector_num, sync_acb->bounce,
-                                           nb_sectors);
-    } else {
-        /* Flatten the vector into the bounce buffer before writing. */
-        qemu_iovec_to_buf(sync_acb->qiov, 0, sync_acb->bounce, qiov->size);
-        sync_acb->ret = bs->drv->bdrv_write(bs, sector_num, sync_acb->bounce,
-                                            nb_sectors);
-    }
-
-    qemu_bh_schedule(sync_acb->bh);
-
-    return &sync_acb->common;
-}
-
-/* Emulated AIO read: bdrv_aio_rw_vector() with is_write = 0. */
-static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    const int is_write = 0;
-
-    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
-                              is_write);
-}
-
-/* Emulated AIO write: bdrv_aio_rw_vector() with is_write = 1. */
-static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    const int is_write = 1;
-
-    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
-                              is_write);
-}
-
-
-/*
- * AIOCB used by the coroutine-based AIO emulation (read/write, flush and
- * discard) in this file.
- */
-typedef struct BlockDriverAIOCBCoroutine {
- BlockDriverAIOCB common;
- BlockRequest req; /* sector, nb_sectors, qiov and resulting error */
- bool is_write;
- bool *done; /* if non-NULL, set to true by bdrv_co_em_bh() */
- QEMUBH* bh; /* bottom half that runs bdrv_co_em_bh() */
-} BlockDriverAIOCBCoroutine;
-
-/*
- * Cancel hook for coroutine-based AIO requests.
- *
- * The request is not aborted; instead a local flag is installed as
- * acb->done and qemu_aio_wait() is called until bdrv_co_em_bh() sets it.
- */
-static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
-{
-    BlockDriverAIOCBCoroutine *co_acb;
-    bool finished = false;
-
-    co_acb = container_of(blockacb, BlockDriverAIOCBCoroutine, common);
-    co_acb->done = &finished;
-
-    while (!finished) {
-        qemu_aio_wait();
-    }
-}
-
-/* AIOCB description passed to qemu_aio_get() by the coroutine-based AIO paths. */
-static const AIOCBInfo bdrv_em_co_aiocb_info = {
- .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
- .cancel = bdrv_aio_co_cancel_em,
-};
-
-/*
- * Bottom half that completes a coroutine-based AIO request: invoke the
- * completion callback with req.error, set the done flag if one was
- * installed, then delete the bottom half and release the AIOCB.
- */
-static void bdrv_co_em_bh(void *opaque)
-{
-    BlockDriverAIOCBCoroutine *co_acb = opaque;
-    bool *done_flag;
-
-    co_acb->common.cb(co_acb->common.opaque, co_acb->req.error);
-
-    /* Read the pointer only after the callback has run. */
-    done_flag = co_acb->done;
-    if (done_flag) {
-        *done_flag = true;
-    }
-
-    qemu_bh_delete(co_acb->bh);
-    qemu_aio_release(co_acb);
-}
-
-/*
- * Coroutine entry point: invoke bdrv_co_do_readv() or bdrv_co_do_writev()
- * for the request stored in the AIOCB, record the result in req.error and
- * schedule bdrv_co_em_bh() to report completion.
- */
-static void coroutine_fn bdrv_co_do_rw(void *opaque)
-{
-    BlockDriverAIOCBCoroutine *co_acb = opaque;
-    BlockDriverState *bs = co_acb->common.bs;
-
-    if (co_acb->is_write) {
-        co_acb->req.error = bdrv_co_do_writev(bs, co_acb->req.sector,
-            co_acb->req.nb_sectors, co_acb->req.qiov, 0);
-    } else {
-        co_acb->req.error = bdrv_co_do_readv(bs, co_acb->req.sector,
-            co_acb->req.nb_sectors, co_acb->req.qiov, 0);
-    }
-
-    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
-    qemu_bh_schedule(co_acb->bh);
-}
-
-/*
- * Start a vectored read or write as an AIO request backed by a coroutine.
- *
- * An AIOCB is filled in with the request parameters and a new coroutine
- * running bdrv_co_do_rw() is entered with it. Returns the common part of
- * the AIOCB.
- */
-static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
-                                               int64_t sector_num,
-                                               QEMUIOVector *qiov,
-                                               int nb_sectors,
-                                               BlockDriverCompletionFunc *cb,
-                                               void *opaque,
-                                               bool is_write)
-{
-    BlockDriverAIOCBCoroutine *co_acb;
-    Coroutine *worker;
-
-    co_acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    co_acb->is_write       = is_write;
-    co_acb->done           = NULL;
-    co_acb->req.sector     = sector_num;
-    co_acb->req.nb_sectors = nb_sectors;
-    co_acb->req.qiov       = qiov;
-
-    worker = qemu_coroutine_create(bdrv_co_do_rw);
-    qemu_coroutine_enter(worker, co_acb);
-
-    return &co_acb->common;
-}
-
-/*
- * Coroutine entry point for bdrv_aio_flush(): run bdrv_co_flush(), store
- * its result in req.error and schedule bdrv_co_em_bh().
- */
-static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
-{
-    BlockDriverAIOCBCoroutine *co_acb = opaque;
-
-    co_acb->req.error = bdrv_co_flush(co_acb->common.bs);
-
-    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
-    qemu_bh_schedule(co_acb->bh);
-}
-
-/*
- * Start an asynchronous flush of bs.
- *
- * A coroutine running bdrv_aio_flush_co_entry() is created and entered; cb
- * is invoked with opaque and the flush result from bdrv_co_em_bh().
- * Returns the common part of the AIOCB.
- */
-BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BlockDriverAIOCBCoroutine *co_acb;
-    Coroutine *worker;
-
-    trace_bdrv_aio_flush(bs, opaque);
-
-    co_acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    co_acb->done = NULL;
-
-    worker = qemu_coroutine_create(bdrv_aio_flush_co_entry);
-    qemu_coroutine_enter(worker, co_acb);
-
-    return &co_acb->common;
-}
-
-/*
- * Coroutine entry point for bdrv_aio_discard(): run bdrv_co_discard() on
- * the stored sector range, store its result in req.error and schedule
- * bdrv_co_em_bh().
- */
-static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
-{
-    BlockDriverAIOCBCoroutine *co_acb = opaque;
-
-    co_acb->req.error = bdrv_co_discard(co_acb->common.bs,
-                                        co_acb->req.sector,
-                                        co_acb->req.nb_sectors);
-
-    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
-    qemu_bh_schedule(co_acb->bh);
-}
-
-/*
- * Start an asynchronous discard of nb_sectors sectors at sector_num.
- *
- * A coroutine running bdrv_aio_discard_co_entry() is created and entered;
- * cb is invoked with opaque and the result from bdrv_co_em_bh(). Returns
- * the common part of the AIOCB.
- */
-BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BlockDriverAIOCBCoroutine *co_acb;
-    Coroutine *worker;
-
-    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
-
-    co_acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    co_acb->req.sector     = sector_num;
-    co_acb->req.nb_sectors = nb_sectors;
-    co_acb->done           = NULL;
-
-    worker = qemu_coroutine_create(bdrv_aio_discard_co_entry);
-    qemu_coroutine_enter(worker, co_acb);
-
-    return &co_acb->common;
-}
-
-/* Run the module init functions registered for MODULE_INIT_BLOCK. */
-void bdrv_init(void)
-{
-    module_call_init(MODULE_INIT_BLOCK);
-}
-
-/* Same as bdrv_init(), but sets use_bdrv_whitelist first. */
-void bdrv_init_with_whitelist(void)
-{
-    use_bdrv_whitelist = 1;
-
-    bdrv_init();
-}
-
-/*
- * Allocate an AIOCB of aiocb_info->aiocb_size bytes with g_slice_alloc()
- * and fill in the common BlockDriverAIOCB fields. The remaining bytes are
- * not initialized here. Release the result with qemu_aio_release().
- */
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
-                   BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BlockDriverAIOCB *common = g_slice_alloc(aiocb_info->aiocb_size);
-
-    common->aiocb_info = aiocb_info;
-    common->bs         = bs;
-    common->cb         = cb;
-    common->opaque     = opaque;
-
-    return common;
-}
-
-/*
- * Free an AIOCB obtained from qemu_aio_get(); the size is taken from the
- * AIOCBInfo stored in the AIOCB.
- */
-void qemu_aio_release(void *p)
-{
-    BlockDriverAIOCB *common = p;
-    const AIOCBInfo *info = common->aiocb_info;
-
-    g_slice_free1(info->aiocb_size, common);
-}
-
-/**************************************************************/
-/* Coroutine block device emulation */
-
-/*
- * Completion state shared between a coroutine waiting for an AIO request
- * and bdrv_co_io_em_complete().
- */
-typedef struct CoroutineIOCompletion {
- Coroutine *coroutine; /* coroutine re-entered on completion */
- int ret; /* result passed to the completion callback */
-} CoroutineIOCompletion;
-
-/*
- * AIO completion callback: store the result in the CoroutineIOCompletion
- * and re-enter the coroutine recorded in it.
- */
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
-    CoroutineIOCompletion *completion = opaque;
-
-    completion->ret = ret;
-    qemu_coroutine_enter(completion->coroutine, NULL);
-}
-
-/*
- * Emulate a coroutine read or write on top of the driver's bdrv_aio_readv
- * or bdrv_aio_writev callback.
- *
- * Returns -EIO if the driver returns no AIOCB; otherwise yields and then
- * returns the result delivered to bdrv_co_io_em_complete().
- */
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *iov,
-                                      bool is_write)
-{
-    CoroutineIOCompletion completion = {
-        .coroutine = qemu_coroutine_self(),
-    };
-    BlockDriverAIOCB *request;
-
-    if (!is_write) {
-        request = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
-                                          bdrv_co_io_em_complete, &completion);
-    } else {
-        request = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
-                                           bdrv_co_io_em_complete, &completion);
-    }
-
-    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, request);
-    if (request == NULL) {
-        return -EIO;
-    }
-
-    /* Resumed by bdrv_co_io_em_complete(). */
-    qemu_coroutine_yield();
-
-    return completion.ret;
-}
-
-/* Emulated coroutine read: bdrv_co_io_em() with is_write = false. */
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov)
-{
-    const bool is_write = false;
-
-    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
-}
-
-/* Emulated coroutine write: bdrv_co_io_em() with is_write = true. */
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
-                                          int64_t sector_num, int nb_sectors,
-                                          QEMUIOVector *iov)
-{
-    const bool is_write = true;
-
-    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
-}
-
-/* Coroutine entry point for bdrv_flush(): store bdrv_co_flush()'s result. */
-static void coroutine_fn bdrv_flush_co_entry(void *opaque)
-{
-    RwCo *state = opaque;
-    int result = bdrv_co_flush(state->bs);
-
-    state->ret = result;
-}
-
-/*
- * Flush bs from coroutine context, then recurse into bs->file.
- *
- * Returns 0 without doing anything if bs is NULL, has no medium inserted
- * or is read-only. Otherwise the driver's flush-to-OS callback (if any) is
- * run, followed - unless BDRV_O_NO_FLUSH is set - by its flush-to-disk or
- * AIO flush callback. The first negative result is returned immediately;
- * otherwise the result of flushing bs->file is returned.
- */
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
-{
- int ret;
-
- /* Also terminates the recursion when bs->file is NULL. */
- if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
- return 0;
- }
-
- /* Write back cached data to the OS even with cache=unsafe */
- BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
- if (bs->drv->bdrv_co_flush_to_os) {
- ret = bs->drv->bdrv_co_flush_to_os(bs);
- if (ret < 0) {
- return ret;
- }
- }
-
- /* But don't actually force it to the disk with cache=unsafe */
- if (bs->open_flags & BDRV_O_NO_FLUSH) {
- goto flush_parent;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
- if (bs->drv->bdrv_co_flush_to_disk) {
- ret = bs->drv->bdrv_co_flush_to_disk(bs);
- } else if (bs->drv->bdrv_aio_flush) {
- /* Emulate via the AIO callback: yield until it completes. */
- BlockDriverAIOCB *acb;
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
-
- acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
- if (acb == NULL) {
- ret = -EIO;
- } else {
- qemu_coroutine_yield();
- ret = co.ret;
- }
- } else {
- /*
- * Some block drivers always operate in either writethrough or unsafe
- * mode and don't support bdrv_flush therefore. Usually qemu doesn't
- * know how the server works (because the behaviour is hardcoded or
- * depends on server-side configuration), so we can't ensure that
- * everything is safe on disk. Returning an error doesn't work because
- * that would break guests even if the server operates in writethrough
- * mode.
- *
- * Let's hope the user knows what he's doing.
- */
- ret = 0;
- }
- if (ret < 0) {
- return ret;
- }
-
- /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
- * in the case of cache=unsafe, so there are no useless flushes.
- */
-flush_parent:
- return bdrv_co_flush(bs->file);
-}
-
-/* Call the driver's bdrv_invalidate_cache callback, if bs has one. */
-void bdrv_invalidate_cache(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv || !drv->bdrv_invalidate_cache) {
-        return;
-    }
-    drv->bdrv_invalidate_cache(bs);
-}
-
-/* Call bdrv_invalidate_cache() on every entry of the bdrv_states list. */
-void bdrv_invalidate_cache_all(void)
-{
-    BlockDriverState *cur;
-
-    QTAILQ_FOREACH(cur, &bdrv_states, list) {
-        bdrv_invalidate_cache(cur);
-    }
-}
-
-/* Clear BDRV_O_INCOMING in open_flags of every entry of bdrv_states. */
-void bdrv_clear_incoming_migration_all(void)
-{
-    BlockDriverState *cur;
-
-    QTAILQ_FOREACH(cur, &bdrv_states, list) {
-        cur->open_flags &= ~(BDRV_O_INCOMING);
-    }
-}
-
-/*
- * Synchronous flush of bs.
- *
- * Inside a coroutine the flush runs directly; otherwise a coroutine is
- * created for it and qemu_aio_wait() is called until the result is
- * available. Returns the result of bdrv_co_flush().
- */
-int bdrv_flush(BlockDriverState *bs)
-{
-    RwCo state = {
-        .bs = bs,
-        .ret = NOT_DONE,
-    };
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_flush_co_entry(&state);
-        return state.ret;
-    }
-
-    qemu_coroutine_enter(qemu_coroutine_create(bdrv_flush_co_entry), &state);
-    while (state.ret == NOT_DONE) {
-        qemu_aio_wait();
-    }
-
-    return state.ret;
-}
-
-/* Coroutine entry point for bdrv_discard(): store bdrv_co_discard()'s result. */
-static void coroutine_fn bdrv_discard_co_entry(void *opaque)
-{
-    RwCo *state = opaque;
-    int result;
-
-    result = bdrv_co_discard(state->bs, state->sector_num, state->nb_sectors);
-    state->ret = result;
-}
-
-/*
- * Discard nb_sectors sectors starting at sector_num, from coroutine context.
- *
- * Returns -ENOMEDIUM if bs has no driver, -EIO if bdrv_check_request()
- * rejects the range, and -EROFS if bs is read-only. The range is cleared
- * in the dirty bitmap (if any). Returns 0 without calling the driver when
- * BDRV_O_UNMAP is not set or the driver has no discard callback; otherwise
- * returns the driver's result (-EIO if the AIO callback returns no AIOCB).
- */
-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors)
-{
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
-        return -EIO;
-    }
-    if (bs->read_only) {
-        return -EROFS;
-    }
-
-    if (bs->dirty_bitmap) {
-        bdrv_reset_dirty(bs, sector_num, nb_sectors);
-    }
-
-    /* Do nothing if disabled. */
-    if (!(bs->open_flags & BDRV_O_UNMAP)) {
-        return 0;
-    }
-
-    if (bs->drv->bdrv_co_discard) {
-        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
-    }
-
-    if (bs->drv->bdrv_aio_discard) {
-        /* Emulate via the AIO callback: yield until it completes. */
-        CoroutineIOCompletion completion = {
-            .coroutine = qemu_coroutine_self(),
-        };
-        BlockDriverAIOCB *request;
-
-        request = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
-                                            bdrv_co_io_em_complete,
-                                            &completion);
-        if (request == NULL) {
-            return -EIO;
-        }
-        qemu_coroutine_yield();
-        return completion.ret;
-    }
-
-    return 0;
-}
-
-/*
- * Synchronous discard of nb_sectors sectors starting at sector_num.
- *
- * Inside a coroutine the discard runs directly; otherwise a coroutine is
- * created for it and qemu_aio_wait() is called until the result is
- * available. Returns the result of bdrv_co_discard().
- */
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
-{
-    RwCo state = {
-        .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .ret = NOT_DONE,
-    };
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_discard_co_entry(&state);
-        return state.ret;
-    }
-
-    qemu_coroutine_enter(qemu_coroutine_create(bdrv_discard_co_entry), &state);
-    while (state.ret == NOT_DONE) {
-        qemu_aio_wait();
-    }
-
-    return state.ret;
-}
-
-/**************************************************************/
-/* removable device support */
-
-/**
- * Return TRUE if the media is present.
- *
- * Without a driver the result is 0; a driver without a bdrv_is_inserted
- * callback is reported as inserted (1); otherwise the callback decides.
- */
-int bdrv_is_inserted(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (drv == NULL) {
-        return 0;
-    }
-    if (drv->bdrv_is_inserted == NULL) {
-        return 1;
-    }
-    return drv->bdrv_is_inserted(bs);
-}
-
-/**
- * Return whether the media changed since the last call to this
- * function, or -ENOTSUP if we don't know. Most drivers don't know.
- */
-int bdrv_media_changed(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv || !drv->bdrv_media_changed) {
-        return -ENOTSUP;
-    }
-    return drv->bdrv_media_changed(bs);
-}
-
-/**
- * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
- *
- * The driver callback is optional. A QMP eject event is emitted whenever
- * bs has a non-empty device name, whether or not the driver has a callback.
- */
-void bdrv_eject(BlockDriverState *bs, bool eject_flag)
-{
-    BlockDriver *drv = bs->drv;
-    bool has_eject_hook = drv && drv->bdrv_eject;
-
-    if (has_eject_hook) {
-        drv->bdrv_eject(bs, eject_flag);
-    }
-
-    if (bs->device_name[0] != '\0') {
-        bdrv_emit_qmp_eject_event(bs, eject_flag);
-    }
-}
-
-/**
- * Lock or unlock the media (if it is locked, the user won't be able
- * to eject it manually). Does nothing beyond tracing when the driver has
- * no bdrv_lock_medium callback.
- */
-void bdrv_lock_medium(BlockDriverState *bs, bool locked)
-{
-    BlockDriver *drv = bs->drv;
-
-    trace_bdrv_lock_medium(bs, locked);
-
-    if (!drv || !drv->bdrv_lock_medium) {
-        return;
-    }
-    drv->bdrv_lock_medium(bs, locked);
-}
-
-/* needed for generic scsi interface */
-
-/*
- * Forward an ioctl to the driver's bdrv_ioctl callback. Returns -ENOTSUP
- * when bs has no driver or the driver has no such callback.
- */
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv || !drv->bdrv_ioctl) {
-        return -ENOTSUP;
-    }
-    return drv->bdrv_ioctl(bs, req, buf);
-}
-
-/*
- * Forward an asynchronous ioctl to the driver's bdrv_aio_ioctl callback.
- * Returns NULL when bs has no driver or the driver has no such callback.
- */
-BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv || !drv->bdrv_aio_ioctl) {
-        return NULL;
-    }
-    return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
-}
-
-/* Set the buffer alignment used by qemu_blockalign() for bs. */
-void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
-{
-    bs->buffer_alignment = align;
-}
-
-/*
- * Allocate size bytes with qemu_memalign(), aligned to bs->buffer_alignment.
- * An alignment of 512 is used when bs is NULL or its alignment is 0.
- */
-void *qemu_blockalign(BlockDriverState *bs, size_t size)
-{
-    if (bs && bs->buffer_alignment) {
-        return qemu_memalign(bs->buffer_alignment, size);
-    }
-    return qemu_memalign(512, size);
-}
-
-/*
- * Check if all memory in this vector is aligned to the buffer alignment
- * of bs.
- *
- * Uses the same fallback as qemu_blockalign(): when bs is NULL or its
- * buffer_alignment is 0, an alignment of 512 is assumed. This avoids a
- * modulo by zero for an unset alignment.
- *
- * Returns true if every iov_base in qiov is a multiple of the alignment
- * (trivially true for an empty vector), false otherwise.
- */
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
-{
-    size_t alignment = (bs && bs->buffer_alignment) ? bs->buffer_alignment
-                                                    : 512;
-    int i;
-
-    for (i = 0; i < qiov->niov; i++) {
-        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-/*
- * Enable or disable dirty tracking for bs.
- *
- * granularity must be 0 or a power of two (asserted). A value of 0 frees
- * an existing dirty bitmap. A non-zero value is shifted right by
- * BDRV_SECTOR_BITS and used to allocate a new bitmap sized from
- * bdrv_getlength(); a bitmap must not already exist in that case
- * (asserted).
- */
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
-{
-    int64_t bitmap_size;
-
-    assert((granularity & (granularity - 1)) == 0);
-
-    if (!granularity) {
-        if (bs->dirty_bitmap) {
-            hbitmap_free(bs->dirty_bitmap);
-            bs->dirty_bitmap = NULL;
-        }
-        return;
-    }
-
-    granularity >>= BDRV_SECTOR_BITS;
-    assert(!bs->dirty_bitmap);
-    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
-    bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
-}
-
-/*
- * Return hbitmap_get() for sector in the dirty bitmap of bs, or 0 when
- * bs has no dirty bitmap.
- */
-int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
-{
-    if (!bs->dirty_bitmap) {
-        return 0;
-    }
-    return hbitmap_get(bs->dirty_bitmap, sector);
-}
-
-/*
- * Initialize hbi to iterate over the dirty bitmap of bs starting at 0.
- * The bitmap pointer is passed on without a NULL check.
- */
-void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
-{
-    hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
-}
-
-/*
- * Mark nr_sectors sectors starting at cur_sector in the dirty bitmap of bs.
- * The bitmap pointer is passed to hbitmap_set() without a NULL check.
- */
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
-{
-    hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
-}
-
-/*
- * Clear nr_sectors sectors starting at cur_sector in the dirty bitmap of
- * bs. The bitmap pointer is passed to hbitmap_reset() without a NULL check.
- */
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                      int nr_sectors)
-{
-    hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
-}
-
-/*
- * Return hbitmap_count() of the dirty bitmap of bs, or 0 when bs has no
- * dirty bitmap.
- */
-int64_t bdrv_get_dirty_count(BlockDriverState *bs)
-{
-    if (!bs->dirty_bitmap) {
-        return 0;
-    }
-    return hbitmap_count(bs->dirty_bitmap);
-}
-
-/*
- * Set the in_use flag of bs. The new value must differ from the current
- * one (asserted).
- */
-void bdrv_set_in_use(BlockDriverState *bs, int in_use)
-{
-    assert(bs->in_use != in_use);
-
-    bs->in_use = in_use;
-}
-
-/* Return the in_use flag of bs. */
-int bdrv_in_use(BlockDriverState *bs)
-{
-    return bs->in_use;
-}
-
-/* Enable I/O status tracking for bs and set the status to OK. */
-void bdrv_iostatus_enable(BlockDriverState *bs)
-{
-    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-    bs->iostatus_enabled = true;
-}
-
-/*
- * The I/O status is only enabled if the drive explicitly enables it _and_
- * the VM is configured to stop on errors: on_write_error is ENOSPC or STOP,
- * or on_read_error is STOP.
- */
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
-{
-    if (!bs->iostatus_enabled) {
-        return false;
-    }
-
-    return bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
-           bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
-           bs->on_read_error == BLOCKDEV_ON_ERROR_STOP;
-}
-
-/* Disable I/O status tracking for bs. */
-void bdrv_iostatus_disable(BlockDriverState *bs)
-{
-    bs->iostatus_enabled = false;
-}
-
-/*
- * If the I/O status is enabled for bs, reset it to OK and also reset the
- * I/O status of the attached block job, if there is one.
- */
-void bdrv_iostatus_reset(BlockDriverState *bs)
-{
-    if (!bdrv_iostatus_is_enabled(bs)) {
-        return;
-    }
-
-    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-    if (bs->job) {
-        block_job_iostatus_reset(bs->job);
-    }
-}
-
-/*
- * Record an I/O error in the I/O status of bs, which must be enabled
- * (asserted). Only a status that is currently OK is changed: it becomes
- * NOSPACE when error is ENOSPC and FAILED otherwise.
- */
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
-{
-    assert(bdrv_iostatus_is_enabled(bs));
-
-    if (bs->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
-        return;
-    }
-
-    if (error == ENOSPC) {
-        bs->iostatus = BLOCK_DEVICE_IO_STATUS_NOSPACE;
-    } else {
-        bs->iostatus = BLOCK_DEVICE_IO_STATUS_FAILED;
-    }
-}
-
-/*
- * Begin accounting for one I/O operation: store the byte count, the type
- * and the current get_clock() value in cookie. type must be below
- * BDRV_MAX_IOTYPE (asserted). bs is not used here.
- */
-void
-bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
-                enum BlockAcctType type)
-{
-    assert(type < BDRV_MAX_IOTYPE);
-
-    cookie->type = type;
-    cookie->bytes = bytes;
-    cookie->start_time_ns = get_clock();
-}
-
-/*
- * Finish accounting for the operation described by cookie: add its bytes,
- * one operation and the time elapsed since bdrv_acct_start() to the
- * per-type counters of bs. cookie->type must be below BDRV_MAX_IOTYPE
- * (asserted).
- */
-void
-bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
-{
-    assert(cookie->type < BDRV_MAX_IOTYPE);
-
-    bs->nr_bytes[cookie->type] += cookie->bytes;
-    bs->nr_ops[cookie->type] += 1;
-    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
-}
-
-/*
- * Create an image file.
- *
- * filename       name of the image to create
- * fmt            name of the image format driver
- * base_filename  backing file name, or NULL
- * base_fmt       backing file format name, or NULL
- * options        "-o" style option string, or NULL
- * img_size       initial value for the BLOCK_OPT_SIZE option
- * flags          open flags; used (minus BDRV_O_RDWR, BDRV_O_SNAPSHOT and
- *                BDRV_O_NO_BACKING) when the backing file has to be opened
- * errp           receives an error description on failure
- * quiet          if false, a "Formatting ..." line is printed to stdout
- *
- * If the size option is -1 and a backing file is given, the size is taken
- * from the backing file. Nothing is returned; failures are reported
- * through errp only.
- */
-void bdrv_img_create(const char *filename, const char *fmt,
-                     const char *base_filename, const char *base_fmt,
-                     char *options, uint64_t img_size, int flags,
-                     Error **errp, bool quiet)
-{
-    QEMUOptionParameter *param = NULL, *create_options = NULL;
-    QEMUOptionParameter *backing_fmt, *backing_file, *size;
-    BlockDriverState *bs = NULL;
-    BlockDriver *drv, *proto_drv;
-    BlockDriver *backing_drv = NULL;
-    int ret = 0;
-
-    /* Find driver and parse its options */
-    drv = bdrv_find_format(fmt);
-    if (!drv) {
-        error_setg(errp, "Unknown file format '%s'", fmt);
-        return;
-    }
-
-    proto_drv = bdrv_find_protocol(filename, true);
-    if (!proto_drv) {
-        error_setg(errp, "Unknown protocol '%s'", filename);
-        return;
-    }
-
-    create_options = append_option_parameters(create_options,
-                                              drv->create_options);
-    create_options = append_option_parameters(create_options,
-                                              proto_drv->create_options);
-
-    /* Create parameter list with default values */
-    param = parse_option_parameters("", create_options, param);
-
-    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
-
-    /* Parse -o options */
-    if (options) {
-        param = parse_option_parameters(options, create_options, param);
-        if (param == NULL) {
-            error_setg(errp, "Invalid options for file format '%s'.", fmt);
-            goto out;
-        }
-    }
-
-    if (base_filename) {
-        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
-                                 base_filename)) {
-            error_setg(errp, "Backing file not supported for file format '%s'",
-                       fmt);
-            goto out;
-        }
-    }
-
-    if (base_fmt) {
-        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
-            error_setg(errp, "Backing file format not supported for file "
-                             "format '%s'", fmt);
-            goto out;
-        }
-    }
-
-    /* An image must not use itself as its backing file. */
-    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
-    if (backing_file && backing_file->value.s) {
-        if (!strcmp(filename, backing_file->value.s)) {
-            error_setg(errp, "Error: Trying to create an image with the "
-                             "same filename as the backing file");
-            goto out;
-        }
-    }
-
-    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
-    if (backing_fmt && backing_fmt->value.s) {
-        backing_drv = bdrv_find_format(backing_fmt->value.s);
-        if (!backing_drv) {
-            error_setg(errp, "Unknown backing file format '%s'",
-                       backing_fmt->value.s);
-            goto out;
-        }
-    }
-
-    // The size for the image must always be specified, with one exception:
-    // If we are using a backing file, we can obtain the size from there
-    size = get_option_parameter(param, BLOCK_OPT_SIZE);
-    if (size && size->value.n == -1) {
-        if (backing_file && backing_file->value.s) {
-            /* Named so that it does not shadow the outer 'size'. */
-            uint64_t backing_size;
-            char buf[32];
-            int back_flags;
-
-            /* backing files always opened read-only */
-            back_flags =
-                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
-            bs = bdrv_new("");
-
-            ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
-                            backing_drv);
-            if (ret < 0) {
-                error_setg_errno(errp, -ret, "Could not open '%s'",
-                                 backing_file->value.s);
-                goto out;
-            }
-
-            /* Geometry is reported in sectors; convert it to bytes. */
-            bdrv_get_geometry(bs, &backing_size);
-            backing_size *= BDRV_SECTOR_SIZE;
-
-            /* The value is unsigned, so format it with PRIu64. */
-            snprintf(buf, sizeof(buf), "%" PRIu64, backing_size);
-            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
-        } else {
-            error_setg(errp, "Image creation needs a size parameter");
-            goto out;
-        }
-    }
-
-    if (!quiet) {
-        printf("Formatting '%s', fmt=%s ", filename, fmt);
-        print_option_parameters(param);
-        puts("");
-    }
-    ret = bdrv_create(drv, filename, param);
-    if (ret < 0) {
-        if (ret == -ENOTSUP) {
-            error_setg(errp,"Formatting or formatting option not supported for "
-                            "file format '%s'", fmt);
-        } else if (ret == -EFBIG) {
-            const char *cluster_size_hint = "";
-            if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
-                cluster_size_hint = " (try using a larger cluster size)";
-            }
-            error_setg(errp, "The image size is too large for file format '%s'%s",
-                       fmt, cluster_size_hint);
-        } else {
-            error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
-                       strerror(-ret));
-        }
-    }
-
-out:
-    /* Common cleanup for both the success and the error paths. */
-    free_option_parameters(create_options);
-    free_option_parameters(param);
-
-    if (bs) {
-        bdrv_delete(bs);
-    }
-}
-
-/*
- * Return the AioContext of bs. Currently BlockDriverState always uses the
- * main loop AioContext, so bs is not consulted.
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs)
-{
-    AioContext *main_ctx = qemu_get_aio_context();
-
-    return main_ctx;
-}
-
-/* Add notifier to the before_write_notifiers list of bs. */
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
-                                    NotifierWithReturn *notifier)
-{
-    NotifierWithReturnList *list = &bs->before_write_notifiers;
-
-    notifier_with_return_list_add(list, notifier);
-}
diff --git a/contrib/qemu/block/qcow.c b/contrib/qemu/block/qcow.c
deleted file mode 100644
index 5239bd68f1c..00000000000
--- a/contrib/qemu/block/qcow.c
+++ /dev/null
@@ -1,914 +0,0 @@
-/*
- * Block driver for the QCOW format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qemu/aes.h"
-#include "migration/migration.h"
-
-/**************************************************************/
-/* QEMU COW block driver with compression and encryption support */
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 1
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES 1
-
-#define QCOW_OFLAG_COMPRESSED (1LL << 63)
-
-typedef struct QCowHeader {
- uint32_t magic;
- uint32_t version;
- uint64_t backing_file_offset;
- uint32_t backing_file_size;
- uint32_t mtime;
- uint64_t size; /* in bytes */
- uint8_t cluster_bits;
- uint8_t l2_bits;
- uint32_t crypt_method;
- uint64_t l1_table_offset;
-} QCowHeader;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct BDRVQcowState {
- int cluster_bits;
- int cluster_size;
- int cluster_sectors;
- int l2_bits;
- int l2_size;
- int l1_size;
- uint64_t cluster_offset_mask;
- uint64_t l1_table_offset;
- uint64_t *l1_table;
- uint64_t *l2_cache;
- uint64_t l2_cache_offsets[L2_CACHE_SIZE];
- uint32_t l2_cache_counts[L2_CACHE_SIZE];
- uint8_t *cluster_cache;
- uint8_t *cluster_data;
- uint64_t cluster_cache_offset;
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
- uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- CoMutex lock;
- Error *migration_blocker;
-} BDRVQcowState;
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
- const QCowHeader *cow_header = (const void *)buf;
-
- if (buf_size >= sizeof(QCowHeader) &&
- be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
- be32_to_cpu(cow_header->version) == QCOW_VERSION)
- return 100;
- else
- return 0;
-}
-
-static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQcowState *s = bs->opaque;
- int len, i, shift, ret;
- QCowHeader header;
-
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
- if (ret < 0) {
- goto fail;
- }
- be32_to_cpus(&header.magic);
- be32_to_cpus(&header.version);
- be64_to_cpus(&header.backing_file_offset);
- be32_to_cpus(&header.backing_file_size);
- be32_to_cpus(&header.mtime);
- be64_to_cpus(&header.size);
- be32_to_cpus(&header.crypt_method);
- be64_to_cpus(&header.l1_table_offset);
-
- if (header.magic != QCOW_MAGIC) {
- ret = -EMEDIUMTYPE;
- goto fail;
- }
- if (header.version != QCOW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version), "QCOW version %d", header.version);
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "qcow", version);
- ret = -ENOTSUP;
- goto fail;
- }
-
- if (header.size <= 1 || header.cluster_bits < 9) {
- ret = -EINVAL;
- goto fail;
- }
- if (header.crypt_method > QCOW_CRYPT_AES) {
- ret = -EINVAL;
- goto fail;
- }
- s->crypt_method_header = header.crypt_method;
- if (s->crypt_method_header) {
- bs->encrypted = 1;
- }
- s->cluster_bits = header.cluster_bits;
- s->cluster_size = 1 << s->cluster_bits;
- s->cluster_sectors = 1 << (s->cluster_bits - 9);
- s->l2_bits = header.l2_bits;
- s->l2_size = 1 << s->l2_bits;
- bs->total_sectors = header.size / 512;
- s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-
- /* read the level 1 table */
- shift = s->cluster_bits + s->l2_bits;
- s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
-
- s->l1_table_offset = header.l1_table_offset;
- s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
-
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
-
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
- /* alloc L2 cache */
- s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
- s->cluster_cache = g_malloc(s->cluster_size);
- s->cluster_data = g_malloc(s->cluster_size);
- s->cluster_cache_offset = -1;
-
- /* read the backing file name */
- if (header.backing_file_offset != 0) {
- len = header.backing_file_size;
- if (len > 1023) {
- len = 1023;
- }
- ret = bdrv_pread(bs->file, header.backing_file_offset,
- bs->backing_file, len);
- if (ret < 0) {
- goto fail;
- }
- bs->backing_file[len] = '\0';
- }
-
- /* Disable migration when qcow images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "qcow", bs->device_name, "live migration");
- migrate_add_blocker(s->migration_blocker);
-
- qemu_co_mutex_init(&s->lock);
- return 0;
-
- fail:
- g_free(s->l1_table);
- g_free(s->l2_cache);
- g_free(s->cluster_cache);
- g_free(s->cluster_data);
- return ret;
-}
-
-
-/* We have nothing to do for QCOW reopen, stubs just return
- * success */
-static int qcow_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
- BDRVQcowState *s = bs->opaque;
- uint8_t keybuf[16];
- int len, i;
-
- memset(keybuf, 0, 16);
- len = strlen(key);
- if (len > 16)
- len = 16;
- /* XXX: we could compress the chars to 7 bits to increase
- entropy */
- for(i = 0;i < len;i++) {
- keybuf[i] = key[i];
- }
- s->crypt_method = s->crypt_method_header;
-
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
- return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
- return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
- algorithm for < 4 GB images. NOTE: out_buf == in_buf is
- supported */
-static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
-{
- union {
- uint64_t ll[2];
- uint8_t b[16];
- } ivec;
- int i;
-
- for(i = 0; i < nb_sectors; i++) {
- ivec.ll[0] = cpu_to_le64(sector_num);
- ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
- sector_num++;
- in_buf += 512;
- out_buf += 512;
- }
-}
-
-/* 'allocate' is:
- *
- * 0 to not allocate.
- *
- * 1 to allocate a normal cluster (for sector indexes 'n_start' to
- * 'n_end')
- *
- * 2 to allocate a compressed cluster of size
- * 'compressed_size'. 'compressed_size' must be > 0 and <
- * cluster_size
- *
- * return 0 if not allocated.
- */
-static uint64_t get_cluster_offset(BlockDriverState *bs,
- uint64_t offset, int allocate,
- int compressed_size,
- int n_start, int n_end)
-{
- BDRVQcowState *s = bs->opaque;
- int min_index, i, j, l1_index, l2_index;
- uint64_t l2_offset, *l2_table, cluster_offset, tmp;
- uint32_t min_count;
- int new_l2_table;
-
- l1_index = offset >> (s->l2_bits + s->cluster_bits);
- l2_offset = s->l1_table[l1_index];
- new_l2_table = 0;
- if (!l2_offset) {
- if (!allocate)
- return 0;
- /* allocate a new l2 entry */
- l2_offset = bdrv_getlength(bs->file);
- /* round to cluster size */
- l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
- /* update the L1 entry */
- s->l1_table[l1_index] = l2_offset;
- tmp = cpu_to_be64(l2_offset);
- if (bdrv_pwrite_sync(bs->file,
- s->l1_table_offset + l1_index * sizeof(tmp),
- &tmp, sizeof(tmp)) < 0)
- return 0;
- new_l2_table = 1;
- }
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (l2_offset == s->l2_cache_offsets[i]) {
- /* increment the hit count */
- if (++s->l2_cache_counts[i] == 0xffffffff) {
- for(j = 0; j < L2_CACHE_SIZE; j++) {
- s->l2_cache_counts[j] >>= 1;
- }
- }
- l2_table = s->l2_cache + (i << s->l2_bits);
- goto found;
- }
- }
- /* not found: load a new entry in the least used one */
- min_index = 0;
- min_count = 0xffffffff;
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (s->l2_cache_counts[i] < min_count) {
- min_count = s->l2_cache_counts[i];
- min_index = i;
- }
- }
- l2_table = s->l2_cache + (min_index << s->l2_bits);
- if (new_l2_table) {
- memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
- s->l2_size * sizeof(uint64_t)) < 0)
- return 0;
- } else {
- if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
- s->l2_size * sizeof(uint64_t))
- return 0;
- }
- s->l2_cache_offsets[min_index] = l2_offset;
- s->l2_cache_counts[min_index] = 1;
- found:
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
- if (!cluster_offset ||
- ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
- if (!allocate)
- return 0;
- /* allocate a new cluster */
- if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
- (n_end - n_start) < s->cluster_sectors) {
- /* if the cluster is already compressed, we must
- decompress it in the case it is not completely
- overwritten */
- if (decompress_cluster(bs, cluster_offset) < 0)
- return 0;
- cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + s->cluster_size - 1) &
- ~(s->cluster_size - 1);
- /* write the cluster content */
- if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
- s->cluster_size)
- return -1;
- } else {
- cluster_offset = bdrv_getlength(bs->file);
- if (allocate == 1) {
- /* round to cluster size */
- cluster_offset = (cluster_offset + s->cluster_size - 1) &
- ~(s->cluster_size - 1);
- bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
- /* if encrypted, we must initialize the cluster
- content which won't be written */
- if (s->crypt_method &&
- (n_end - n_start) < s->cluster_sectors) {
- uint64_t start_sect;
- start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
- memset(s->cluster_data + 512, 0x00, 512);
- for(i = 0; i < s->cluster_sectors; i++) {
- if (i < n_start || i >= n_end) {
- encrypt_sectors(s, start_sect + i,
- s->cluster_data,
- s->cluster_data + 512, 1, 1,
- &s->aes_encrypt_key);
- if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
- s->cluster_data, 512) != 512)
- return -1;
- }
- }
- }
- } else if (allocate == 2) {
- cluster_offset |= QCOW_OFLAG_COMPRESSED |
- (uint64_t)compressed_size << (63 - s->cluster_bits);
- }
- }
- /* update L2 table */
- tmp = cpu_to_be64(cluster_offset);
- l2_table[l2_index] = tmp;
- if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
- &tmp, sizeof(tmp)) < 0)
- return 0;
- }
- return cluster_offset;
-}
-
-static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n;
- uint64_t cluster_offset;
-
- qemu_co_mutex_lock(&s->lock);
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
- qemu_co_mutex_unlock(&s->lock);
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors)
- n = nb_sectors;
- *pnum = n;
- return (cluster_offset != 0);
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
- const uint8_t *buf, int buf_size)
-{
- z_stream strm1, *strm = &strm1;
- int ret, out_len;
-
- memset(strm, 0, sizeof(*strm));
-
- strm->next_in = (uint8_t *)buf;
- strm->avail_in = buf_size;
- strm->next_out = out_buf;
- strm->avail_out = out_buf_size;
-
- ret = inflateInit2(strm, -12);
- if (ret != Z_OK)
- return -1;
- ret = inflate(strm, Z_FINISH);
- out_len = strm->next_out - out_buf;
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
- out_len != out_buf_size) {
- inflateEnd(strm);
- return -1;
- }
- inflateEnd(strm);
- return 0;
-}
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, csize;
- uint64_t coffset;
-
- coffset = cluster_offset & s->cluster_offset_mask;
- if (s->cluster_cache_offset != coffset) {
- csize = cluster_offset >> (63 - s->cluster_bits);
- csize &= (s->cluster_size - 1);
- ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
- if (ret != csize)
- return -1;
- if (decompress_buffer(s->cluster_cache, s->cluster_size,
- s->cluster_data, csize) < 0) {
- return -1;
- }
- s->cluster_cache_offset = coffset;
- }
- return 0;
-}
-
-static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- int ret = 0, n;
- uint64_t cluster_offset;
- struct iovec hd_iov;
- QEMUIOVector hd_qiov;
- uint8_t *buf;
- void *orig_buf;
-
- if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
- } else {
- orig_buf = NULL;
- buf = (uint8_t *)qiov->iov->iov_base;
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- while (nb_sectors != 0) {
- /* prepare next request */
- cluster_offset = get_cluster_offset(bs, sector_num << 9,
- 0, 0, 0, 0);
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
-
- if (!cluster_offset) {
- if (bs->backing_hd) {
- /* read from the base image */
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- } else {
- /* Note: in this case, no need to wait */
- memset(buf, 0, 512 * n);
- }
- } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- /* add AIO support for compressed blocks ? */
- if (decompress_cluster(bs, cluster_offset) < 0) {
- goto fail;
- }
- memcpy(buf,
- s->cluster_cache + index_in_cluster * 512, 512 * n);
- } else {
- if ((cluster_offset & 511) != 0) {
- goto fail;
- }
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- break;
- }
- if (s->crypt_method) {
- encrypt_sectors(s, sector_num, buf, buf,
- n, 0,
- &s->aes_decrypt_key);
- }
- }
- ret = 0;
-
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
-
-done:
- qemu_co_mutex_unlock(&s->lock);
-
- if (qiov->niov > 1) {
- qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
- qemu_vfree(orig_buf);
- }
-
- return ret;
-
-fail:
- ret = -EIO;
- goto done;
-}
-
-static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- uint64_t cluster_offset;
- const uint8_t *src_buf;
- int ret = 0, n;
- uint8_t *cluster_data = NULL;
- struct iovec hd_iov;
- QEMUIOVector hd_qiov;
- uint8_t *buf;
- void *orig_buf;
-
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
- if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
- qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
- } else {
- orig_buf = NULL;
- buf = (uint8_t *)qiov->iov->iov_base;
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- while (nb_sectors != 0) {
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
- index_in_cluster,
- index_in_cluster + n);
- if (!cluster_offset || (cluster_offset & 511) != 0) {
- ret = -EIO;
- break;
- }
- if (s->crypt_method) {
- if (!cluster_data) {
- cluster_data = g_malloc0(s->cluster_size);
- }
- encrypt_sectors(s, sector_num, cluster_data, buf,
- n, 1, &s->aes_encrypt_key);
- src_buf = cluster_data;
- } else {
- src_buf = buf;
- }
-
- hd_iov.iov_base = (void *)src_buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- break;
- }
- ret = 0;
-
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
- qemu_co_mutex_unlock(&s->lock);
-
- if (qiov->niov > 1) {
- qemu_vfree(orig_buf);
- }
- g_free(cluster_data);
-
- return ret;
-}
-
-static void qcow_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
-
- g_free(s->l1_table);
- g_free(s->l2_cache);
- g_free(s->cluster_cache);
- g_free(s->cluster_data);
-
- migrate_del_blocker(s->migration_blocker);
- error_free(s->migration_blocker);
-}
-
-static int qcow_create(const char *filename, QEMUOptionParameter *options)
-{
- int header_size, backing_filename_len, l1_size, shift, i;
- QCowHeader header;
- uint8_t *tmp;
- int64_t total_size = 0;
- const char *backing_file = NULL;
- int flags = 0;
- int ret;
- BlockDriverState *qcow_bs;
-
- /* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- }
- options++;
- }
-
- ret = bdrv_create_file(filename, options);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_truncate(qcow_bs, 0);
- if (ret < 0) {
- goto exit;
- }
-
- memset(&header, 0, sizeof(header));
- header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(QCOW_VERSION);
- header.size = cpu_to_be64(total_size * 512);
- header_size = sizeof(header);
- backing_filename_len = 0;
- if (backing_file) {
- if (strcmp(backing_file, "fat:")) {
- header.backing_file_offset = cpu_to_be64(header_size);
- backing_filename_len = strlen(backing_file);
- header.backing_file_size = cpu_to_be32(backing_filename_len);
- header_size += backing_filename_len;
- } else {
- /* special backing file for vvfat */
- backing_file = NULL;
- }
- header.cluster_bits = 9; /* 512 byte cluster to avoid copying
- unmodifyed sectors */
- header.l2_bits = 12; /* 32 KB L2 tables */
- } else {
- header.cluster_bits = 12; /* 4 KB clusters */
- header.l2_bits = 9; /* 4 KB L2 tables */
- }
- header_size = (header_size + 7) & ~7;
- shift = header.cluster_bits + header.l2_bits;
- l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
-
- header.l1_table_offset = cpu_to_be64(header_size);
- if (flags & BLOCK_FLAG_ENCRYPT) {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
- } else {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
- }
-
- /* write all the data */
- ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
- if (ret != sizeof(header)) {
- goto exit;
- }
-
- if (backing_file) {
- ret = bdrv_pwrite(qcow_bs, sizeof(header),
- backing_file, backing_filename_len);
- if (ret != backing_filename_len) {
- goto exit;
- }
- }
-
- tmp = g_malloc0(BDRV_SECTOR_SIZE);
- for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
- BDRV_SECTOR_SIZE); i++) {
- ret = bdrv_pwrite(qcow_bs, header_size +
- BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
- if (ret != BDRV_SECTOR_SIZE) {
- g_free(tmp);
- goto exit;
- }
- }
-
- g_free(tmp);
- ret = 0;
-exit:
- bdrv_delete(qcow_bs);
- return ret;
-}
-
-static int qcow_make_empty(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- uint32_t l1_length = s->l1_size * sizeof(uint64_t);
- int ret;
-
- memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
- l1_length) < 0)
- return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
- if (ret < 0)
- return ret;
-
- memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
- memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
- memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-
- return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
- tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- z_stream strm;
- int ret, out_len;
- uint8_t *out_buf;
- uint64_t cluster_offset;
-
- if (nb_sectors != s->cluster_sectors) {
- ret = -EINVAL;
-
- /* Zero-pad last write if image size is not cluster aligned */
- if (sector_num + nb_sectors == bs->total_sectors &&
- nb_sectors < s->cluster_sectors) {
- uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
- memset(pad_buf, 0, s->cluster_size);
- memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
- ret = qcow_write_compressed(bs, sector_num,
- pad_buf, s->cluster_sectors);
- qemu_vfree(pad_buf);
- }
- return ret;
- }
-
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, -12,
- 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
- ret = -EINVAL;
- goto fail;
- }
-
- strm.avail_in = s->cluster_size;
- strm.next_in = (uint8_t *)buf;
- strm.avail_out = s->cluster_size;
- strm.next_out = out_buf;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret != Z_STREAM_END && ret != Z_OK) {
- deflateEnd(&strm);
- ret = -EINVAL;
- goto fail;
- }
- out_len = strm.next_out - out_buf;
-
- deflateEnd(&strm);
-
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
- /* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
- if (ret < 0) {
- goto fail;
- }
- } else {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
- out_len, 0, 0);
- if (cluster_offset == 0) {
- ret = -EIO;
- goto fail;
- }
-
- cluster_offset &= s->cluster_offset_mask;
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = 0;
-fail:
- g_free(out_buf);
- return ret;
-}
-
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQcowState *s = bs->opaque;
- bdi->cluster_size = s->cluster_size;
- return 0;
-}
-
-
-static QEMUOptionParameter qcow_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- { NULL }
-};
-
-static BlockDriver bdrv_qcow = {
- .format_name = "qcow",
- .instance_size = sizeof(BDRVQcowState),
- .bdrv_probe = qcow_probe,
- .bdrv_open = qcow_open,
- .bdrv_close = qcow_close,
- .bdrv_reopen_prepare = qcow_reopen_prepare,
- .bdrv_create = qcow_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
-
- .bdrv_co_readv = qcow_co_readv,
- .bdrv_co_writev = qcow_co_writev,
- .bdrv_co_is_allocated = qcow_co_is_allocated,
-
- .bdrv_set_key = qcow_set_key,
- .bdrv_make_empty = qcow_make_empty,
- .bdrv_write_compressed = qcow_write_compressed,
- .bdrv_get_info = qcow_get_info,
-
- .create_options = qcow_create_options,
-};
-
-static void bdrv_qcow_init(void)
-{
- bdrv_register(&bdrv_qcow);
-}
-
-block_init(bdrv_qcow_init);
diff --git a/contrib/qemu/block/qcow2-cache.c b/contrib/qemu/block/qcow2-cache.c
deleted file mode 100644
index 2f3114ecc24..00000000000
--- a/contrib/qemu/block/qcow2-cache.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * L2/refcount table cache for the QCOW2 format
- *
- * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/block_int.h"
-#include "qemu-common.h"
-#include "qcow2.h"
-#include "trace.h"
-
-typedef struct Qcow2CachedTable {
- void* table;
- int64_t offset;
- bool dirty;
- int cache_hits;
- int ref;
-} Qcow2CachedTable;
-
-struct Qcow2Cache {
- Qcow2CachedTable* entries;
- struct Qcow2Cache* depends;
- int size;
- bool depends_on_flush;
-};
-
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2Cache *c;
- int i;
-
- c = g_malloc0(sizeof(*c));
- c->size = num_tables;
- c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
-
- for (i = 0; i < c->size; i++) {
- c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
- }
-
- return c;
-}
-
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- assert(c->entries[i].ref == 0);
- qemu_vfree(c->entries[i].table);
- }
-
- g_free(c->entries);
- g_free(c);
-
- return 0;
-}
-
-static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
-{
- int ret;
-
- ret = qcow2_cache_flush(bs, c->depends);
- if (ret < 0) {
- return ret;
- }
-
- c->depends = NULL;
- c->depends_on_flush = false;
-
- return 0;
-}
-
-static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
-{
- BDRVQcowState *s = bs->opaque;
- int ret = 0;
-
- if (!c->entries[i].dirty || !c->entries[i].offset) {
- return 0;
- }
-
- trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
-
- if (c->depends) {
- ret = qcow2_cache_flush_dependency(bs, c);
- } else if (c->depends_on_flush) {
- ret = bdrv_flush(bs->file);
- if (ret >= 0) {
- c->depends_on_flush = false;
- }
- }
-
- if (ret < 0) {
- return ret;
- }
-
- if (c == s->refcount_block_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
- } else if (c == s->l2_table_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
- }
-
- ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
- s->cluster_size);
- if (ret < 0) {
- return ret;
- }
-
- c->entries[i].dirty = false;
-
- return 0;
-}
-
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
-{
- BDRVQcowState *s = bs->opaque;
- int result = 0;
- int ret;
- int i;
-
- trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
-
- for (i = 0; i < c->size; i++) {
- ret = qcow2_cache_entry_flush(bs, c, i);
- if (ret < 0 && result != -ENOSPC) {
- result = ret;
- }
- }
-
- if (result == 0) {
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- result = ret;
- }
- }
-
- return result;
-}
-
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
- Qcow2Cache *dependency)
-{
- int ret;
-
- if (dependency->depends) {
- ret = qcow2_cache_flush_dependency(bs, dependency);
- if (ret < 0) {
- return ret;
- }
- }
-
- if (c->depends && (c->depends != dependency)) {
- ret = qcow2_cache_flush_dependency(bs, c);
- if (ret < 0) {
- return ret;
- }
- }
-
- c->depends = dependency;
- return 0;
-}
-
-void qcow2_cache_depends_on_flush(Qcow2Cache *c)
-{
- c->depends_on_flush = true;
-}
-
-static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
-{
- int i;
- int min_count = INT_MAX;
- int min_index = -1;
-
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].ref) {
- continue;
- }
-
- if (c->entries[i].cache_hits < min_count) {
- min_index = i;
- min_count = c->entries[i].cache_hits;
- }
-
- /* Give newer hits priority */
- /* TODO Check how to optimize the replacement strategy */
- c->entries[i].cache_hits /= 2;
- }
-
- if (min_index == -1) {
- /* This can't happen in current synchronous code, but leave the check
- * here as a reminder for whoever starts using AIO with the cache */
- abort();
- }
- return min_index;
-}
-
-static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
- uint64_t offset, void **table, bool read_from_disk)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
- int ret;
-
- trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
- offset, read_from_disk);
-
- /* Check if the table is already cached */
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].offset == offset) {
- goto found;
- }
- }
-
- /* If not, write a table back and replace it */
- i = qcow2_cache_find_entry_to_replace(c);
- trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
- if (i < 0) {
- return i;
- }
-
- ret = qcow2_cache_entry_flush(bs, c, i);
- if (ret < 0) {
- return ret;
- }
-
- trace_qcow2_cache_get_read(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
- c->entries[i].offset = 0;
- if (read_from_disk) {
- if (c == s->l2_table_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
- }
-
- ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
- if (ret < 0) {
- return ret;
- }
- }
-
- /* Give the table some hits for the start so that it won't be replaced
- * immediately. The number 32 is completely arbitrary. */
- c->entries[i].cache_hits = 32;
- c->entries[i].offset = offset;
-
- /* And return the right table */
-found:
- c->entries[i].cache_hits++;
- c->entries[i].ref++;
- *table = c->entries[i].table;
-
- trace_qcow2_cache_get_done(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
-
- return 0;
-}
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table)
-{
- return qcow2_cache_do_get(bs, c, offset, table, true);
-}
-
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table)
-{
- return qcow2_cache_do_get(bs, c, offset, table, false);
-}
-
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == *table) {
- goto found;
- }
- }
- return -ENOENT;
-
-found:
- c->entries[i].ref--;
- *table = NULL;
-
- assert(c->entries[i].ref >= 0);
- return 0;
-}
-
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == table) {
- goto found;
- }
- }
- abort();
-
-found:
- c->entries[i].dirty = true;
-}
diff --git a/contrib/qemu/block/qcow2-cluster.c b/contrib/qemu/block/qcow2-cluster.c
deleted file mode 100644
index cca76d4fcdd..00000000000
--- a/contrib/qemu/block/qcow2-cluster.c
+++ /dev/null
@@ -1,1478 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include <zlib.h>
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-#include "trace.h"
-
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
- bool exact_size)
-{
- BDRVQcowState *s = bs->opaque;
- int new_l1_size2, ret, i;
- uint64_t *new_l1_table;
- int64_t new_l1_table_offset, new_l1_size;
- uint8_t data[12];
-
- if (min_size <= s->l1_size)
- return 0;
-
- if (exact_size) {
- new_l1_size = min_size;
- } else {
- /* Bump size up to reduce the number of times we have to grow */
- new_l1_size = s->l1_size;
- if (new_l1_size == 0) {
- new_l1_size = 1;
- }
- while (min_size > new_l1_size) {
- new_l1_size = (new_l1_size * 3 + 1) / 2;
- }
- }
-
- if (new_l1_size > INT_MAX) {
- return -EFBIG;
- }
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
- s->l1_size, new_l1_size);
-#endif
-
- new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
- memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
-
- /* write new table (align to cluster) */
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
- new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
- if (new_l1_table_offset < 0) {
- g_free(new_l1_table);
- return new_l1_table_offset;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
- for(i = 0; i < s->l1_size; i++)
- new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
- if (ret < 0)
- goto fail;
- for(i = 0; i < s->l1_size; i++)
- new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
-
- /* set new table */
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
- cpu_to_be32w((uint32_t*)data, new_l1_size);
- cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
- if (ret < 0) {
- goto fail;
- }
- g_free(s->l1_table);
- qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- s->l1_table_offset = new_l1_table_offset;
- s->l1_table = new_l1_table;
- s->l1_size = new_l1_size;
- return 0;
- fail:
- g_free(new_l1_table);
- qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
- QCOW2_DISCARD_OTHER);
- return ret;
-}
-
-/*
- * l2_load
- *
- * Loads a L2 table into memory. If the table is in the cache, the cache
- * is used; otherwise the L2 table is loaded from the image file.
- *
- * Returns a pointer to the L2 table on success, or NULL if the read from
- * the image file failed.
- */
-
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
- uint64_t **l2_table)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
-
- return ret;
-}
-
-/*
- * Writes one sector of the L1 table to the disk (can't update single entries
- * and we really don't want bdrv_pread to perform a read-modify-write)
- */
-#define L1_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l1_entry(BlockDriverState *bs, int l1_index)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t buf[L1_ENTRIES_PER_SECTOR];
- int l1_start_index;
- int i, ret;
-
- l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
- for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
- buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
- buf, sizeof(buf));
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-}
-
-/*
- * l2_allocate
- *
- * Allocate a new l2 entry in the file. If l1_index points to an already
- * used entry in the L2 table (i.e. we are doing a copy on write for the L2
- * table) copy the contents of the old L2 table into the newly allocated one.
- * Otherwise the new table is initialized with zeros.
- *
- */
-
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t old_l2_offset;
- uint64_t *l2_table;
- int64_t l2_offset;
- int ret;
-
- old_l2_offset = s->l1_table[l1_index];
-
- trace_qcow2_l2_allocate(bs, l1_index);
-
- /* allocate a new l2 entry */
-
- l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
- if (l2_offset < 0) {
- return l2_offset;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail;
- }
-
- /* allocate a new entry in the l2 cache */
-
- trace_qcow2_l2_allocate_get_empty(bs, l1_index);
- ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
- if (ret < 0) {
- return ret;
- }
-
- l2_table = *table;
-
- if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
- /* if there was no old l2 table, clear the new table */
- memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- } else {
- uint64_t* old_table;
-
- /* if there was an old l2 table, read it from the disk */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
- ret = qcow2_cache_get(bs, s->l2_table_cache,
- old_l2_offset & L1E_OFFSET_MASK,
- (void**) &old_table);
- if (ret < 0) {
- goto fail;
- }
-
- memcpy(l2_table, old_table, s->cluster_size);
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* write the l2 table to the file */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-
- trace_qcow2_l2_allocate_write_l2(bs, l1_index);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- goto fail;
- }
-
- /* update the L1 entry */
- trace_qcow2_l2_allocate_write_l1(bs, l1_index);
- s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
- ret = write_l1_entry(bs, l1_index);
- if (ret < 0) {
- goto fail;
- }
-
- *table = l2_table;
- trace_qcow2_l2_allocate_done(bs, l1_index, 0);
- return 0;
-
-fail:
- trace_qcow2_l2_allocate_done(bs, l1_index, ret);
- qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
- s->l1_table[l1_index] = old_l2_offset;
- return ret;
-}
-
-/*
- * Checks how many clusters in a given L2 table are contiguous in the image
- * file. As soon as one of the flags in the bitmask stop_flags changes compared
- * to the first cluster, the search is stopped and the cluster is not counted
- * as contiguous. (This allows it, for example, to stop at the first compressed
- * cluster which may require a different handling)
- */
-static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
- uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
-{
- int i;
- uint64_t mask = stop_flags | L2E_OFFSET_MASK;
- uint64_t offset = be64_to_cpu(l2_table[0]) & mask;
-
- if (!offset)
- return 0;
-
- for (i = start; i < start + nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
- if (offset + (uint64_t) i * cluster_size != l2_entry) {
- break;
- }
- }
-
- return (i - start);
-}
-
-static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
-{
- int i;
-
- for (i = 0; i < nb_clusters; i++) {
- int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
-
- if (type != QCOW2_CLUSTER_UNALLOCATED) {
- break;
- }
- }
-
- return i;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
- algorithm for < 4 GB images. NOTE: out_buf == in_buf is
- supported */
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
-{
- union {
- uint64_t ll[2];
- uint8_t b[16];
- } ivec;
- int i;
-
- for(i = 0; i < nb_sectors; i++) {
- ivec.ll[0] = cpu_to_le64(sector_num);
- ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
- sector_num++;
- in_buf += 512;
- out_buf += 512;
- }
-}
-
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
- uint64_t start_sect,
- uint64_t cluster_offset,
- int n_start, int n_end)
-{
- BDRVQcowState *s = bs->opaque;
- QEMUIOVector qiov;
- struct iovec iov;
- int n, ret;
-
- /*
- * If this is the last cluster and it is only partially used, we must only
- * copy until the end of the image, or bdrv_check_request will fail for the
- * bdrv_read/write calls below.
- */
- if (start_sect + n_end > bs->total_sectors) {
- n_end = bs->total_sectors - start_sect;
- }
-
- n = n_end - n_start;
- if (n <= 0) {
- return 0;
- }
-
- iov.iov_len = n * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
-
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
-
- /* Call .bdrv_co_readv() directly instead of using the public block-layer
- * interface. This avoids double I/O throttling and request tracking,
- * which can lead to deadlock when block layer copy-on-read is enabled.
- */
- ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
- if (ret < 0) {
- goto out;
- }
-
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, start_sect + n_start,
- iov.iov_base, iov.iov_base, n, 1,
- &s->aes_encrypt_key);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
- if (ret < 0) {
- goto out;
- }
-
- ret = 0;
-out:
- qemu_vfree(iov.iov_base);
- return ret;
-}
-
-
-/*
- * get_cluster_offset
- *
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
- *
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
- *
- * on exit, *num is the number of contiguous sectors we can read.
- *
- * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
- * cases.
- */
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int l2_index;
- uint64_t l1_index, l2_offset, *l2_table;
- int l1_bits, c;
- unsigned int index_in_cluster, nb_clusters;
- uint64_t nb_available, nb_needed;
- int ret;
-
- index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
- nb_needed = *num + index_in_cluster;
-
- l1_bits = s->l2_bits + s->cluster_bits;
-
- /* compute how many bytes there are between the offset and
- * the end of the l1 entry
- */
-
- nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
-
- /* compute the number of available sectors */
-
- nb_available = (nb_available >> 9) + index_in_cluster;
-
- if (nb_needed > nb_available) {
- nb_needed = nb_available;
- }
-
- *cluster_offset = 0;
-
- /* seek the the l2 offset in the l1 table */
-
- l1_index = offset >> l1_bits;
- if (l1_index >= s->l1_size) {
- ret = QCOW2_CLUSTER_UNALLOCATED;
- goto out;
- }
-
- l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
- if (!l2_offset) {
- ret = QCOW2_CLUSTER_UNALLOCATED;
- goto out;
- }
-
- /* load the l2 table in memory */
-
- ret = l2_load(bs, l2_offset, &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* find the cluster offset for the given disk offset */
-
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
- *cluster_offset = be64_to_cpu(l2_table[l2_index]);
- nb_clusters = size_to_clusters(s, nb_needed << 9);
-
- ret = qcow2_get_cluster_type(*cluster_offset);
- switch (ret) {
- case QCOW2_CLUSTER_COMPRESSED:
- /* Compressed clusters can only be processed one by one */
- c = 1;
- *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
- break;
- case QCOW2_CLUSTER_ZERO:
- if (s->qcow_version < 3) {
- return -EIO;
- }
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
- *cluster_offset = 0;
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- /* how many empty clusters ? */
- c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
- *cluster_offset = 0;
- break;
- case QCOW2_CLUSTER_NORMAL:
- /* how many allocated clusters ? */
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
- *cluster_offset &= L2E_OFFSET_MASK;
- break;
- default:
- abort();
- }
-
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-
- nb_available = (c * s->cluster_sectors);
-
-out:
- if (nb_available > nb_needed)
- nb_available = nb_needed;
-
- *num = nb_available - index_in_cluster;
-
- return ret;
-}
-
-/*
- * get_cluster_table
- *
- * for a given disk offset, load (and allocate if needed)
- * the l2 table.
- *
- * the l2 table offset in the qcow2 file and the cluster index
- * in the l2 table are given to the caller.
- *
- * Returns 0 on success, -errno in failure case
- */
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
- uint64_t **new_l2_table,
- int *new_l2_index)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int l2_index;
- uint64_t l1_index, l2_offset;
- uint64_t *l2_table = NULL;
- int ret;
-
- /* seek the the l2 offset in the l1 table */
-
- l1_index = offset >> (s->l2_bits + s->cluster_bits);
- if (l1_index >= s->l1_size) {
- ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
- if (ret < 0) {
- return ret;
- }
- }
-
- assert(l1_index < s->l1_size);
- l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
-
- /* seek the l2 table of the given l2 offset */
-
- if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
- /* load the l2 table in memory */
- ret = l2_load(bs, l2_offset, &l2_table);
- if (ret < 0) {
- return ret;
- }
- } else {
- /* First allocate a new L2 table (and do COW if needed) */
- ret = l2_allocate(bs, l1_index, &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* Then decrease the refcount of the old table */
- if (l2_offset) {
- qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- }
- }
-
- /* find the cluster offset for the given disk offset */
-
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-
- *new_l2_table = l2_table;
- *new_l2_index = l2_index;
-
- return 0;
-}
-
-/*
- * alloc_compressed_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
- * If the offset is not found, allocate a new compressed cluster.
- *
- * Return the cluster offset if successful,
- * Return 0, otherwise.
- *
- */
-
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int compressed_size)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index, ret;
- uint64_t *l2_table;
- int64_t cluster_offset;
- int nb_csectors;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return 0;
- }
-
- /* Compression can't overwrite anything. Fail if the cluster was already
- * allocated. */
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
- if (cluster_offset & L2E_OFFSET_MASK) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- return 0;
- }
-
- cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
- if (cluster_offset < 0) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- return 0;
- }
-
- nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
- (cluster_offset >> 9);
-
- cluster_offset |= QCOW_OFLAG_COMPRESSED |
- ((uint64_t)nb_csectors << s->csize_shift);
-
- /* update L2 table */
-
- /* compressed clusters never have the copied flag */
-
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- l2_table[l2_index] = cpu_to_be64(cluster_offset);
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return 0;
- }
-
- return cluster_offset;
-}
-
-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- if (r->nb_sectors == 0) {
- return 0;
- }
-
- qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
- r->offset / BDRV_SECTOR_SIZE,
- r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
- qemu_co_mutex_lock(&s->lock);
-
- if (ret < 0) {
- return ret;
- }
-
- /*
- * Before we update the L2 table to actually point to the new cluster, we
- * need to be sure that the refcounts have been increased and COW was
- * handled.
- */
- qcow2_cache_depends_on_flush(s->l2_table_cache);
-
- return 0;
-}
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
-{
- BDRVQcowState *s = bs->opaque;
- int i, j = 0, l2_index, ret;
- uint64_t *old_cluster, *l2_table;
- uint64_t cluster_offset = m->alloc_offset;
-
- trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
- assert(m->nb_clusters > 0);
-
- old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
-
- /* copy content of unmodified sectors */
- ret = perform_cow(bs, m, &m->cow_start);
- if (ret < 0) {
- goto err;
- }
-
- ret = perform_cow(bs, m, &m->cow_end);
- if (ret < 0) {
- goto err;
- }
-
- /* Update L2 table. */
- if (s->use_lazy_refcounts) {
- qcow2_mark_dirty(bs);
- }
- if (qcow2_need_accurate_refcounts(s)) {
- qcow2_cache_set_dependency(bs, s->l2_table_cache,
- s->refcount_block_cache);
- }
-
- ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
- if (ret < 0) {
- goto err;
- }
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-
- for (i = 0; i < m->nb_clusters; i++) {
- /* if two concurrent writes happen to the same unallocated cluster
- * each write allocates separate cluster and writes data concurrently.
- * The first one to complete updates l2 table with pointer to its
- * cluster the second one has to do RMW (which is done above by
- * copy_sectors()), update l2 table with its cluster pointer and free
- * old cluster. This is what this loop does */
- if(l2_table[l2_index + i] != 0)
- old_cluster[j++] = l2_table[l2_index + i];
-
- l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
- (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
- }
-
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto err;
- }
-
- /*
- * If this was a COW, we need to decrease the refcount of the old cluster.
- * Also flush bs->file to get the right order for L2 and refcount update.
- *
- * Don't discard clusters that reach a refcount of 0 (e.g. compressed
- * clusters), the next write will reuse them anyway.
- */
- if (j != 0) {
- for (i = 0; i < j; i++) {
- qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
- QCOW2_DISCARD_NEVER);
- }
- }
-
- ret = 0;
-err:
- g_free(old_cluster);
- return ret;
- }
-
-/*
- * Returns the number of contiguous clusters that can be used for an allocating
- * write, but require COW to be performed (this includes yet unallocated space,
- * which must copy from the backing file)
- */
-static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
- uint64_t *l2_table, int l2_index)
-{
- int i;
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
- int cluster_type = qcow2_get_cluster_type(l2_entry);
-
- switch(cluster_type) {
- case QCOW2_CLUSTER_NORMAL:
- if (l2_entry & QCOW_OFLAG_COPIED) {
- goto out;
- }
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- case QCOW2_CLUSTER_COMPRESSED:
- case QCOW2_CLUSTER_ZERO:
- break;
- default:
- abort();
- }
- }
-
-out:
- assert(i <= nb_clusters);
- return i;
-}
-
-/*
- * Check if there already is an AIO write request in flight which allocates
- * the same cluster. In this case we need to wait until the previous
- * request has completed and updated the L2 table accordingly.
- *
- * Returns:
- * 0 if there was no dependency. *cur_bytes indicates the number of
- * bytes from guest_offset that can be read before the next
- * dependency must be processed (or the request is complete)
- *
- * -EAGAIN if we had to wait for another request, previously gathered
- * information on cluster allocation may be invalid now. The caller
- * must start over anyway, so consider *cur_bytes undefined.
- */
-static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *cur_bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- QCowL2Meta *old_alloc;
- uint64_t bytes = *cur_bytes;
-
- QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
-
- uint64_t start = guest_offset;
- uint64_t end = start + bytes;
- uint64_t old_start = l2meta_cow_start(old_alloc);
- uint64_t old_end = l2meta_cow_end(old_alloc);
-
- if (end <= old_start || start >= old_end) {
- /* No intersection */
- } else {
- if (start < old_start) {
- /* Stop at the start of a running allocation */
- bytes = old_start - start;
- } else {
- bytes = 0;
- }
-
- /* Stop if already an l2meta exists. After yielding, it wouldn't
- * be valid any more, so we'd have to clean up the old L2Metas
- * and deal with requests depending on them before starting to
- * gather new ones. Not worth the trouble. */
- if (bytes == 0 && *m) {
- *cur_bytes = 0;
- return 0;
- }
-
- if (bytes == 0) {
- /* Wait for the dependency to complete. We need to recheck
- * the free/allocated clusters when we continue. */
- qemu_co_mutex_unlock(&s->lock);
- qemu_co_queue_wait(&old_alloc->dependent_requests);
- qemu_co_mutex_lock(&s->lock);
- return -EAGAIN;
- }
- }
- }
-
- /* Make sure that existing clusters and new allocations are only used up to
- * the next dependency if we shortened the request above */
- *cur_bytes = bytes;
-
- return 0;
-}
-
-/*
- * Checks how many already allocated clusters that don't require a copy on
- * write there are at the given guest_offset (up to *bytes). If
- * *host_offset is not zero, only physically contiguous clusters beginning at
- * this host offset are counted.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * Returns:
- * 0: if no allocated clusters are available at the given offset.
- * *bytes is normally unchanged. It is set to 0 if the cluster
- * is allocated and doesn't need COW, but doesn't have the right
- * physical offset.
- *
- * 1: if allocated clusters that don't require a COW are available at
- * the requested offset. *bytes may have decreased and describes
- * the length of the area that can be written to.
- *
- * -errno: in error cases
- */
-static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index;
- uint64_t cluster_offset;
- uint64_t *l2_table;
- unsigned int nb_clusters;
- unsigned int keep_clusters;
- int ret, pret;
-
- trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
- *bytes);
-
- assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
- == offset_into_cluster(s, *host_offset));
-
- /*
- * Calculate the number of clusters to look for. We stop at L2 table
- * boundaries to keep things simple.
- */
- nb_clusters =
- size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- /* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
- /* Check how many clusters are already allocated and don't need COW */
- if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
- && (cluster_offset & QCOW_OFLAG_COPIED))
- {
- /* If a specific host_offset is required, check it */
- bool offset_matches =
- (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
-
- if (*host_offset != 0 && !offset_matches) {
- *bytes = 0;
- ret = 0;
- goto out;
- }
-
- /* We keep all QCOW_OFLAG_COPIED clusters */
- keep_clusters =
- count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
- assert(keep_clusters <= nb_clusters);
-
- *bytes = MIN(*bytes,
- keep_clusters * s->cluster_size
- - offset_into_cluster(s, guest_offset));
-
- ret = 1;
- } else {
- ret = 0;
- }
-
- /* Cleanup */
-out:
- pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (pret < 0) {
- return pret;
- }
-
- /* Only return a host offset if we actually made progress. Otherwise we
- * would make requirements for handle_alloc() that it can't fulfill */
- if (ret) {
- *host_offset = (cluster_offset & L2E_OFFSET_MASK)
- + offset_into_cluster(s, guest_offset);
- }
-
- return ret;
-}
-
-/*
- * Allocates new clusters for the given guest_offset.
- *
- * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
- * contain the number of clusters that have been allocated and are contiguous
- * in the image file.
- *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
- *
- * *host_offset is updated to contain the offset into the image file at which
- * the first allocated cluster starts.
- *
- * Return 0 on success and -errno in error cases. -EAGAIN means that the
- * function has been waiting for another request and the allocation must be
- * restarted, but the whole request should not be failed.
- */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, unsigned int *nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
-
- trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
- *host_offset, *nb_clusters);
-
- /* Allocate new clusters */
- trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
- if (*host_offset == 0) {
- int64_t cluster_offset =
- qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
- if (cluster_offset < 0) {
- return cluster_offset;
- }
- *host_offset = cluster_offset;
- return 0;
- } else {
- int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
- if (ret < 0) {
- return ret;
- }
- *nb_clusters = ret;
- return 0;
- }
-}
-
-/*
- * Allocates new clusters for an area that either is yet unallocated or needs a
- * copy on write. If *host_offset is non-zero, clusters are only allocated if
- * the new allocation can match the specified host offset.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * Returns:
- * 0: if no clusters could be allocated. *bytes is set to 0,
- * *host_offset is left unchanged.
- *
- * 1: if new clusters were allocated. *bytes may be decreased if the
- * new allocation doesn't cover all of the requested area.
- * *host_offset is updated to contain the host offset of the first
- * newly allocated cluster.
- *
- * -errno: in error cases
- */
-static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index;
- uint64_t *l2_table;
- uint64_t entry;
- unsigned int nb_clusters;
- int ret;
-
- uint64_t alloc_cluster_offset;
-
- trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
- *bytes);
- assert(*bytes > 0);
-
- /*
- * Calculate the number of clusters to look for. We stop at L2 table
- * boundaries to keep things simple.
- */
- nb_clusters =
- size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- /* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- entry = be64_to_cpu(l2_table[l2_index]);
-
- /* For the moment, overwrite compressed clusters one by one */
- if (entry & QCOW_OFLAG_COMPRESSED) {
- nb_clusters = 1;
- } else {
- nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
- }
-
- /* This function is only called when there were no non-COW clusters, so if
- * we can't find any unallocated or COW clusters either, something is
- * wrong with our code. */
- assert(nb_clusters > 0);
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* Allocate, if necessary at a given offset in the image file */
- alloc_cluster_offset = start_of_cluster(s, *host_offset);
- ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
- &nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- /* Can't extend contiguous allocation */
- if (nb_clusters == 0) {
- *bytes = 0;
- return 0;
- }
-
- /*
- * Save info needed for meta data update.
- *
- * requested_sectors: Number of sectors from the start of the first
- * newly allocated cluster to the end of the (possibly shortened
- * before) write request.
- *
- * avail_sectors: Number of sectors from the start of the first
- * newly allocated to the end of the last newly allocated cluster.
- *
- * nb_sectors: The number of sectors from the start of the first
- * newly allocated cluster to the end of the area that the write
- * request actually writes to (excluding COW at the end)
- */
- int requested_sectors =
- (*bytes + offset_into_cluster(s, guest_offset))
- >> BDRV_SECTOR_BITS;
- int avail_sectors = nb_clusters
- << (s->cluster_bits - BDRV_SECTOR_BITS);
- int alloc_n_start = offset_into_cluster(s, guest_offset)
- >> BDRV_SECTOR_BITS;
- int nb_sectors = MIN(requested_sectors, avail_sectors);
- QCowL2Meta *old_m = *m;
-
- *m = g_malloc0(sizeof(**m));
-
- **m = (QCowL2Meta) {
- .next = old_m,
-
- .alloc_offset = alloc_cluster_offset,
- .offset = start_of_cluster(s, guest_offset),
- .nb_clusters = nb_clusters,
- .nb_available = nb_sectors,
-
- .cow_start = {
- .offset = 0,
- .nb_sectors = alloc_n_start,
- },
- .cow_end = {
- .offset = nb_sectors * BDRV_SECTOR_SIZE,
- .nb_sectors = avail_sectors - nb_sectors,
- },
- };
- qemu_co_queue_init(&(*m)->dependent_requests);
- QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
-
- *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
- *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
- - offset_into_cluster(s, guest_offset));
- assert(*bytes != 0);
-
- return 1;
-
-fail:
- if (*m && (*m)->nb_clusters > 0) {
- QLIST_REMOVE(*m, next_in_flight);
- }
- return ret;
-}
-
-/*
- * alloc_cluster_offset
- *
- * For a given offset on the virtual disk, find the cluster offset in qcow2
- * file. If the offset is not found, allocate a new cluster.
- *
- * If the cluster was already allocated, m->nb_clusters is set to 0 and
- * other fields in m are meaningless.
- *
- * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. In this case, the other
- * fields of m are valid and contain information about the first allocated
- * cluster.
- *
- * If the request conflicts with another write request in flight, the coroutine
- * is queued and will be reentered when the dependency has completed.
- *
- * Return 0 on success and -errno in error cases
- */
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t start, remaining;
- uint64_t cluster_offset;
- uint64_t cur_bytes;
- int ret;
-
- trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
- n_start, n_end);
-
- assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
- offset = start_of_cluster(s, offset);
-
-again:
- start = offset + (n_start << BDRV_SECTOR_BITS);
- remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
- cluster_offset = 0;
- *host_offset = 0;
- cur_bytes = 0;
- *m = NULL;
-
- while (true) {
-
- if (!*host_offset) {
- *host_offset = start_of_cluster(s, cluster_offset);
- }
-
- assert(remaining >= cur_bytes);
-
- start += cur_bytes;
- remaining -= cur_bytes;
- cluster_offset += cur_bytes;
-
- if (remaining == 0) {
- break;
- }
-
- cur_bytes = remaining;
-
- /*
- * Now start gathering as many contiguous clusters as possible:
- *
- * 1. Check for overlaps with in-flight allocations
- *
- * a) Overlap not in the first cluster -> shorten this request and
- * let the caller handle the rest in its next loop iteration.
- *
- * b) Real overlaps of two requests. Yield and restart the search
- * for contiguous clusters (the situation could have changed
- * while we were sleeping)
- *
- * c) TODO: Request starts in the same cluster as the in-flight
- * allocation ends. Shorten the COW of the in-fight allocation,
- * set cluster_offset to write to the same cluster and set up
- * the right synchronisation between the in-flight request and
- * the new one.
- */
- ret = handle_dependencies(bs, start, &cur_bytes, m);
- if (ret == -EAGAIN) {
- /* Currently handle_dependencies() doesn't yield if we already had
- * an allocation. If it did, we would have to clean up the L2Meta
- * structs before starting over. */
- assert(*m == NULL);
- goto again;
- } else if (ret < 0) {
- return ret;
- } else if (cur_bytes == 0) {
- break;
- } else {
- /* handle_dependencies() may have decreased cur_bytes (shortened
- * the allocations below) so that the next dependency is processed
- * correctly during the next loop iteration. */
- }
-
- /*
- * 2. Count contiguous COPIED clusters.
- */
- ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- continue;
- } else if (cur_bytes == 0) {
- break;
- }
-
- /*
- * 3. If the request still hasn't completed, allocate new clusters,
- * considering any cluster_offset of steps 1c or 2.
- */
- ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- continue;
- } else {
- assert(cur_bytes == 0);
- break;
- }
- }
-
- *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
- assert(*num > 0);
- assert(*host_offset != 0);
-
- return 0;
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
- const uint8_t *buf, int buf_size)
-{
- z_stream strm1, *strm = &strm1;
- int ret, out_len;
-
- memset(strm, 0, sizeof(*strm));
-
- strm->next_in = (uint8_t *)buf;
- strm->avail_in = buf_size;
- strm->next_out = out_buf;
- strm->avail_out = out_buf_size;
-
- ret = inflateInit2(strm, -12);
- if (ret != Z_OK)
- return -1;
- ret = inflate(strm, Z_FINISH);
- out_len = strm->next_out - out_buf;
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
- out_len != out_buf_size) {
- inflateEnd(strm);
- return -1;
- }
- inflateEnd(strm);
- return 0;
-}
-
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, csize, nb_csectors, sector_offset;
- uint64_t coffset;
-
- coffset = cluster_offset & s->cluster_offset_mask;
- if (s->cluster_cache_offset != coffset) {
- nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
- sector_offset = coffset & 511;
- csize = nb_csectors * 512 - sector_offset;
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
- if (ret < 0) {
- return ret;
- }
- if (decompress_buffer(s->cluster_cache, s->cluster_size,
- s->cluster_data + sector_offset, csize) < 0) {
- return -EIO;
- }
- s->cluster_cache_offset = coffset;
- }
- return 0;
-}
-
-/*
- * This discards as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of discarded
- * clusters.
- */
-static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table;
- int l2_index;
- int ret;
- int i;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
-
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
- if ((old_offset & L2E_OFFSET_MASK) == 0) {
- continue;
- }
-
- /* First remove L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- l2_table[l2_index + i] = cpu_to_be64(0);
-
- /* Then decrease the refcount */
- qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- return nb_clusters;
-}
-
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t end_offset;
- unsigned int nb_clusters;
- int ret;
-
- end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
- /* Round start up and end down */
- offset = align_offset(offset, s->cluster_size);
- end_offset &= ~(s->cluster_size - 1);
-
- if (offset > end_offset) {
- return 0;
- }
-
- nb_clusters = size_to_clusters(s, end_offset - offset);
-
- s->cache_discards = true;
-
- /* Each L2 table is handled by its own loop iteration */
- while (nb_clusters > 0) {
- ret = discard_single_l2(bs, offset, nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- nb_clusters -= ret;
- offset += (ret * s->cluster_size);
- }
-
- ret = 0;
-fail:
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- return ret;
-}
-
-/*
- * This zeroes as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of zeroed
- * clusters.
- */
-static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table;
- int l2_index;
- int ret;
- int i;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
-
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
-
- /* Update L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- if (old_offset & QCOW_OFLAG_COMPRESSED) {
- l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
- qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
- } else {
- l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
- }
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- return nb_clusters;
-}
-
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int nb_clusters;
- int ret;
-
- /* The zero flag is only supported by version 3 and newer */
- if (s->qcow_version < 3) {
- return -ENOTSUP;
- }
-
- /* Each L2 table is handled by its own loop iteration */
- nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
-
- s->cache_discards = true;
-
- while (nb_clusters > 0) {
- ret = zero_single_l2(bs, offset, nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- nb_clusters -= ret;
- offset += (ret * s->cluster_size);
- }
-
- ret = 0;
-fail:
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- return ret;
-}
diff --git a/contrib/qemu/block/qcow2-refcount.c b/contrib/qemu/block/qcow2-refcount.c
deleted file mode 100644
index 1244693f39e..00000000000
--- a/contrib/qemu/block/qcow2-refcount.c
+++ /dev/null
@@ -1,1374 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length,
- int addend, enum qcow2_discard_type type);
-
-
-/*********************************************************/
-/* refcount handling */
-
-int qcow2_refcount_init(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, refcount_table_size2, i;
-
- refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
- s->refcount_table = g_malloc(refcount_table_size2);
- if (s->refcount_table_size > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
- ret = bdrv_pread(bs->file, s->refcount_table_offset,
- s->refcount_table, refcount_table_size2);
- if (ret != refcount_table_size2)
- goto fail;
- for(i = 0; i < s->refcount_table_size; i++)
- be64_to_cpus(&s->refcount_table[i]);
- }
- return 0;
- fail:
- return -ENOMEM;
-}
-
-void qcow2_refcount_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- g_free(s->refcount_table);
-}
-
-
-static int load_refcount_block(BlockDriverState *bs,
- int64_t refcount_block_offset,
- void **refcount_block)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- refcount_block);
-
- return ret;
-}
-
-/*
- * Returns the refcount of the cluster given by its index. Any non-negative
- * return value is the refcount of the cluster, negative values are -errno
- * and indicate an error.
- */
-static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
-{
- BDRVQcowState *s = bs->opaque;
- int refcount_table_index, block_index;
- int64_t refcount_block_offset;
- int ret;
- uint16_t *refcount_block;
- uint16_t refcount;
-
- refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
- if (refcount_table_index >= s->refcount_table_size)
- return 0;
- refcount_block_offset = s->refcount_table[refcount_table_index];
- if (!refcount_block_offset)
- return 0;
-
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- (void**) &refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- block_index = cluster_index &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
- refcount = be16_to_cpu(refcount_block[block_index]);
-
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- return refcount;
-}
-
-/*
- * Rounds the refcount table size up to avoid growing the table for each single
- * refcount block that is allocated.
- */
-static unsigned int next_refcount_table_size(BDRVQcowState *s,
- unsigned int min_size)
-{
- unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
- unsigned int refcount_table_clusters =
- MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
-
- while (min_clusters > refcount_table_clusters) {
- refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
- }
-
- return refcount_table_clusters << (s->cluster_bits - 3);
-}
-
-
-/* Checks if two offsets are described by the same refcount block */
-static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
- uint64_t offset_b)
-{
- uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
- uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
-
- return (block_a == block_b);
-}
-
-/*
- * Loads a refcount block. If it doesn't exist yet, it is allocated first
- * (including growing the refcount table if needed).
- *
- * Returns 0 on success or -errno in error case
- */
-static int alloc_refcount_block(BlockDriverState *bs,
- int64_t cluster_index, uint16_t **refcount_block)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int refcount_table_index;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
-
- /* Find the refcount block for the given cluster */
- refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-
- if (refcount_table_index < s->refcount_table_size) {
-
- uint64_t refcount_block_offset =
- s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
-
- /* If it's already there, we're done */
- if (refcount_block_offset) {
- return load_refcount_block(bs, refcount_block_offset,
- (void**) refcount_block);
- }
- }
-
- /*
- * If we came here, we need to allocate something. Something is at least
- * a cluster for the new refcount block. It may also include a new refcount
- * table if the old refcount table is too small.
- *
- * Note that allocating clusters here needs some special care:
- *
- * - We can't use the normal qcow2_alloc_clusters(), it would try to
- * increase the refcount and very likely we would end up with an endless
- * recursion. Instead we must place the refcount blocks in a way that
- * they can describe them themselves.
- *
- * - We need to consider that at this point we are inside update_refcounts
- * and doing the initial refcount increase. This means that some clusters
- * have already been allocated by the caller, but their refcount isn't
- * accurate yet. free_cluster_index tells us where this allocation ends
- * as long as we don't overwrite it by freeing clusters.
- *
- * - alloc_clusters_noref and qcow2_free_clusters may load a different
- * refcount block into the cache
- */
-
- *refcount_block = NULL;
-
- /* We write to the refcount table, so we might depend on L2 tables */
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- return ret;
- }
-
- /* Allocate the refcount block itself and mark it as used */
- int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
- if (new_block < 0) {
- return new_block;
- }
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
- " at %" PRIx64 "\n",
- refcount_table_index, cluster_index << s->cluster_bits, new_block);
-#endif
-
- if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
- /* Zero the new refcount block before updating it */
- ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- memset(*refcount_block, 0, s->cluster_size);
-
- /* The block describes itself, need to update the cache */
- int block_index = (new_block >> s->cluster_bits) &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
- (*refcount_block)[block_index] = cpu_to_be16(1);
- } else {
- /* Described somewhere else. This can recurse at most twice before we
- * arrive at a block that describes itself. */
- ret = update_refcount(bs, new_block, s->cluster_size, 1,
- QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- goto fail_block;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail_block;
- }
-
- /* Initialize the new refcount block only after updating its refcount,
- * update_refcount uses the refcount cache itself */
- ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- memset(*refcount_block, 0, s->cluster_size);
- }
-
- /* Now the new refcount block needs to be written to disk */
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail_block;
- }
-
- /* If the refcount table is big enough, just hook the block up there */
- if (refcount_table_index < s->refcount_table_size) {
- uint64_t data64 = cpu_to_be64(new_block);
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
- ret = bdrv_pwrite_sync(bs->file,
- s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
- &data64, sizeof(data64));
- if (ret < 0) {
- goto fail_block;
- }
-
- s->refcount_table[refcount_table_index] = new_block;
- return 0;
- }
-
- ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- /*
- * If we come here, we need to grow the refcount table. Again, a new
- * refcount table needs some space and we can't simply allocate to avoid
- * endless recursion.
- *
- * Therefore let's grab new refcount blocks at the end of the image, which
- * will describe themselves and the new refcount table. This way we can
- * reference them only in the new table and do the switch to the new
- * refcount table at once without producing an inconsistent state in
- * between.
- */
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
-
- /* Calculate the number of refcount blocks needed so far */
- uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
- uint64_t blocks_used = (s->free_cluster_index +
- refcount_block_clusters - 1) / refcount_block_clusters;
-
- /* And now we need at least one block more for the new metadata */
- uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
- uint64_t last_table_size;
- uint64_t blocks_clusters;
- do {
- uint64_t table_clusters =
- size_to_clusters(s, table_size * sizeof(uint64_t));
- blocks_clusters = 1 +
- ((table_clusters + refcount_block_clusters - 1)
- / refcount_block_clusters);
- uint64_t meta_clusters = table_clusters + blocks_clusters;
-
- last_table_size = table_size;
- table_size = next_refcount_table_size(s, blocks_used +
- ((meta_clusters + refcount_block_clusters - 1)
- / refcount_block_clusters));
-
- } while (last_table_size != table_size);
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
- s->refcount_table_size, table_size);
-#endif
-
- /* Create the new refcount table and blocks */
- uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
- s->cluster_size;
- uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
- uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
- uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
-
- assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
-
- /* Fill the new refcount table */
- memcpy(new_table, s->refcount_table,
- s->refcount_table_size * sizeof(uint64_t));
- new_table[refcount_table_index] = new_block;
-
- int i;
- for (i = 0; i < blocks_clusters; i++) {
- new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
- }
-
- /* Fill the refcount blocks */
- uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
- int block = 0;
- for (i = 0; i < table_clusters + blocks_clusters; i++) {
- new_blocks[block++] = cpu_to_be16(1);
- }
-
- /* Write refcount blocks to disk */
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
- ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
- blocks_clusters * s->cluster_size);
- g_free(new_blocks);
- if (ret < 0) {
- goto fail_table;
- }
-
- /* Write refcount table to disk */
- for(i = 0; i < table_size; i++) {
- cpu_to_be64s(&new_table[i]);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
- ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
- table_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail_table;
- }
-
- for(i = 0; i < table_size; i++) {
- be64_to_cpus(&new_table[i]);
- }
-
- /* Hook up the new refcount table in the qcow2 header */
- uint8_t data[12];
- cpu_to_be64w((uint64_t*)data, table_offset);
- cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
- data, sizeof(data));
- if (ret < 0) {
- goto fail_table;
- }
-
- /* And switch it in memory */
- uint64_t old_table_offset = s->refcount_table_offset;
- uint64_t old_table_size = s->refcount_table_size;
-
- g_free(s->refcount_table);
- s->refcount_table = new_table;
- s->refcount_table_size = table_size;
- s->refcount_table_offset = table_offset;
-
- /* Free old table. Remember, we must not change free_cluster_index */
- uint64_t old_free_cluster_index = s->free_cluster_index;
- qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- s->free_cluster_index = old_free_cluster_index;
-
- ret = load_refcount_block(bs, new_block, (void**) refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-
-fail_table:
- g_free(new_table);
-fail_block:
- if (*refcount_block != NULL) {
- qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
- }
- return ret;
-}
-
-void qcow2_process_discards(BlockDriverState *bs, int ret)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2DiscardRegion *d, *next;
-
- QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
- QTAILQ_REMOVE(&s->discards, d, next);
-
- /* Discard is optional, ignore the return value */
- if (ret >= 0) {
- bdrv_discard(bs->file,
- d->offset >> BDRV_SECTOR_BITS,
- d->bytes >> BDRV_SECTOR_BITS);
- }
-
- g_free(d);
- }
-}
-
-static void update_refcount_discard(BlockDriverState *bs,
- uint64_t offset, uint64_t length)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2DiscardRegion *d, *p, *next;
-
- QTAILQ_FOREACH(d, &s->discards, next) {
- uint64_t new_start = MIN(offset, d->offset);
- uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
-
- if (new_end - new_start <= length + d->bytes) {
- /* There can't be any overlap, areas ending up here have no
- * references any more and therefore shouldn't get freed another
- * time. */
- assert(d->bytes + length == new_end - new_start);
- d->offset = new_start;
- d->bytes = new_end - new_start;
- goto found;
- }
- }
-
- d = g_malloc(sizeof(*d));
- *d = (Qcow2DiscardRegion) {
- .bs = bs,
- .offset = offset,
- .bytes = length,
- };
- QTAILQ_INSERT_TAIL(&s->discards, d, next);
-
-found:
- /* Merge discard requests if they are adjacent now */
- QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
- if (p == d
- || p->offset > d->offset + d->bytes
- || d->offset > p->offset + p->bytes)
- {
- continue;
- }
-
- /* Still no overlap possible */
- assert(p->offset == d->offset + d->bytes
- || d->offset == p->offset + p->bytes);
-
- QTAILQ_REMOVE(&s->discards, p, next);
- d->offset = MIN(d->offset, p->offset);
- d->bytes += p->bytes;
- }
-}
-
-/* XXX: cache several refcount block clusters ? */
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t start, last, cluster_offset;
- uint16_t *refcount_block = NULL;
- int64_t old_table_index = -1;
- int ret;
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
- offset, length, addend);
-#endif
- if (length < 0) {
- return -EINVAL;
- } else if (length == 0) {
- return 0;
- }
-
- if (addend < 0) {
- qcow2_cache_set_dependency(bs, s->refcount_block_cache,
- s->l2_table_cache);
- }
-
- start = offset & ~(s->cluster_size - 1);
- last = (offset + length - 1) & ~(s->cluster_size - 1);
- for(cluster_offset = start; cluster_offset <= last;
- cluster_offset += s->cluster_size)
- {
- int block_index, refcount;
- int64_t cluster_index = cluster_offset >> s->cluster_bits;
- int64_t table_index =
- cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-
- /* Load the refcount block and allocate it if needed */
- if (table_index != old_table_index) {
- if (refcount_block) {
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
- if (ret < 0) {
- goto fail;
- }
- }
- old_table_index = table_index;
-
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
-
- /* we can update the count and save it */
- block_index = cluster_index &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-
- refcount = be16_to_cpu(refcount_block[block_index]);
- refcount += addend;
- if (refcount < 0 || refcount > 0xffff) {
- ret = -EINVAL;
- goto fail;
- }
- if (refcount == 0 && cluster_index < s->free_cluster_index) {
- s->free_cluster_index = cluster_index;
- }
- refcount_block[block_index] = cpu_to_be16(refcount);
-
- if (refcount == 0 && s->discard_passthrough[type]) {
- update_refcount_discard(bs, cluster_offset, s->cluster_size);
- }
- }
-
- ret = 0;
-fail:
- if (!s->cache_discards) {
- qcow2_process_discards(bs, ret);
- }
-
- /* Write last changed block to disk */
- if (refcount_block) {
- int wret;
- wret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (wret < 0) {
- return ret < 0 ? ret : wret;
- }
- }
-
- /*
- * Try do undo any updates if an error is returned (This may succeed in
- * some cases like ENOSPC for allocating a new refcount block)
- */
- if (ret < 0) {
- int dummy;
- dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
- QCOW2_DISCARD_NEVER);
- (void)dummy;
- }
-
- return ret;
-}
-
-/*
- * Increases or decreases the refcount of a given cluster by one.
- * addend must be 1 or -1.
- *
- * If the return value is non-negative, it is the new refcount of the cluster.
- * If it is negative, it is -errno and indicates an error.
- */
-static int update_cluster_refcount(BlockDriverState *bs,
- int64_t cluster_index,
- int addend,
- enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
- type);
- if (ret < 0) {
- return ret;
- }
-
- return get_refcount(bs, cluster_index);
-}
-
-
-
-/*********************************************************/
-/* cluster allocation functions */
-
-
-
-/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
-{
- BDRVQcowState *s = bs->opaque;
- int i, nb_clusters, refcount;
-
- nb_clusters = size_to_clusters(s, size);
-retry:
- for(i = 0; i < nb_clusters; i++) {
- int64_t next_cluster_index = s->free_cluster_index++;
- refcount = get_refcount(bs, next_cluster_index);
-
- if (refcount < 0) {
- return refcount;
- } else if (refcount != 0) {
- goto retry;
- }
- }
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
- size,
- (s->free_cluster_index - nb_clusters) << s->cluster_bits);
-#endif
- return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
-}
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
-{
- int64_t offset;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
- offset = alloc_clusters_noref(bs, size);
- if (offset < 0) {
- return offset;
- }
-
- ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- return ret;
- }
-
- return offset;
-}
-
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t cluster_index;
- uint64_t old_free_cluster_index;
- int i, refcount, ret;
-
- /* Check how many clusters there are free */
- cluster_index = offset >> s->cluster_bits;
- for(i = 0; i < nb_clusters; i++) {
- refcount = get_refcount(bs, cluster_index++);
-
- if (refcount < 0) {
- return refcount;
- } else if (refcount != 0) {
- break;
- }
- }
-
- /* And then allocate them */
- old_free_cluster_index = s->free_cluster_index;
- s->free_cluster_index = cluster_index + i;
-
- ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- return ret;
- }
-
- s->free_cluster_index = old_free_cluster_index;
-
- return i;
-}
-
-/* only used to allocate compressed sectors. We try to allocate
- contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t offset, cluster_offset;
- int free_in_cluster;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
- assert(size > 0 && size <= s->cluster_size);
- if (s->free_byte_offset == 0) {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
- }
- s->free_byte_offset = offset;
- }
- redo:
- free_in_cluster = s->cluster_size -
- (s->free_byte_offset & (s->cluster_size - 1));
- if (size <= free_in_cluster) {
- /* enough space in current cluster */
- offset = s->free_byte_offset;
- s->free_byte_offset += size;
- free_in_cluster -= size;
- if (free_in_cluster == 0)
- s->free_byte_offset = 0;
- if ((offset & (s->cluster_size - 1)) != 0)
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- } else {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
- }
- cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
- if ((cluster_offset + s->cluster_size) == offset) {
- /* we are lucky: contiguous data */
- offset = s->free_byte_offset;
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- s->free_byte_offset += size;
- } else {
- s->free_byte_offset = offset;
- goto redo;
- }
- }
-
- /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
- * or explicitly by update_cluster_refcount(). Refcount blocks must be
- * flushed before the caller's L2 table updates.
- */
- qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
- return offset;
-}
-
-void qcow2_free_clusters(BlockDriverState *bs,
- int64_t offset, int64_t size,
- enum qcow2_discard_type type)
-{
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
- ret = update_refcount(bs, offset, size, -1, type);
- if (ret < 0) {
- fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
- /* TODO Remember the clusters to free them later and avoid leaking */
- }
-}
-
-/*
- * Free a cluster using its L2 entry (handles clusters of all types, e.g.
- * normal cluster, compressed cluster, etc.)
- */
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
- int nb_clusters, enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
-
- switch (qcow2_get_cluster_type(l2_entry)) {
- case QCOW2_CLUSTER_COMPRESSED:
- {
- int nb_csectors;
- nb_csectors = ((l2_entry >> s->csize_shift) &
- s->csize_mask) + 1;
- qcow2_free_clusters(bs,
- (l2_entry & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, type);
- }
- break;
- case QCOW2_CLUSTER_NORMAL:
- qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
- nb_clusters << s->cluster_bits, type);
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- case QCOW2_CLUSTER_ZERO:
- break;
- default:
- abort();
- }
-}
-
-
-
-/*********************************************************/
-/* snapshots and image creation */
-
-
-
-/* update the refcounts of snapshots and the copied flag */
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
- int64_t l1_table_offset, int l1_size, int addend)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
- int64_t old_offset, old_l2_offset;
- int i, j, l1_modified = 0, nb_csectors, refcount;
- int ret;
-
- l2_table = NULL;
- l1_table = NULL;
- l1_size2 = l1_size * sizeof(uint64_t);
-
- s->cache_discards = true;
-
- /* WARNING: qcow2_snapshot_goto relies on this function not using the
- * l1_table_offset when it is the current s->l1_table_offset! Be careful
- * when changing this! */
- if (l1_table_offset != s->l1_table_offset) {
- l1_table = g_malloc0(align_offset(l1_size2, 512));
- l1_allocated = 1;
-
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
- if (ret < 0) {
- goto fail;
- }
-
- for(i = 0;i < l1_size; i++)
- be64_to_cpus(&l1_table[i]);
- } else {
- assert(l1_size == s->l1_size);
- l1_table = s->l1_table;
- l1_allocated = 0;
- }
-
- for(i = 0; i < l1_size; i++) {
- l2_offset = l1_table[i];
- if (l2_offset) {
- old_l2_offset = l2_offset;
- l2_offset &= L1E_OFFSET_MASK;
-
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
- (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
-
- for(j = 0; j < s->l2_size; j++) {
- offset = be64_to_cpu(l2_table[j]);
- if (offset != 0) {
- old_offset = offset;
- offset &= ~QCOW_OFLAG_COPIED;
- if (offset & QCOW_OFLAG_COMPRESSED) {
- nb_csectors = ((offset >> s->csize_shift) &
- s->csize_mask) + 1;
- if (addend != 0) {
- int ret;
- ret = update_refcount(bs,
- (offset & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, addend,
- QCOW2_DISCARD_SNAPSHOT);
- if (ret < 0) {
- goto fail;
- }
- }
- /* compressed clusters are never modified */
- refcount = 2;
- } else {
- uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
- if (addend != 0) {
- refcount = update_cluster_refcount(bs, cluster_index, addend,
- QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = get_refcount(bs, cluster_index);
- }
-
- if (refcount < 0) {
- ret = refcount;
- goto fail;
- }
- }
-
- if (refcount == 1) {
- offset |= QCOW_OFLAG_COPIED;
- }
- if (offset != old_offset) {
- if (addend > 0) {
- qcow2_cache_set_dependency(bs, s->l2_table_cache,
- s->refcount_block_cache);
- }
- l2_table[j] = cpu_to_be64(offset);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- }
- }
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
-
-
- if (addend != 0) {
- refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
- QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
- }
- if (refcount < 0) {
- ret = refcount;
- goto fail;
- } else if (refcount == 1) {
- l2_offset |= QCOW_OFLAG_COPIED;
- }
- if (l2_offset != old_l2_offset) {
- l1_table[i] = l2_offset;
- l1_modified = 1;
- }
- }
- }
-
- ret = bdrv_flush(bs);
-fail:
- if (l2_table) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- }
-
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- /* Update L1 only if it isn't deleted anyway (addend = -1) */
- if (ret == 0 && addend >= 0 && l1_modified) {
- for (i = 0; i < l1_size; i++) {
- cpu_to_be64s(&l1_table[i]);
- }
-
- ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
-
- for (i = 0; i < l1_size; i++) {
- be64_to_cpus(&l1_table[i]);
- }
- }
- if (l1_allocated)
- g_free(l1_table);
- return ret;
-}
-
-
-
-
-/*********************************************************/
-/* refcount checking functions */
-
-
-
-/*
- * Increases the refcount for a range of clusters in a given refcount table.
- * This is used to construct a temporary refcount table out of L1 and L2 tables
- * which can be compared the the refcount table saved in the image.
- *
- * Modifies the number of errors in res.
- */
-static void inc_refcounts(BlockDriverState *bs,
- BdrvCheckResult *res,
- uint16_t *refcount_table,
- int refcount_table_size,
- int64_t offset, int64_t size)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t start, last, cluster_offset;
- int k;
-
- if (size <= 0)
- return;
-
- start = offset & ~(s->cluster_size - 1);
- last = (offset + size - 1) & ~(s->cluster_size - 1);
- for(cluster_offset = start; cluster_offset <= last;
- cluster_offset += s->cluster_size) {
- k = cluster_offset >> s->cluster_bits;
- if (k < 0) {
- fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
- cluster_offset);
- res->corruptions++;
- } else if (k >= refcount_table_size) {
- fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
- "the end of the image file, can't properly check refcounts.\n",
- cluster_offset);
- res->check_errors++;
- } else {
- if (++refcount_table[k] == 0) {
- fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
- "\n", cluster_offset);
- res->corruptions++;
- }
- }
- }
-}
-
-/* Flags for check_refcounts_l1() and check_refcounts_l2() */
-enum {
- CHECK_OFLAG_COPIED = 0x1, /* check QCOW_OFLAG_COPIED matches refcount */
- CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */
-};
-
-/*
- * Increases the refcount in the given refcount table for the all clusters
- * referenced in the L2 table. While doing so, performs some checks on L2
- * entries.
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
- uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
- int flags)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table, l2_entry;
- uint64_t next_contiguous_offset = 0;
- int i, l2_size, nb_csectors, refcount;
-
- /* Read L2 table from disk */
- l2_size = s->l2_size * sizeof(uint64_t);
- l2_table = g_malloc(l2_size);
-
- if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
- goto fail;
-
- /* Do the actual checks */
- for(i = 0; i < s->l2_size; i++) {
- l2_entry = be64_to_cpu(l2_table[i]);
-
- switch (qcow2_get_cluster_type(l2_entry)) {
- case QCOW2_CLUSTER_COMPRESSED:
- /* Compressed clusters don't have QCOW_OFLAG_COPIED */
- if (l2_entry & QCOW_OFLAG_COPIED) {
- fprintf(stderr, "ERROR: cluster %" PRId64 ": "
- "copied flag must never be set for compressed "
- "clusters\n", l2_entry >> s->cluster_bits);
- l2_entry &= ~QCOW_OFLAG_COPIED;
- res->corruptions++;
- }
-
- /* Mark cluster as used */
- nb_csectors = ((l2_entry >> s->csize_shift) &
- s->csize_mask) + 1;
- l2_entry &= s->cluster_offset_mask;
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l2_entry & ~511, nb_csectors * 512);
-
- if (flags & CHECK_FRAG_INFO) {
- res->bfi.allocated_clusters++;
- res->bfi.compressed_clusters++;
-
- /* Compressed clusters are fragmented by nature. Since they
- * take up sub-sector space but we only have sector granularity
- * I/O we need to re-read the same sectors even for adjacent
- * compressed clusters.
- */
- res->bfi.fragmented_clusters++;
- }
- break;
-
- case QCOW2_CLUSTER_ZERO:
- if ((l2_entry & L2E_OFFSET_MASK) == 0) {
- break;
- }
- /* fall through */
-
- case QCOW2_CLUSTER_NORMAL:
- {
- /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
- uint64_t offset = l2_entry & L2E_OFFSET_MASK;
-
- if (flags & CHECK_OFLAG_COPIED) {
- refcount = get_refcount(bs, offset >> s->cluster_bits);
- if (refcount < 0) {
- fprintf(stderr, "Can't get refcount for offset %"
- PRIx64 ": %s\n", l2_entry, strerror(-refcount));
- goto fail;
- }
- if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
- fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
- PRIx64 " refcount=%d\n", l2_entry, refcount);
- res->corruptions++;
- }
- }
-
- if (flags & CHECK_FRAG_INFO) {
- res->bfi.allocated_clusters++;
- if (next_contiguous_offset &&
- offset != next_contiguous_offset) {
- res->bfi.fragmented_clusters++;
- }
- next_contiguous_offset = offset + s->cluster_size;
- }
-
- /* Mark cluster as used */
- inc_refcounts(bs, res, refcount_table,refcount_table_size,
- offset, s->cluster_size);
-
- /* Correct offsets are cluster aligned */
- if (offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
- "properly aligned; L2 entry corrupted.\n", offset);
- res->corruptions++;
- }
- break;
- }
-
- case QCOW2_CLUSTER_UNALLOCATED:
- break;
-
- default:
- abort();
- }
- }
-
- g_free(l2_table);
- return 0;
-
-fail:
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
- g_free(l2_table);
- return -EIO;
-}
-
-/*
- * Increases the refcount for the L1 table, its L2 tables and all referenced
- * clusters in the given refcount table. While doing so, performs some checks
- * on L1 and L2 entries.
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l1(BlockDriverState *bs,
- BdrvCheckResult *res,
- uint16_t *refcount_table,
- int refcount_table_size,
- int64_t l1_table_offset, int l1_size,
- int flags)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, l2_offset, l1_size2;
- int i, refcount, ret;
-
- l1_size2 = l1_size * sizeof(uint64_t);
-
- /* Mark L1 table as used */
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l1_table_offset, l1_size2);
-
- /* Read L1 table entries from disk */
- if (l1_size2 == 0) {
- l1_table = NULL;
- } else {
- l1_table = g_malloc(l1_size2);
- if (bdrv_pread(bs->file, l1_table_offset,
- l1_table, l1_size2) != l1_size2)
- goto fail;
- for(i = 0;i < l1_size; i++)
- be64_to_cpus(&l1_table[i]);
- }
-
- /* Do the actual checks */
- for(i = 0; i < l1_size; i++) {
- l2_offset = l1_table[i];
- if (l2_offset) {
- /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
- if (flags & CHECK_OFLAG_COPIED) {
- refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
- >> s->cluster_bits);
- if (refcount < 0) {
- fprintf(stderr, "Can't get refcount for l2_offset %"
- PRIx64 ": %s\n", l2_offset, strerror(-refcount));
- goto fail;
- }
- if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
- fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
- " refcount=%d\n", l2_offset, refcount);
- res->corruptions++;
- }
- }
-
- /* Mark L2 table as used */
- l2_offset &= L1E_OFFSET_MASK;
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l2_offset, s->cluster_size);
-
- /* L2 tables are cluster aligned */
- if (l2_offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
- "cluster aligned; L1 entry corrupted\n", l2_offset);
- res->corruptions++;
- }
-
- /* Process and check L2 entries */
- ret = check_refcounts_l2(bs, res, refcount_table,
- refcount_table_size, l2_offset, flags);
- if (ret < 0) {
- goto fail;
- }
- }
- }
- g_free(l1_table);
- return 0;
-
-fail:
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
- res->check_errors++;
- g_free(l1_table);
- return -EIO;
-}
-
-/*
- * Checks an image for refcount consistency.
- *
- * Returns 0 if no errors are found, the number of errors in case the image is
- * detected as corrupted, and -errno when an internal error occurred.
- */
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t size, i, highest_cluster;
- int nb_clusters, refcount1, refcount2;
- QCowSnapshot *sn;
- uint16_t *refcount_table;
- int ret;
-
- size = bdrv_getlength(bs->file);
- nb_clusters = size_to_clusters(s, size);
- refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
-
- res->bfi.total_clusters =
- size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
-
- /* header */
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- 0, s->cluster_size);
-
- /* current L1 table */
- ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
- s->l1_table_offset, s->l1_size,
- CHECK_OFLAG_COPIED | CHECK_FRAG_INFO);
- if (ret < 0) {
- goto fail;
- }
-
- /* snapshots */
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
- sn->l1_table_offset, sn->l1_size, 0);
- if (ret < 0) {
- goto fail;
- }
- }
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- s->snapshots_offset, s->snapshots_size);
-
- /* refcount data */
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- s->refcount_table_offset,
- s->refcount_table_size * sizeof(uint64_t));
-
- for(i = 0; i < s->refcount_table_size; i++) {
- uint64_t offset, cluster;
- offset = s->refcount_table[i];
- cluster = offset >> s->cluster_bits;
-
- /* Refcount blocks are cluster aligned */
- if (offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
- "cluster aligned; refcount table entry corrupted\n", i);
- res->corruptions++;
- continue;
- }
-
- if (cluster >= nb_clusters) {
- fprintf(stderr, "ERROR refcount block %" PRId64
- " is outside image\n", i);
- res->corruptions++;
- continue;
- }
-
- if (offset != 0) {
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- offset, s->cluster_size);
- if (refcount_table[cluster] != 1) {
- fprintf(stderr, "ERROR refcount block %" PRId64
- " refcount=%d\n",
- i, refcount_table[cluster]);
- res->corruptions++;
- }
- }
- }
-
- /* compare ref counts */
- for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
- refcount1 = get_refcount(bs, i);
- if (refcount1 < 0) {
- fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
- i, strerror(-refcount1));
- res->check_errors++;
- continue;
- }
-
- refcount2 = refcount_table[i];
-
- if (refcount1 > 0 || refcount2 > 0) {
- highest_cluster = i;
- }
-
- if (refcount1 != refcount2) {
-
- /* Check if we're allowed to fix the mismatch */
- int *num_fixed = NULL;
- if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
- num_fixed = &res->leaks_fixed;
- } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
- num_fixed = &res->corruptions_fixed;
- }
-
- fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
- num_fixed != NULL ? "Repairing" :
- refcount1 < refcount2 ? "ERROR" :
- "Leaked",
- i, refcount1, refcount2);
-
- if (num_fixed) {
- ret = update_refcount(bs, i << s->cluster_bits, 1,
- refcount2 - refcount1,
- QCOW2_DISCARD_ALWAYS);
- if (ret >= 0) {
- (*num_fixed)++;
- continue;
- }
- }
-
- /* And if we couldn't, print an error */
- if (refcount1 < refcount2) {
- res->corruptions++;
- } else {
- res->leaks++;
- }
- }
- }
-
- res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
- ret = 0;
-
-fail:
- g_free(refcount_table);
-
- return ret;
-}
-
diff --git a/contrib/qemu/block/qcow2-snapshot.c b/contrib/qemu/block/qcow2-snapshot.c
deleted file mode 100644
index 0caac9055f8..00000000000
--- a/contrib/qemu/block/qcow2-snapshot.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-
-typedef struct QEMU_PACKED QCowSnapshotHeader {
- /* header is 8 byte aligned */
- uint64_t l1_table_offset;
-
- uint32_t l1_size;
- uint16_t id_str_size;
- uint16_t name_size;
-
- uint32_t date_sec;
- uint32_t date_nsec;
-
- uint64_t vm_clock_nsec;
-
- uint32_t vm_state_size;
- uint32_t extra_data_size; /* for extension */
- /* extra data follows */
- /* id_str follows */
- /* name follows */
-} QCowSnapshotHeader;
-
-typedef struct QEMU_PACKED QCowSnapshotExtraData {
- uint64_t vm_state_size_large;
- uint64_t disk_size;
-} QCowSnapshotExtraData;
-
-void qcow2_free_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- g_free(s->snapshots[i].name);
- g_free(s->snapshots[i].id_str);
- }
- g_free(s->snapshots);
- s->snapshots = NULL;
- s->nb_snapshots = 0;
-}
-
-int qcow2_read_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshotHeader h;
- QCowSnapshotExtraData extra;
- QCowSnapshot *sn;
- int i, id_str_size, name_size;
- int64_t offset;
- uint32_t extra_data_size;
- int ret;
-
- if (!s->nb_snapshots) {
- s->snapshots = NULL;
- s->snapshots_size = 0;
- return 0;
- }
-
- offset = s->snapshots_offset;
- s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
-
- for(i = 0; i < s->nb_snapshots; i++) {
- /* Read statically sized part of the snapshot header */
- offset = align_offset(offset, 8);
- ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
- if (ret < 0) {
- goto fail;
- }
-
- offset += sizeof(h);
- sn = s->snapshots + i;
- sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
- sn->l1_size = be32_to_cpu(h.l1_size);
- sn->vm_state_size = be32_to_cpu(h.vm_state_size);
- sn->date_sec = be32_to_cpu(h.date_sec);
- sn->date_nsec = be32_to_cpu(h.date_nsec);
- sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
- extra_data_size = be32_to_cpu(h.extra_data_size);
-
- id_str_size = be16_to_cpu(h.id_str_size);
- name_size = be16_to_cpu(h.name_size);
-
- /* Read extra data */
- ret = bdrv_pread(bs->file, offset, &extra,
- MIN(sizeof(extra), extra_data_size));
- if (ret < 0) {
- goto fail;
- }
- offset += extra_data_size;
-
- if (extra_data_size >= 8) {
- sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
- }
-
- if (extra_data_size >= 16) {
- sn->disk_size = be64_to_cpu(extra.disk_size);
- } else {
- sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- }
-
- /* Read snapshot ID */
- sn->id_str = g_malloc(id_str_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
- if (ret < 0) {
- goto fail;
- }
- offset += id_str_size;
- sn->id_str[id_str_size] = '\0';
-
- /* Read snapshot name */
- sn->name = g_malloc(name_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->name, name_size);
- if (ret < 0) {
- goto fail;
- }
- offset += name_size;
- sn->name[name_size] = '\0';
- }
-
- s->snapshots_size = offset - s->snapshots_offset;
- return 0;
-
-fail:
- qcow2_free_snapshots(bs);
- return ret;
-}
-
-/* add at the end of the file a new list of snapshots */
-static int qcow2_write_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- QCowSnapshotHeader h;
- QCowSnapshotExtraData extra;
- int i, name_size, id_str_size, snapshots_size;
- struct {
- uint32_t nb_snapshots;
- uint64_t snapshots_offset;
- } QEMU_PACKED header_data;
- int64_t offset, snapshots_offset;
- int ret;
-
- /* compute the size of the snapshots */
- offset = 0;
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- offset = align_offset(offset, 8);
- offset += sizeof(h);
- offset += sizeof(extra);
- offset += strlen(sn->id_str);
- offset += strlen(sn->name);
- }
- snapshots_size = offset;
-
- /* Allocate space for the new snapshot list */
- snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
- offset = snapshots_offset;
- if (offset < 0) {
- return offset;
- }
- ret = bdrv_flush(bs);
- if (ret < 0) {
- return ret;
- }
-
- /* Write all snapshots to the new list */
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- memset(&h, 0, sizeof(h));
- h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
- h.l1_size = cpu_to_be32(sn->l1_size);
- /* If it doesn't fit in 32 bit, older implementations should treat it
- * as a disk-only snapshot rather than truncate the VM state */
- if (sn->vm_state_size <= 0xffffffff) {
- h.vm_state_size = cpu_to_be32(sn->vm_state_size);
- }
- h.date_sec = cpu_to_be32(sn->date_sec);
- h.date_nsec = cpu_to_be32(sn->date_nsec);
- h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
- h.extra_data_size = cpu_to_be32(sizeof(extra));
-
- memset(&extra, 0, sizeof(extra));
- extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
- extra.disk_size = cpu_to_be64(sn->disk_size);
-
- id_str_size = strlen(sn->id_str);
- name_size = strlen(sn->name);
- h.id_str_size = cpu_to_be16(id_str_size);
- h.name_size = cpu_to_be16(name_size);
- offset = align_offset(offset, 8);
-
- ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
- if (ret < 0) {
- goto fail;
- }
- offset += sizeof(h);
-
- ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
- if (ret < 0) {
- goto fail;
- }
- offset += sizeof(extra);
-
- ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
- if (ret < 0) {
- goto fail;
- }
- offset += id_str_size;
-
- ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
- if (ret < 0) {
- goto fail;
- }
- offset += name_size;
- }
-
- /*
- * Update the header to point to the new snapshot table. This requires the
- * new table and its refcounts to be stable on disk.
- */
- ret = bdrv_flush(bs);
- if (ret < 0) {
- goto fail;
- }
-
- QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
- offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
-
- header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
- header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
-
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
- &header_data, sizeof(header_data));
- if (ret < 0) {
- goto fail;
- }
-
- /* free the old snapshot table */
- qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
- QCOW2_DISCARD_SNAPSHOT);
- s->snapshots_offset = snapshots_offset;
- s->snapshots_size = snapshots_size;
- return 0;
-
-fail:
- return ret;
-}
-
-static void find_new_snapshot_id(BlockDriverState *bs,
- char *id_str, int id_str_size)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- int i, id, id_max = 0;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- id = strtoul(sn->id_str, NULL, 10);
- if (id > id_max)
- id_max = id;
- }
- snprintf(id_str, id_str_size, "%d", id_max + 1);
-}
-
-static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- if (!strcmp(s->snapshots[i].id_str, id_str))
- return i;
- }
- return -1;
-}
-
-static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
-{
- BDRVQcowState *s = bs->opaque;
- int i, ret;
-
- ret = find_snapshot_by_id(bs, name);
- if (ret >= 0)
- return ret;
- for(i = 0; i < s->nb_snapshots; i++) {
- if (!strcmp(s->snapshots[i].name, name))
- return i;
- }
- return -1;
-}
-
-/* if no id is provided, a new one is constructed */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *new_snapshot_list = NULL;
- QCowSnapshot *old_snapshot_list = NULL;
- QCowSnapshot sn1, *sn = &sn1;
- int i, ret;
- uint64_t *l1_table = NULL;
- int64_t l1_table_offset;
-
- memset(sn, 0, sizeof(*sn));
-
- /* Generate an ID if it wasn't passed */
- if (sn_info->id_str[0] == '\0') {
- find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
- }
-
- /* Check that the ID is unique */
- if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
- return -EEXIST;
- }
-
- /* Populate sn with passed data */
- sn->id_str = g_strdup(sn_info->id_str);
- sn->name = g_strdup(sn_info->name);
-
- sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- sn->vm_state_size = sn_info->vm_state_size;
- sn->date_sec = sn_info->date_sec;
- sn->date_nsec = sn_info->date_nsec;
- sn->vm_clock_nsec = sn_info->vm_clock_nsec;
-
- /* Allocate the L1 table of the snapshot and copy the current one there. */
- l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
- if (l1_table_offset < 0) {
- ret = l1_table_offset;
- goto fail;
- }
-
- sn->l1_table_offset = l1_table_offset;
- sn->l1_size = s->l1_size;
-
- l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
- for(i = 0; i < s->l1_size; i++) {
- l1_table[i] = cpu_to_be64(s->l1_table[i]);
- }
-
- ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
-
- g_free(l1_table);
- l1_table = NULL;
-
- /*
- * Increase the refcounts of all clusters and make sure everything is
- * stable on disk before updating the snapshot table to contain a pointer
- * to the new L1 table.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
- if (ret < 0) {
- goto fail;
- }
-
- /* Append the new snapshot to the snapshot list */
- new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
- if (s->snapshots) {
- memcpy(new_snapshot_list, s->snapshots,
- s->nb_snapshots * sizeof(QCowSnapshot));
- old_snapshot_list = s->snapshots;
- }
- s->snapshots = new_snapshot_list;
- s->snapshots[s->nb_snapshots++] = *sn;
-
- ret = qcow2_write_snapshots(bs);
- if (ret < 0) {
- g_free(s->snapshots);
- s->snapshots = old_snapshot_list;
- goto fail;
- }
-
- g_free(old_snapshot_list);
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-
-fail:
- g_free(sn->id_str);
- g_free(sn->name);
- g_free(l1_table);
-
- return ret;
-}
-
-/* copy the snapshot 'snapshot_name' into the current disk image */
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- int i, snapshot_index;
- int cur_l1_bytes, sn_l1_bytes;
- int ret;
- uint64_t *sn_l1_table = NULL;
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = &s->snapshots[snapshot_index];
-
- if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
- error_report("qcow2: Loading snapshots with different disk "
- "size is not implemented");
- ret = -ENOTSUP;
- goto fail;
- }
-
- /*
- * Make sure that the current L1 table is big enough to contain the whole
- * L1 table of the snapshot. If the snapshot L1 table is smaller, the
- * current one must be padded with zeros.
- */
- ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
- if (ret < 0) {
- goto fail;
- }
-
- cur_l1_bytes = s->l1_size * sizeof(uint64_t);
- sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
-
- /*
- * Copy the snapshot L1 table to the current L1 table.
- *
- * Before overwriting the old current L1 table on disk, make sure to
- * increase all refcounts for the clusters referenced by the new one.
- * Decrease the refcount referenced by the old one only when the L1
- * table is overwritten.
- */
- sn_l1_table = g_malloc0(cur_l1_bytes);
-
- ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
- if (ret < 0) {
- goto fail;
- }
-
- ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
- sn->l1_size, 1);
- if (ret < 0) {
- goto fail;
- }
-
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
- cur_l1_bytes);
- if (ret < 0) {
- goto fail;
- }
-
- /*
- * Decrease refcount of clusters of current L1 table.
- *
- * At this point, the in-memory s->l1_table points to the old L1 table,
- * whereas on disk we already have the new one.
- *
- * qcow2_update_snapshot_refcount special cases the current L1 table to use
- * the in-memory data instead of really using the offset to load a new one,
- * which is why this works.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
- s->l1_size, -1);
-
- /*
- * Now update the in-memory L1 table to be in sync with the on-disk one. We
- * need to do this even if updating refcounts failed.
- */
- for(i = 0;i < s->l1_size; i++) {
- s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
- }
-
- if (ret < 0) {
- goto fail;
- }
-
- g_free(sn_l1_table);
- sn_l1_table = NULL;
-
- /*
- * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
- * when we decreased the refcount of the old snapshot.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
- if (ret < 0) {
- goto fail;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-
-fail:
- g_free(sn_l1_table);
- return ret;
-}
-
-int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot sn;
- int snapshot_index, ret;
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = s->snapshots[snapshot_index];
-
- /* Remove it from the snapshot list */
- memmove(s->snapshots + snapshot_index,
- s->snapshots + snapshot_index + 1,
- (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
- s->nb_snapshots--;
- ret = qcow2_write_snapshots(bs);
- if (ret < 0) {
- return ret;
- }
-
- /*
- * The snapshot is now unused, clean up. If we fail after this point, we
- * won't recover but just leak clusters.
- */
- g_free(sn.id_str);
- g_free(sn.name);
-
- /*
- * Now decrease the refcounts of clusters referenced by the snapshot and
- * free the L1 table.
- */
- ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
- sn.l1_size, -1);
- if (ret < 0) {
- return ret;
- }
- qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
- QCOW2_DISCARD_SNAPSHOT);
-
- /* must update the copied flag on the current cluster offsets */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
- if (ret < 0) {
- return ret;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-}
-
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
-{
- BDRVQcowState *s = bs->opaque;
- QEMUSnapshotInfo *sn_tab, *sn_info;
- QCowSnapshot *sn;
- int i;
-
- if (!s->nb_snapshots) {
- *psn_tab = NULL;
- return s->nb_snapshots;
- }
-
- sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
- for(i = 0; i < s->nb_snapshots; i++) {
- sn_info = sn_tab + i;
- sn = s->snapshots + i;
- pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
- sn->id_str);
- pstrcpy(sn_info->name, sizeof(sn_info->name),
- sn->name);
- sn_info->vm_state_size = sn->vm_state_size;
- sn_info->date_sec = sn->date_sec;
- sn_info->date_nsec = sn->date_nsec;
- sn_info->vm_clock_nsec = sn->vm_clock_nsec;
- }
- *psn_tab = sn_tab;
- return s->nb_snapshots;
-}
-
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
-{
- int i, snapshot_index;
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- uint64_t *new_l1_table;
- int new_l1_bytes;
- int ret;
-
- assert(bs->read_only);
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = &s->snapshots[snapshot_index];
-
- /* Allocate and read in the snapshot's L1 table */
- new_l1_bytes = s->l1_size * sizeof(uint64_t);
- new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
-
- ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
- if (ret < 0) {
- g_free(new_l1_table);
- return ret;
- }
-
- /* Switch the L1 table */
- g_free(s->l1_table);
-
- s->l1_size = sn->l1_size;
- s->l1_table_offset = sn->l1_table_offset;
- s->l1_table = new_l1_table;
-
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
-
- return 0;
-}
diff --git a/contrib/qemu/block/qcow2.c b/contrib/qemu/block/qcow2.c
deleted file mode 100644
index 0eceefe2cd9..00000000000
--- a/contrib/qemu/block/qcow2.c
+++ /dev/null
@@ -1,1825 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qemu/aes.h"
-#include "block/qcow2.h"
-#include "qemu/error-report.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qbool.h"
-#include "trace.h"
-
-/*
- Differences with QCOW:
-
- - Support for multiple incremental snapshots.
- - Memory management by reference counts.
- - Clusters which have a reference count of one have the bit
- QCOW_OFLAG_COPIED to optimize write performance.
- - Size of compressed clusters is stored in sectors to reduce bit usage
- in the cluster offsets.
- - Support for storing additional data (such as the VM state) in the
- snapshots.
- - If a backing store is used, the cluster size is not constrained
- (could be backported to QCOW).
- - L2 tables have always a size of one cluster.
-*/
-
-
-typedef struct {
- uint32_t magic;
- uint32_t len;
-} QCowExtension;
-
-#define QCOW2_EXT_MAGIC_END 0
-#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
-#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
-
-static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
- const QCowHeader *cow_header = (const void *)buf;
-
- if (buf_size >= sizeof(QCowHeader) &&
- be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
- be32_to_cpu(cow_header->version) >= 2)
- return 100;
- else
- return 0;
-}
-
-
-/*
- * read qcow2 extension and fill bs
- * start reading from start_offset
- * finish reading upon magic of value 0 or when end_offset reached
- * unknown magic is skipped (future extension this version knows nothing about)
- * return 0 upon success, non-0 otherwise
- */
-static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
- uint64_t end_offset, void **p_feature_table)
-{
- BDRVQcowState *s = bs->opaque;
- QCowExtension ext;
- uint64_t offset;
- int ret;
-
-#ifdef DEBUG_EXT
- printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
-#endif
- offset = start_offset;
- while (offset < end_offset) {
-
-#ifdef DEBUG_EXT
- /* Sanity check */
- if (offset > s->cluster_size)
- printf("qcow2_read_extension: suspicious offset %lu\n", offset);
-
- printf("attempting to read extended header in offset %lu\n", offset);
-#endif
-
- if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
- fprintf(stderr, "qcow2_read_extension: ERROR: "
- "pread fail from offset %" PRIu64 "\n",
- offset);
- return 1;
- }
- be32_to_cpus(&ext.magic);
- be32_to_cpus(&ext.len);
- offset += sizeof(ext);
-#ifdef DEBUG_EXT
- printf("ext.magic = 0x%x\n", ext.magic);
-#endif
- if (ext.len > end_offset - offset) {
- error_report("Header extension too large");
- return -EINVAL;
- }
-
- switch (ext.magic) {
- case QCOW2_EXT_MAGIC_END:
- return 0;
-
- case QCOW2_EXT_MAGIC_BACKING_FORMAT:
- if (ext.len >= sizeof(bs->backing_format)) {
- fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
- " (>=%zu)\n",
- ext.len, sizeof(bs->backing_format));
- return 2;
- }
- if (bdrv_pread(bs->file, offset , bs->backing_format,
- ext.len) != ext.len)
- return 3;
- bs->backing_format[ext.len] = '\0';
-#ifdef DEBUG_EXT
- printf("Qcow2: Got format extension %s\n", bs->backing_format);
-#endif
- break;
-
- case QCOW2_EXT_MAGIC_FEATURE_TABLE:
- if (p_feature_table != NULL) {
- void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
- ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
- if (ret < 0) {
- return ret;
- }
-
- *p_feature_table = feature_table;
- }
- break;
-
- default:
- /* unknown magic - save it in case we need to rewrite the header */
- {
- Qcow2UnknownHeaderExtension *uext;
-
- uext = g_malloc0(sizeof(*uext) + ext.len);
- uext->magic = ext.magic;
- uext->len = ext.len;
- QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
-
- ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
- if (ret < 0) {
- return ret;
- }
- }
- break;
- }
-
- offset += ((ext.len + 7) & ~7);
- }
-
- return 0;
-}
-
-static void cleanup_unknown_header_ext(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2UnknownHeaderExtension *uext, *next;
-
- QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
- QLIST_REMOVE(uext, next);
- g_free(uext);
- }
-}
-
-static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs,
- const char *fmt, ...)
-{
- char msg[64];
- va_list ap;
-
- va_start(ap, fmt);
- vsnprintf(msg, sizeof(msg), fmt, ap);
- va_end(ap);
-
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "qcow2", msg);
-}
-
-static void report_unsupported_feature(BlockDriverState *bs,
- Qcow2Feature *table, uint64_t mask)
-{
- while (table && table->name[0] != '\0') {
- if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
- if (mask & (1 << table->bit)) {
- report_unsupported(bs, "%.46s",table->name);
- mask &= ~(1 << table->bit);
- }
- }
- table++;
- }
-
- if (mask) {
- report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
- }
-}
-
-/*
- * Sets the dirty bit and flushes afterwards if necessary.
- *
- * The incompatible_features bit is only set if the image file header was
- * updated successfully. Therefore it is not required to check the return
- * value of this function.
- */
-int qcow2_mark_dirty(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t val;
- int ret;
-
- assert(s->qcow_version >= 3);
-
- if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
- return 0; /* already dirty */
- }
-
- val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
- ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
- &val, sizeof(val));
- if (ret < 0) {
- return ret;
- }
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- return ret;
- }
-
- /* Only treat image as dirty if the header was updated successfully */
- s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
- return 0;
-}
-
-/*
- * Clears the dirty bit and flushes before if necessary. Only call this
- * function when there are no pending requests, it does not guard against
- * concurrent requests dirtying the image.
- */
-static int qcow2_mark_clean(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
-
- if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
- int ret = bdrv_flush(bs);
- if (ret < 0) {
- return ret;
- }
-
- s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
- return qcow2_update_header(bs);
- }
- return 0;
-}
-
-static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
- BdrvCheckMode fix)
-{
- int ret = qcow2_check_refcounts(bs, result, fix);
- if (ret < 0) {
- return ret;
- }
-
- if (fix && result->check_errors == 0 && result->corruptions == 0) {
- return qcow2_mark_clean(bs);
- }
- return ret;
-}
-
-static QemuOptsList qcow2_runtime_opts = {
- .name = "qcow2",
- .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
- .desc = {
- {
- .name = "lazy_refcounts",
- .type = QEMU_OPT_BOOL,
- .help = "Postpone refcount updates",
- },
- {
- .name = QCOW2_OPT_DISCARD_REQUEST,
- .type = QEMU_OPT_BOOL,
- .help = "Pass guest discard requests to the layer below",
- },
- {
- .name = QCOW2_OPT_DISCARD_SNAPSHOT,
- .type = QEMU_OPT_BOOL,
- .help = "Generate discard requests when snapshot related space "
- "is freed",
- },
- {
- .name = QCOW2_OPT_DISCARD_OTHER,
- .type = QEMU_OPT_BOOL,
- .help = "Generate discard requests when other clusters are freed",
- },
- { /* end of list */ }
- },
-};
-
-static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQcowState *s = bs->opaque;
- int len, i, ret = 0;
- QCowHeader header;
- QemuOpts *opts;
- Error *local_err = NULL;
- uint64_t ext_end;
- uint64_t l1_vm_state_index;
-
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
- if (ret < 0) {
- goto fail;
- }
- be32_to_cpus(&header.magic);
- be32_to_cpus(&header.version);
- be64_to_cpus(&header.backing_file_offset);
- be32_to_cpus(&header.backing_file_size);
- be64_to_cpus(&header.size);
- be32_to_cpus(&header.cluster_bits);
- be32_to_cpus(&header.crypt_method);
- be64_to_cpus(&header.l1_table_offset);
- be32_to_cpus(&header.l1_size);
- be64_to_cpus(&header.refcount_table_offset);
- be32_to_cpus(&header.refcount_table_clusters);
- be64_to_cpus(&header.snapshots_offset);
- be32_to_cpus(&header.nb_snapshots);
-
- if (header.magic != QCOW_MAGIC) {
- ret = -EMEDIUMTYPE;
- goto fail;
- }
- if (header.version < 2 || header.version > 3) {
- report_unsupported(bs, "QCOW version %d", header.version);
- ret = -ENOTSUP;
- goto fail;
- }
-
- s->qcow_version = header.version;
-
- /* Initialise version 3 header fields */
- if (header.version == 2) {
- header.incompatible_features = 0;
- header.compatible_features = 0;
- header.autoclear_features = 0;
- header.refcount_order = 4;
- header.header_length = 72;
- } else {
- be64_to_cpus(&header.incompatible_features);
- be64_to_cpus(&header.compatible_features);
- be64_to_cpus(&header.autoclear_features);
- be32_to_cpus(&header.refcount_order);
- be32_to_cpus(&header.header_length);
- }
-
- if (header.header_length > sizeof(header)) {
- s->unknown_header_fields_size = header.header_length - sizeof(header);
- s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
- ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
- s->unknown_header_fields_size);
- if (ret < 0) {
- goto fail;
- }
- }
-
- if (header.backing_file_offset) {
- ext_end = header.backing_file_offset;
- } else {
- ext_end = 1 << header.cluster_bits;
- }
-
- /* Handle feature bits */
- s->incompatible_features = header.incompatible_features;
- s->compatible_features = header.compatible_features;
- s->autoclear_features = header.autoclear_features;
-
- if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
- void *feature_table = NULL;
- qcow2_read_extensions(bs, header.header_length, ext_end,
- &feature_table);
- report_unsupported_feature(bs, feature_table,
- s->incompatible_features &
- ~QCOW2_INCOMPAT_MASK);
- ret = -ENOTSUP;
- goto fail;
- }
-
- /* Check support for various header values */
- if (header.refcount_order != 4) {
- report_unsupported(bs, "%d bit reference counts",
- 1 << header.refcount_order);
- ret = -ENOTSUP;
- goto fail;
- }
-
- if (header.cluster_bits < MIN_CLUSTER_BITS ||
- header.cluster_bits > MAX_CLUSTER_BITS) {
- ret = -EINVAL;
- goto fail;
- }
- if (header.crypt_method > QCOW_CRYPT_AES) {
- ret = -EINVAL;
- goto fail;
- }
- s->crypt_method_header = header.crypt_method;
- if (s->crypt_method_header) {
- bs->encrypted = 1;
- }
- s->cluster_bits = header.cluster_bits;
- s->cluster_size = 1 << s->cluster_bits;
- s->cluster_sectors = 1 << (s->cluster_bits - 9);
- s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
- s->l2_size = 1 << s->l2_bits;
- bs->total_sectors = header.size / 512;
- s->csize_shift = (62 - (s->cluster_bits - 8));
- s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
- s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
- s->refcount_table_offset = header.refcount_table_offset;
- s->refcount_table_size =
- header.refcount_table_clusters << (s->cluster_bits - 3);
-
- s->snapshots_offset = header.snapshots_offset;
- s->nb_snapshots = header.nb_snapshots;
-
- /* read the level 1 table */
- s->l1_size = header.l1_size;
-
- l1_vm_state_index = size_to_l1(s, header.size);
- if (l1_vm_state_index > INT_MAX) {
- ret = -EFBIG;
- goto fail;
- }
- s->l1_vm_state_index = l1_vm_state_index;
-
- /* the L1 table must contain at least enough entries to put
- header.size bytes */
- if (s->l1_size < s->l1_vm_state_index) {
- ret = -EINVAL;
- goto fail;
- }
- s->l1_table_offset = header.l1_table_offset;
- if (s->l1_size > 0) {
- s->l1_table = g_malloc0(
- align_offset(s->l1_size * sizeof(uint64_t), 512));
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
- }
-
- /* alloc L2 table/refcount block cache */
- s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
- s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
-
- s->cluster_cache = g_malloc(s->cluster_size);
- /* one more sector for decompressed data alignment */
- s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
- + 512);
- s->cluster_cache_offset = -1;
- s->flags = flags;
-
- ret = qcow2_refcount_init(bs);
- if (ret != 0) {
- goto fail;
- }
-
- QLIST_INIT(&s->cluster_allocs);
- QTAILQ_INIT(&s->discards);
-
- /* read qcow2 extensions */
- if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
- ret = -EINVAL;
- goto fail;
- }
-
- /* read the backing file name */
- if (header.backing_file_offset != 0) {
- len = header.backing_file_size;
- if (len > 1023) {
- len = 1023;
- }
- ret = bdrv_pread(bs->file, header.backing_file_offset,
- bs->backing_file, len);
- if (ret < 0) {
- goto fail;
- }
- bs->backing_file[len] = '\0';
- }
-
- ret = qcow2_read_snapshots(bs);
- if (ret < 0) {
- goto fail;
- }
-
- /* Clear unknown autoclear feature bits */
- if (!bs->read_only && s->autoclear_features != 0) {
- s->autoclear_features = 0;
- ret = qcow2_update_header(bs);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* Initialise locks */
- qemu_co_mutex_init(&s->lock);
-
- /* Repair image if dirty */
- if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
- (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
- BdrvCheckResult result = {0};
-
- ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* Enable lazy_refcounts according to image and command line options */
- opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
- qemu_opts_absorb_qdict(opts, options, &local_err);
- if (error_is_set(&local_err)) {
- qerror_report_err(local_err);
- error_free(local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
- (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
-
- s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
- s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
- s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
- flags & BDRV_O_UNMAP);
- s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
- s->discard_passthrough[QCOW2_DISCARD_OTHER] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
- qemu_opts_del(opts);
-
- if (s->use_lazy_refcounts && s->qcow_version < 3) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
- "a qcow2 image with at least qemu 1.1 compatibility level");
- ret = -EINVAL;
- goto fail;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return ret;
-
- fail:
- g_free(s->unknown_header_fields);
- cleanup_unknown_header_ext(bs);
- qcow2_free_snapshots(bs);
- qcow2_refcount_close(bs);
- g_free(s->l1_table);
- if (s->l2_table_cache) {
- qcow2_cache_destroy(bs, s->l2_table_cache);
- }
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
- return ret;
-}
-
-static int qcow2_set_key(BlockDriverState *bs, const char *key)
-{
- BDRVQcowState *s = bs->opaque;
- uint8_t keybuf[16];
- int len, i;
-
- memset(keybuf, 0, 16);
- len = strlen(key);
- if (len > 16)
- len = 16;
- /* XXX: we could compress the chars to 7 bits to increase
- entropy */
- for(i = 0;i < len;i++) {
- keybuf[i] = key[i];
- }
- s->crypt_method = s->crypt_method_header;
-
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
- return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
-#if 0
- /* test */
- {
- uint8_t in[16];
- uint8_t out[16];
- uint8_t tmp[16];
- for(i=0;i<16;i++)
- in[i] = i;
- AES_encrypt(in, tmp, &s->aes_encrypt_key);
- AES_decrypt(tmp, out, &s->aes_decrypt_key);
- for(i = 0; i < 16; i++)
- printf(" %02x", tmp[i]);
- printf("\n");
- for(i = 0; i < 16; i++)
- printf(" %02x", out[i]);
- printf("\n");
- }
-#endif
- return 0;
-}
-
-/* We have nothing to do for QCOW2 reopen, stubs just return
- * success */
-static int qcow2_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t cluster_offset;
- int ret;
-
- *pnum = nb_sectors;
- /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
- * can't pass them on today */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
- qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- *pnum = 0;
- }
-
- return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
-}
-
-/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors)
-{
- int n1;
- if ((sector_num + nb_sectors) <= bs->total_sectors)
- return nb_sectors;
- if (sector_num >= bs->total_sectors)
- n1 = 0;
- else
- n1 = bs->total_sectors - sector_num;
-
- qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
-
- return n1;
-}
-
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n1;
- int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
- uint64_t cluster_offset = 0;
- uint64_t bytes_done = 0;
- QEMUIOVector hd_qiov;
- uint8_t *cluster_data = NULL;
-
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- qemu_co_mutex_lock(&s->lock);
-
- while (remaining_sectors != 0) {
-
- /* prepare next request */
- cur_nr_sectors = remaining_sectors;
- if (s->crypt_method) {
- cur_nr_sectors = MIN(cur_nr_sectors,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- }
-
- ret = qcow2_get_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset);
- if (ret < 0) {
- goto fail;
- }
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
-
- switch (ret) {
- case QCOW2_CLUSTER_UNALLOCATED:
-
- if (bs->backing_hd) {
- /* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
- sector_num, cur_nr_sectors);
- if (n1 > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n1, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- }
- } else {
- /* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
- }
- break;
-
- case QCOW2_CLUSTER_ZERO:
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
- break;
-
- case QCOW2_CLUSTER_COMPRESSED:
- /* add AIO support for compressed blocks ? */
- ret = qcow2_decompress_cluster(bs, cluster_offset);
- if (ret < 0) {
- goto fail;
- }
-
- qemu_iovec_from_buf(&hd_qiov, 0,
- s->cluster_cache + index_in_cluster * 512,
- 512 * cur_nr_sectors);
- break;
-
- case QCOW2_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
- }
-
- if (s->crypt_method) {
- /*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
- if (!cluster_data) {
- cluster_data =
- qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- }
-
- assert(cur_nr_sectors <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- 512 * cur_nr_sectors);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
- qemu_iovec_from_buf(qiov, bytes_done,
- cluster_data, 512 * cur_nr_sectors);
- }
- break;
-
- default:
- g_assert_not_reached();
- ret = -EIO;
- goto fail;
- }
-
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- }
- ret = 0;
-
-fail:
- qemu_co_mutex_unlock(&s->lock);
-
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
-
- return ret;
-}
-
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- int n_end;
- int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
- uint64_t cluster_offset;
- QEMUIOVector hd_qiov;
- uint64_t bytes_done = 0;
- uint8_t *cluster_data = NULL;
- QCowL2Meta *l2meta = NULL;
-
- trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
- remaining_sectors);
-
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
- qemu_co_mutex_lock(&s->lock);
-
- while (remaining_sectors != 0) {
-
- l2meta = NULL;
-
- trace_qcow2_writev_start_part(qemu_coroutine_self());
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n_end = index_in_cluster + remaining_sectors;
- if (s->crypt_method &&
- n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
- n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
- }
-
- ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
- if (ret < 0) {
- goto fail;
- }
-
- assert((cluster_offset & 511) == 0);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
-
- if (s->crypt_method) {
- if (!cluster_data) {
- cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
- s->cluster_size);
- }
-
- assert(hd_qiov.size <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
-
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- cur_nr_sectors * 512);
- }
-
- qemu_co_mutex_unlock(&s->lock);
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- trace_qcow2_writev_data(qemu_coroutine_self(),
- (cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
-
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
- if (ret < 0) {
- goto fail;
- }
-
- /* Take the request off the list of running requests */
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
-
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
- }
-
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
- }
- ret = 0;
-
-fail:
- qemu_co_mutex_unlock(&s->lock);
-
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
- }
-
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
- trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
-
- return ret;
-}
-
-static void qcow2_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- g_free(s->l1_table);
-
- qcow2_cache_flush(bs, s->l2_table_cache);
- qcow2_cache_flush(bs, s->refcount_block_cache);
-
- qcow2_mark_clean(bs);
-
- qcow2_cache_destroy(bs, s->l2_table_cache);
- qcow2_cache_destroy(bs, s->refcount_block_cache);
-
- g_free(s->unknown_header_fields);
- cleanup_unknown_header_ext(bs);
-
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
- qcow2_refcount_close(bs);
- qcow2_free_snapshots(bs);
-}
-
-static void qcow2_invalidate_cache(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int flags = s->flags;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- uint32_t crypt_method = 0;
- QDict *options;
-
- /*
- * Backing files are read-only which makes all of their metadata immutable,
- * that means we don't have to worry about reopening them here.
- */
-
- if (s->crypt_method) {
- crypt_method = s->crypt_method;
- memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
- }
-
- qcow2_close(bs);
-
- options = qdict_new();
- qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
- qbool_from_int(s->use_lazy_refcounts));
-
- memset(s, 0, sizeof(BDRVQcowState));
- qcow2_open(bs, options, flags);
-
- QDECREF(options);
-
- if (crypt_method) {
- s->crypt_method = crypt_method;
- memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
- }
-}
-
-static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
- size_t len, size_t buflen)
-{
- QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
- size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
-
- if (buflen < ext_len) {
- return -ENOSPC;
- }
-
- *ext_backing_fmt = (QCowExtension) {
- .magic = cpu_to_be32(magic),
- .len = cpu_to_be32(len),
- };
- memcpy(buf + sizeof(QCowExtension), s, len);
-
- return ext_len;
-}
-
-/*
- * Updates the qcow2 header, including the variable length parts of it, i.e.
- * the backing file name and all extensions. qcow2 was not designed to allow
- * such changes, so if we run out of space (we can only use the first cluster)
- * this function may fail.
- *
- * Returns 0 on success, -errno in error cases.
- */
-int qcow2_update_header(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowHeader *header;
- char *buf;
- size_t buflen = s->cluster_size;
- int ret;
- uint64_t total_size;
- uint32_t refcount_table_clusters;
- size_t header_length;
- Qcow2UnknownHeaderExtension *uext;
-
- buf = qemu_blockalign(bs, buflen);
-
- /* Header structure */
- header = (QCowHeader*) buf;
-
- if (buflen < sizeof(*header)) {
- ret = -ENOSPC;
- goto fail;
- }
-
- header_length = sizeof(*header) + s->unknown_header_fields_size;
- total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
-
- *header = (QCowHeader) {
- /* Version 2 fields */
- .magic = cpu_to_be32(QCOW_MAGIC),
- .version = cpu_to_be32(s->qcow_version),
- .backing_file_offset = 0,
- .backing_file_size = 0,
- .cluster_bits = cpu_to_be32(s->cluster_bits),
- .size = cpu_to_be64(total_size),
- .crypt_method = cpu_to_be32(s->crypt_method_header),
- .l1_size = cpu_to_be32(s->l1_size),
- .l1_table_offset = cpu_to_be64(s->l1_table_offset),
- .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
- .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
- .nb_snapshots = cpu_to_be32(s->nb_snapshots),
- .snapshots_offset = cpu_to_be64(s->snapshots_offset),
-
- /* Version 3 fields */
- .incompatible_features = cpu_to_be64(s->incompatible_features),
- .compatible_features = cpu_to_be64(s->compatible_features),
- .autoclear_features = cpu_to_be64(s->autoclear_features),
- .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
- .header_length = cpu_to_be32(header_length),
- };
-
- /* For older versions, write a shorter header */
- switch (s->qcow_version) {
- case 2:
- ret = offsetof(QCowHeader, incompatible_features);
- break;
- case 3:
- ret = sizeof(*header);
- break;
- default:
- ret = -EINVAL;
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- memset(buf, 0, buflen);
-
- /* Preserve any unknown field in the header */
- if (s->unknown_header_fields_size) {
- if (buflen < s->unknown_header_fields_size) {
- ret = -ENOSPC;
- goto fail;
- }
-
- memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
- buf += s->unknown_header_fields_size;
- buflen -= s->unknown_header_fields_size;
- }
-
- /* Backing file format header extension */
- if (*bs->backing_format) {
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
- bs->backing_format, strlen(bs->backing_format),
- buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- }
-
- /* Feature table */
- Qcow2Feature features[] = {
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
- .name = "dirty bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_COMPATIBLE,
- .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
- .name = "lazy refcounts",
- },
- };
-
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
- features, sizeof(features), buflen);
- if (ret < 0) {
- goto fail;
- }
- buf += ret;
- buflen -= ret;
-
- /* Keep unknown header extensions */
- QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
- ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- }
-
- /* End of header extensions */
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
-
- /* Backing file name */
- if (*bs->backing_file) {
- size_t backing_file_len = strlen(bs->backing_file);
-
- if (buflen < backing_file_len) {
- ret = -ENOSPC;
- goto fail;
- }
-
- /* Using strncpy is ok here, since buf is not NUL-terminated. */
- strncpy(buf, bs->backing_file, buflen);
-
- header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
- header->backing_file_size = cpu_to_be32(backing_file_len);
- }
-
- /* Write the new header */
- ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
- if (ret < 0) {
- goto fail;
- }
-
- ret = 0;
-fail:
- qemu_vfree(header);
- return ret;
-}
-
-static int qcow2_change_backing_file(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt)
-{
- pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
- pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
-
- return qcow2_update_header(bs);
-}
-
-static int preallocate(BlockDriverState *bs)
-{
- uint64_t nb_sectors;
- uint64_t offset;
- uint64_t host_offset = 0;
- int num;
- int ret;
- QCowL2Meta *meta;
-
- nb_sectors = bdrv_getlength(bs) >> 9;
- offset = 0;
-
- while (nb_sectors) {
- num = MIN(nb_sectors, INT_MAX >> 9);
- ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
- &host_offset, &meta);
- if (ret < 0) {
- return ret;
- }
-
- ret = qcow2_alloc_cluster_link_l2(bs, meta);
- if (ret < 0) {
- qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
- QCOW2_DISCARD_NEVER);
- return ret;
- }
-
- /* There are no dependent requests, but we need to remove our request
- * from the list of in-flight requests */
- if (meta != NULL) {
- QLIST_REMOVE(meta, next_in_flight);
- }
-
- /* TODO Preallocate data if requested */
-
- nb_sectors -= num;
- offset += num << 9;
- }
-
- /*
- * It is expected that the image file is large enough to actually contain
- * all of the allocated clusters (otherwise we get failing reads after
- * EOF). Extend the image to the last allocated sector.
- */
- if (host_offset != 0) {
- uint8_t buf[512];
- memset(buf, 0, 512);
- ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
- if (ret < 0) {
- return ret;
- }
- }
-
- return 0;
-}
-
-static int qcow2_create2(const char *filename, int64_t total_size,
- const char *backing_file, const char *backing_format,
- int flags, size_t cluster_size, int prealloc,
- QEMUOptionParameter *options, int version)
-{
- /* Calculate cluster_bits */
- int cluster_bits;
- cluster_bits = ffs(cluster_size) - 1;
- if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
- (1 << cluster_bits) != cluster_size)
- {
- error_report(
- "Cluster size must be a power of two between %d and %dk",
- 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
- return -EINVAL;
- }
-
- /*
- * Open the image file and write a minimal qcow2 header.
- *
- * We keep things simple and start with a zero-sized image. We also
- * do without refcount blocks or a L1 table for now. We'll fix the
- * inconsistency later.
- *
- * We do need a refcount table because growing the refcount table means
- * allocating two new refcount blocks - the seconds of which would be at
- * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
- * size for any qcow2 image.
- */
- BlockDriverState* bs;
- QCowHeader header;
- uint8_t* refcount_table;
- int ret;
-
- ret = bdrv_create_file(filename, options);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
- if (ret < 0) {
- return ret;
- }
-
- /* Write the header */
- memset(&header, 0, sizeof(header));
- header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(version);
- header.cluster_bits = cpu_to_be32(cluster_bits);
- header.size = cpu_to_be64(0);
- header.l1_table_offset = cpu_to_be64(0);
- header.l1_size = cpu_to_be32(0);
- header.refcount_table_offset = cpu_to_be64(cluster_size);
- header.refcount_table_clusters = cpu_to_be32(1);
- header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
- header.header_length = cpu_to_be32(sizeof(header));
-
- if (flags & BLOCK_FLAG_ENCRYPT) {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
- } else {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
- }
-
- if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
- header.compatible_features |=
- cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
- }
-
- ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
- if (ret < 0) {
- goto out;
- }
-
- /* Write an empty refcount table */
- refcount_table = g_malloc0(cluster_size);
- ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
- g_free(refcount_table);
-
- if (ret < 0) {
- goto out;
- }
-
- bdrv_close(bs);
-
- /*
- * And now open the image and make it consistent first (i.e. increase the
- * refcount of the cluster that is occupied by the header and the refcount
- * table)
- */
- BlockDriver* drv = bdrv_find_format("qcow2");
- assert(drv != NULL);
- ret = bdrv_open(bs, filename, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
- if (ret < 0) {
- goto out;
- }
-
- ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
- if (ret < 0) {
- goto out;
-
- } else if (ret != 0) {
- error_report("Huh, first cluster in empty image is already in use?");
- abort();
- }
-
- /* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
- if (ret < 0) {
- goto out;
- }
-
- /* Want a backing file? There you go.*/
- if (backing_file) {
- ret = bdrv_change_backing_file(bs, backing_file, backing_format);
- if (ret < 0) {
- goto out;
- }
- }
-
- /* And if we're supposed to preallocate metadata, do that now */
- if (prealloc) {
- BDRVQcowState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = preallocate(bs);
- qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- bdrv_delete(bs);
- return ret;
-}
-
-static int qcow2_create(const char *filename, QEMUOptionParameter *options)
-{
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
- uint64_t sectors = 0;
- int flags = 0;
- size_t cluster_size = DEFAULT_CLUSTER_SIZE;
- int prealloc = 0;
- int version = 2;
-
- /* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- sectors = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
- if (!options->value.s || !strcmp(options->value.s, "off")) {
- prealloc = 0;
- } else if (!strcmp(options->value.s, "metadata")) {
- prealloc = 1;
- } else {
- fprintf(stderr, "Invalid preallocation mode: '%s'\n",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
- if (!options->value.s || !strcmp(options->value.s, "0.10")) {
- version = 2;
- } else if (!strcmp(options->value.s, "1.1")) {
- version = 3;
- } else {
- fprintf(stderr, "Invalid compatibility level: '%s'\n",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
- flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
- }
- options++;
- }
-
- if (backing_file && prealloc) {
- fprintf(stderr, "Backing file and preallocation cannot be used at "
- "the same time\n");
- return -EINVAL;
- }
-
- if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
- fprintf(stderr, "Lazy refcounts only supported with compatibility "
- "level 1.1 and above (use compat=1.1 or greater)\n");
- return -EINVAL;
- }
-
- return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
- cluster_size, prealloc, options, version);
-}
-
-static int qcow2_make_empty(BlockDriverState *bs)
-{
-#if 0
- /* XXX: not correct */
- BDRVQcowState *s = bs->opaque;
- uint32_t l1_length = s->l1_size * sizeof(uint64_t);
- int ret;
-
- memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
- return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
- if (ret < 0)
- return ret;
-
- l2_cache_reset(bs);
-#endif
- return 0;
-}
-
-static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- int ret;
- BDRVQcowState *s = bs->opaque;
-
- /* Emulate misaligned zero writes */
- if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
- return -ENOTSUP;
- }
-
- /* Whatever is left can use real zero clusters */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
-
- return ret;
-}
-
-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- int ret;
- BDRVQcowState *s = bs->opaque;
-
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
- return ret;
-}
-
-static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t new_l1_size;
- int ret;
-
- if (offset & 511) {
- error_report("The new size must be a multiple of 512");
- return -EINVAL;
- }
-
- /* cannot proceed if image has snapshots */
- if (s->nb_snapshots) {
- error_report("Can't resize an image which has snapshots");
- return -ENOTSUP;
- }
-
- /* shrinking is currently not supported */
- if (offset < bs->total_sectors * 512) {
- error_report("qcow2 doesn't support shrinking images yet");
- return -ENOTSUP;
- }
-
- new_l1_size = size_to_l1(s, offset);
- ret = qcow2_grow_l1_table(bs, new_l1_size, true);
- if (ret < 0) {
- return ret;
- }
-
- /* write updated header.size */
- offset = cpu_to_be64(offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
- &offset, sizeof(uint64_t));
- if (ret < 0) {
- return ret;
- }
-
- s->l1_vm_state_index = new_l1_size;
- return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
- tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- z_stream strm;
- int ret, out_len;
- uint8_t *out_buf;
- uint64_t cluster_offset;
-
- if (nb_sectors == 0) {
- /* align end of file to a sector boundary to ease reading with
- sector based I/Os */
- cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + 511) & ~511;
- bdrv_truncate(bs->file, cluster_offset);
- return 0;
- }
-
- if (nb_sectors != s->cluster_sectors) {
- ret = -EINVAL;
-
- /* Zero-pad last write if image size is not cluster aligned */
- if (sector_num + nb_sectors == bs->total_sectors &&
- nb_sectors < s->cluster_sectors) {
- uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
- memset(pad_buf, 0, s->cluster_size);
- memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
- ret = qcow2_write_compressed(bs, sector_num,
- pad_buf, s->cluster_sectors);
- qemu_vfree(pad_buf);
- }
- return ret;
- }
-
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, -12,
- 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
- ret = -EINVAL;
- goto fail;
- }
-
- strm.avail_in = s->cluster_size;
- strm.next_in = (uint8_t *)buf;
- strm.avail_out = s->cluster_size;
- strm.next_out = out_buf;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret != Z_STREAM_END && ret != Z_OK) {
- deflateEnd(&strm);
- ret = -EINVAL;
- goto fail;
- }
- out_len = strm.next_out - out_buf;
-
- deflateEnd(&strm);
-
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
- /* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
- if (ret < 0) {
- goto fail;
- }
- } else {
- cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
- sector_num << 9, out_len);
- if (!cluster_offset) {
- ret = -EIO;
- goto fail;
- }
- cluster_offset &= s->cluster_offset_mask;
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = 0;
-fail:
- g_free(out_buf);
- return ret;
-}
-
-static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
- }
-
- if (qcow2_need_accurate_refcounts(s)) {
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
- }
- }
- qemu_co_mutex_unlock(&s->lock);
-
- return 0;
-}
-
-static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
-{
- return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
-}
-
-static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQcowState *s = bs->opaque;
- bdi->cluster_size = s->cluster_size;
- bdi->vm_state_offset = qcow2_vm_state_offset(s);
- return 0;
-}
-
-#if 0
-static void dump_refcounts(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t nb_clusters, k, k1, size;
- int refcount;
-
- size = bdrv_getlength(bs->file);
- nb_clusters = size_to_clusters(s, size);
- for(k = 0; k < nb_clusters;) {
- k1 = k;
- refcount = get_refcount(bs, k);
- k++;
- while (k < nb_clusters && get_refcount(bs, k) == refcount)
- k++;
- printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
- k - k1);
- }
-}
-#endif
-
-static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t pos)
-{
- BDRVQcowState *s = bs->opaque;
- int growable = bs->growable;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- bs->growable = 1;
- ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
- bs->growable = growable;
-
- return ret;
-}
-
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
-{
- BDRVQcowState *s = bs->opaque;
- int growable = bs->growable;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- bs->growable = 1;
- ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
- bs->growable = growable;
-
- return ret;
-}
-
-static QEMUOptionParameter qcow2_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_COMPAT_LEVEL,
- .type = OPT_STRING,
- .help = "Compatibility level (0.10 or 1.1)"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "qcow2 cluster size",
- .value = { .n = DEFAULT_CLUSTER_SIZE },
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = OPT_STRING,
- .help = "Preallocation mode (allowed values: off, metadata)"
- },
- {
- .name = BLOCK_OPT_LAZY_REFCOUNTS,
- .type = OPT_FLAG,
- .help = "Postpone refcount updates",
- },
- { NULL }
-};
-
-static BlockDriver bdrv_qcow2 = {
- .format_name = "qcow2",
- .instance_size = sizeof(BDRVQcowState),
- .bdrv_probe = qcow2_probe,
- .bdrv_open = qcow2_open,
- .bdrv_close = qcow2_close,
- .bdrv_reopen_prepare = qcow2_reopen_prepare,
- .bdrv_create = qcow2_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = qcow2_co_is_allocated,
- .bdrv_set_key = qcow2_set_key,
- .bdrv_make_empty = qcow2_make_empty,
-
- .bdrv_co_readv = qcow2_co_readv,
- .bdrv_co_writev = qcow2_co_writev,
- .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
-
- .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
- .bdrv_co_discard = qcow2_co_discard,
- .bdrv_truncate = qcow2_truncate,
- .bdrv_write_compressed = qcow2_write_compressed,
-
- .bdrv_snapshot_create = qcow2_snapshot_create,
- .bdrv_snapshot_goto = qcow2_snapshot_goto,
- .bdrv_snapshot_delete = qcow2_snapshot_delete,
- .bdrv_snapshot_list = qcow2_snapshot_list,
- .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
- .bdrv_get_info = qcow2_get_info,
-
- .bdrv_save_vmstate = qcow2_save_vmstate,
- .bdrv_load_vmstate = qcow2_load_vmstate,
-
- .bdrv_change_backing_file = qcow2_change_backing_file,
-
- .bdrv_invalidate_cache = qcow2_invalidate_cache,
-
- .create_options = qcow2_create_options,
- .bdrv_check = qcow2_check,
-};
-
-static void bdrv_qcow2_init(void)
-{
- bdrv_register(&bdrv_qcow2);
-}
-
-block_init(bdrv_qcow2_init);
diff --git a/contrib/qemu/block/qcow2.h b/contrib/qemu/block/qcow2.h
deleted file mode 100644
index 3b2d5cda71f..00000000000
--- a/contrib/qemu/block/qcow2.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef BLOCK_QCOW2_H
-#define BLOCK_QCOW2_H
-
-#include "qemu/aes.h"
-#include "block/coroutine.h"
-
-//#define DEBUG_ALLOC
-//#define DEBUG_ALLOC2
-//#define DEBUG_EXT
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES 1
-
-#define QCOW_MAX_CRYPT_CLUSTERS 32
-
-/* indicate that the refcount of the referenced cluster is exactly one. */
-#define QCOW_OFLAG_COPIED (1LL << 63)
-/* indicate that the cluster is compressed (they never have the copied flag) */
-#define QCOW_OFLAG_COMPRESSED (1LL << 62)
-/* The cluster reads as all zeros */
-#define QCOW_OFLAG_ZERO (1LL << 0)
-
-#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
-
-#define MIN_CLUSTER_BITS 9
-#define MAX_CLUSTER_BITS 21
-
-#define L2_CACHE_SIZE 16
-
-/* Must be at least 4 to cover all cases of refcount table growth */
-#define REFCOUNT_CACHE_SIZE 4
-
-#define DEFAULT_CLUSTER_SIZE 65536
-
-
-#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request"
-#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot"
-#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other"
-
-typedef struct QCowHeader {
- uint32_t magic;
- uint32_t version;
- uint64_t backing_file_offset;
- uint32_t backing_file_size;
- uint32_t cluster_bits;
- uint64_t size; /* in bytes */
- uint32_t crypt_method;
- uint32_t l1_size; /* XXX: save number of clusters instead ? */
- uint64_t l1_table_offset;
- uint64_t refcount_table_offset;
- uint32_t refcount_table_clusters;
- uint32_t nb_snapshots;
- uint64_t snapshots_offset;
-
- /* The following fields are only valid for version >= 3 */
- uint64_t incompatible_features;
- uint64_t compatible_features;
- uint64_t autoclear_features;
-
- uint32_t refcount_order;
- uint32_t header_length;
-} QCowHeader;
-
-typedef struct QCowSnapshot {
- uint64_t l1_table_offset;
- uint32_t l1_size;
- char *id_str;
- char *name;
- uint64_t disk_size;
- uint64_t vm_state_size;
- uint32_t date_sec;
- uint32_t date_nsec;
- uint64_t vm_clock_nsec;
-} QCowSnapshot;
-
-struct Qcow2Cache;
-typedef struct Qcow2Cache Qcow2Cache;
-
-typedef struct Qcow2UnknownHeaderExtension {
- uint32_t magic;
- uint32_t len;
- QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
- uint8_t data[];
-} Qcow2UnknownHeaderExtension;
-
-enum {
- QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
- QCOW2_FEAT_TYPE_COMPATIBLE = 1,
- QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
-};
-
-/* Incompatible feature bits */
-enum {
- QCOW2_INCOMPAT_DIRTY_BITNR = 0,
- QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
-
- QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
-};
-
-/* Compatible feature bits */
-enum {
- QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
- QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
-
- QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
-};
-
-enum qcow2_discard_type {
- QCOW2_DISCARD_NEVER = 0,
- QCOW2_DISCARD_ALWAYS,
- QCOW2_DISCARD_REQUEST,
- QCOW2_DISCARD_SNAPSHOT,
- QCOW2_DISCARD_OTHER,
- QCOW2_DISCARD_MAX
-};
-
-typedef struct Qcow2Feature {
- uint8_t type;
- uint8_t bit;
- char name[46];
-} QEMU_PACKED Qcow2Feature;
-
-typedef struct Qcow2DiscardRegion {
- BlockDriverState *bs;
- uint64_t offset;
- uint64_t bytes;
- QTAILQ_ENTRY(Qcow2DiscardRegion) next;
-} Qcow2DiscardRegion;
-
-typedef struct BDRVQcowState {
- int cluster_bits;
- int cluster_size;
- int cluster_sectors;
- int l2_bits;
- int l2_size;
- int l1_size;
- int l1_vm_state_index;
- int csize_shift;
- int csize_mask;
- uint64_t cluster_offset_mask;
- uint64_t l1_table_offset;
- uint64_t *l1_table;
-
- Qcow2Cache* l2_table_cache;
- Qcow2Cache* refcount_block_cache;
-
- uint8_t *cluster_cache;
- uint8_t *cluster_data;
- uint64_t cluster_cache_offset;
- QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
-
- uint64_t *refcount_table;
- uint64_t refcount_table_offset;
- uint32_t refcount_table_size;
- int64_t free_cluster_index;
- int64_t free_byte_offset;
-
- CoMutex lock;
-
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
- uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- uint64_t snapshots_offset;
- int snapshots_size;
- int nb_snapshots;
- QCowSnapshot *snapshots;
-
- int flags;
- int qcow_version;
- bool use_lazy_refcounts;
-
- bool discard_passthrough[QCOW2_DISCARD_MAX];
-
- uint64_t incompatible_features;
- uint64_t compatible_features;
- uint64_t autoclear_features;
-
- size_t unknown_header_fields_size;
- void* unknown_header_fields;
- QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
- QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
- bool cache_discards;
-} BDRVQcowState;
-
-/* XXX: use std qcow open function ? */
-typedef struct QCowCreateState {
- int cluster_size;
- int cluster_bits;
- uint16_t *refcount_block;
- uint64_t *refcount_table;
- int64_t l1_table_offset;
- int64_t refcount_table_offset;
- int64_t refcount_block_offset;
-} QCowCreateState;
-
-struct QCowAIOCB;
-
-typedef struct Qcow2COWRegion {
- /**
- * Offset of the COW region in bytes from the start of the first cluster
- * touched by the request.
- */
- uint64_t offset;
-
- /** Number of sectors to copy */
- int nb_sectors;
-} Qcow2COWRegion;
-
-/**
- * Describes an in-flight (part of a) write request that writes to clusters
- * that are not referenced in their L2 table yet.
- */
-typedef struct QCowL2Meta
-{
- /** Guest offset of the first newly allocated cluster */
- uint64_t offset;
-
- /** Host offset of the first newly allocated cluster */
- uint64_t alloc_offset;
-
- /**
- * Number of sectors from the start of the first allocated cluster to
- * the end of the (possibly shortened) request
- */
- int nb_available;
-
- /** Number of newly allocated clusters */
- int nb_clusters;
-
- /**
- * Requests that overlap with this allocation and wait to be restarted
- * when the allocating request has completed.
- */
- CoQueue dependent_requests;
-
- /**
- * The COW Region between the start of the first allocated cluster and the
- * area the guest actually writes to.
- */
- Qcow2COWRegion cow_start;
-
- /**
- * The COW Region between the area the guest actually writes to and the
- * end of the last allocated cluster.
- */
- Qcow2COWRegion cow_end;
-
- /** Pointer to next L2Meta of the same write request */
- struct QCowL2Meta *next;
-
- QLIST_ENTRY(QCowL2Meta) next_in_flight;
-} QCowL2Meta;
-
-enum {
- QCOW2_CLUSTER_UNALLOCATED,
- QCOW2_CLUSTER_NORMAL,
- QCOW2_CLUSTER_COMPRESSED,
- QCOW2_CLUSTER_ZERO
-};
-
-#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
-
-#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
-
-static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
-{
- return offset & ~(s->cluster_size - 1);
-}
-
-static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
-{
- return offset & (s->cluster_size - 1);
-}
-
-static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
-{
- return (size + (s->cluster_size - 1)) >> s->cluster_bits;
-}
-
-static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
-{
- int shift = s->cluster_bits + s->l2_bits;
- return (size + (1ULL << shift) - 1) >> shift;
-}
-
-static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
-{
- return (offset >> s->cluster_bits) & (s->l2_size - 1);
-}
-
-static inline int64_t align_offset(int64_t offset, int n)
-{
- offset = (offset + n - 1) & ~(n - 1);
- return offset;
-}
-
-static inline int qcow2_get_cluster_type(uint64_t l2_entry)
-{
- if (l2_entry & QCOW_OFLAG_COMPRESSED) {
- return QCOW2_CLUSTER_COMPRESSED;
- } else if (l2_entry & QCOW_OFLAG_ZERO) {
- return QCOW2_CLUSTER_ZERO;
- } else if (!(l2_entry & L2E_OFFSET_MASK)) {
- return QCOW2_CLUSTER_UNALLOCATED;
- } else {
- return QCOW2_CLUSTER_NORMAL;
- }
-}
-
-/* Check whether refcounts are eager or lazy */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
-{
- return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
-}
-
-static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
-{
- return m->offset + m->cow_start.offset;
-}
-
-static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
-{
- return m->offset + m->cow_end.offset
- + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
-}
-
-// FIXME Need qcow2_ prefix to global functions
-
-/* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors);
-
-int qcow2_mark_dirty(BlockDriverState *bs);
-int qcow2_update_header(BlockDriverState *bs);
-
-/* qcow2-refcount.c functions */
-int qcow2_refcount_init(BlockDriverState *bs);
-void qcow2_refcount_close(BlockDriverState *bs);
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
-void qcow2_free_clusters(BlockDriverState *bs,
- int64_t offset, int64_t size,
- enum qcow2_discard_type type);
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
- int nb_clusters, enum qcow2_discard_type type);
-
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
- int64_t l1_table_offset, int l1_size, int addend);
-
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix);
-
-void qcow2_process_discards(BlockDriverState *bs, int ret);
-
-/* qcow2-cluster.c functions */
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
- bool exact_size);
-void qcow2_l2_cache_reset(BlockDriverState *bs);
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key);
-
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset);
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int compressed_size);
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
-
-/* qcow2-snapshot.c functions */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
-
-void qcow2_free_snapshots(BlockDriverState *bs);
-int qcow2_read_snapshots(BlockDriverState *bs);
-
-/* qcow2-cache.c functions */
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
-
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
- Qcow2Cache *dependency);
-void qcow2_cache_depends_on_flush(Qcow2Cache *c);
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
-
-#endif
diff --git a/contrib/qemu/block/qed-check.c b/contrib/qemu/block/qed-check.c
deleted file mode 100644
index b473dcd61f6..00000000000
--- a/contrib/qemu/block/qed-check.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Consistency Check
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-typedef struct {
- BDRVQEDState *s;
- BdrvCheckResult *result;
- bool fix; /* whether to fix invalid offsets */
-
- uint64_t nclusters;
- uint32_t *used_clusters; /* referenced cluster bitmap */
-
- QEDRequest request;
-} QEDCheck;
-
-static bool qed_test_bit(uint32_t *bitmap, uint64_t n) {
- return !!(bitmap[n / 32] & (1 << (n % 32)));
-}
-
-static void qed_set_bit(uint32_t *bitmap, uint64_t n) {
- bitmap[n / 32] |= 1 << (n % 32);
-}
-
-/**
- * Set bitmap bits for clusters
- *
- * @check: Check structure
- * @offset: Starting offset in bytes
- * @n: Number of clusters
- */
-static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
- unsigned int n)
-{
- uint64_t cluster = qed_bytes_to_clusters(check->s, offset);
- unsigned int corruptions = 0;
-
- while (n-- != 0) {
- /* Clusters should only be referenced once */
- if (qed_test_bit(check->used_clusters, cluster)) {
- corruptions++;
- }
-
- qed_set_bit(check->used_clusters, cluster);
- cluster++;
- }
-
- check->result->corruptions += corruptions;
- return corruptions == 0;
-}
-
-/**
- * Check an L2 table
- *
- * @ret: Number of invalid cluster offsets
- */
-static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
-{
- BDRVQEDState *s = check->s;
- unsigned int i, num_invalid = 0;
- uint64_t last_offset = 0;
-
- for (i = 0; i < s->table_nelems; i++) {
- uint64_t offset = table->offsets[i];
-
- if (qed_offset_is_unalloc_cluster(offset) ||
- qed_offset_is_zero_cluster(offset)) {
- continue;
- }
- check->result->bfi.allocated_clusters++;
- if (last_offset && (last_offset + s->header.cluster_size != offset)) {
- check->result->bfi.fragmented_clusters++;
- }
- last_offset = offset;
-
- /* Detect invalid cluster offset */
- if (!qed_check_cluster_offset(s, offset)) {
- if (check->fix) {
- table->offsets[i] = 0;
- check->result->corruptions_fixed++;
- } else {
- check->result->corruptions++;
- }
-
- num_invalid++;
- continue;
- }
-
- qed_set_used_clusters(check, offset, 1);
- }
-
- return num_invalid;
-}
-
-/**
- * Descend tables and check each cluster is referenced once only
- */
-static int qed_check_l1_table(QEDCheck *check, QEDTable *table)
-{
- BDRVQEDState *s = check->s;
- unsigned int i, num_invalid_l1 = 0;
- int ret, last_error = 0;
-
- /* Mark L1 table clusters used */
- qed_set_used_clusters(check, s->header.l1_table_offset,
- s->header.table_size);
-
- for (i = 0; i < s->table_nelems; i++) {
- unsigned int num_invalid_l2;
- uint64_t offset = table->offsets[i];
-
- if (qed_offset_is_unalloc_cluster(offset)) {
- continue;
- }
-
- /* Detect invalid L2 offset */
- if (!qed_check_table_offset(s, offset)) {
- /* Clear invalid offset */
- if (check->fix) {
- table->offsets[i] = 0;
- check->result->corruptions_fixed++;
- } else {
- check->result->corruptions++;
- }
-
- num_invalid_l1++;
- continue;
- }
-
- if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
- continue; /* skip an invalid table */
- }
-
- ret = qed_read_l2_table_sync(s, &check->request, offset);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- continue;
- }
-
- num_invalid_l2 = qed_check_l2_table(check,
- check->request.l2_table->table);
-
- /* Write out fixed L2 table */
- if (num_invalid_l2 > 0 && check->fix) {
- ret = qed_write_l2_table_sync(s, &check->request, 0,
- s->table_nelems, false);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- continue;
- }
- }
- }
-
- /* Drop reference to final table */
- qed_unref_l2_cache_entry(check->request.l2_table);
- check->request.l2_table = NULL;
-
- /* Write out fixed L1 table */
- if (num_invalid_l1 > 0 && check->fix) {
- ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- }
- }
-
- return last_error;
-}
-
-/**
- * Check for unreferenced (leaked) clusters
- */
-static void qed_check_for_leaks(QEDCheck *check)
-{
- BDRVQEDState *s = check->s;
- uint64_t i;
-
- for (i = s->header.header_size; i < check->nclusters; i++) {
- if (!qed_test_bit(check->used_clusters, i)) {
- check->result->leaks++;
- }
- }
-}
-
-/**
- * Mark an image clean once it passes check or has been repaired
- */
-static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
-{
- /* Skip if there were unfixable corruptions or I/O errors */
- if (result->corruptions > 0 || result->check_errors > 0) {
- return;
- }
-
- /* Skip if image is already marked clean */
- if (!(s->header.features & QED_F_NEED_CHECK)) {
- return;
- }
-
- /* Ensure fixes reach storage before clearing check bit */
- bdrv_flush(s->bs);
-
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header_sync(s);
-}
-
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
-{
- QEDCheck check = {
- .s = s,
- .result = result,
- .nclusters = qed_bytes_to_clusters(s, s->file_size),
- .request = { .l2_table = NULL },
- .fix = fix,
- };
- int ret;
-
- check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
- sizeof(check.used_clusters[0]));
-
- check.result->bfi.total_clusters =
- (s->header.image_size + s->header.cluster_size - 1) /
- s->header.cluster_size;
- ret = qed_check_l1_table(&check, s->l1_table);
- if (ret == 0) {
- /* Only check for leaks if entire image was scanned successfully */
- qed_check_for_leaks(&check);
-
- if (fix) {
- qed_check_mark_clean(s, result);
- }
- }
-
- g_free(check.used_clusters);
- return ret;
-}
diff --git a/contrib/qemu/block/qed-cluster.c b/contrib/qemu/block/qed-cluster.c
deleted file mode 100644
index f64b2af8f7e..00000000000
--- a/contrib/qemu/block/qed-cluster.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Cluster functions
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-/**
- * Count the number of contiguous data clusters
- *
- * @s: QED state
- * @table: L2 table
- * @index: First cluster index
- * @n: Maximum number of clusters
- * @offset: Set to first cluster offset
- * @ret: Length of the run, in clusters
- *
- * A run is a sequence of table entries that are all unallocated, all zero
- * clusters, or allocated at offsets that each lie exactly one cluster after
- * the previous one.  The scan never goes past the end of the table.
- */
-static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
-                                                  QEDTable *table,
-                                                  unsigned int index,
-                                                  unsigned int n,
-                                                  uint64_t *offset)
-{
-    unsigned int limit = MIN(index + n, s->table_nelems);
-    unsigned int pos = index + 1;
-    uint64_t prev = table->offsets[index];
-
-    *offset = prev;
-
-    while (pos < limit) {
-        uint64_t cur = table->offsets[pos];
-
-        if (qed_offset_is_unalloc_cluster(prev)) {
-            /* Run of unallocated clusters */
-            if (!qed_offset_is_unalloc_cluster(cur)) {
-                break;
-            }
-        } else if (qed_offset_is_zero_cluster(prev)) {
-            /* Run of zero clusters */
-            if (!qed_offset_is_zero_cluster(cur)) {
-                break;
-            }
-        } else {
-            /* Run of allocated clusters: must be adjacent in the file */
-            if (cur != prev + s->header.cluster_size) {
-                break;
-            }
-            prev = cur;
-        }
-        pos++;
-    }
-    return pos - index;
-}
-
-/* State carried from qed_find_cluster() to qed_find_cluster_cb() while the
- * L2 table is being read.  Allocated in qed_find_cluster() and freed in
- * qed_find_cluster_cb().
- */
-typedef struct {
- BDRVQEDState *s;
- /* Byte position in the device being looked up */
- uint64_t pos;
- /* Number of bytes requested (already limited to one L2 table) */
- size_t len;
-
- /* Request whose l2_table holds the L2 table cache entry */
- QEDRequest *request;
-
- /* User callback */
- QEDFindClusterFunc *cb;
- void *opaque;
-} QEDFindClusterCB;
-
-/**
- * Completion callback for the L2 table read started by qed_find_cluster()
- *
- * @opaque: QEDFindClusterCB describing the lookup
- * @ret: Result of the L2 table read, non-zero on failure
- *
- * On success the contiguous run of clusters starting at the requested
- * position is counted and classified: QED_CLUSTER_L2 for an unallocated
- * cluster, QED_CLUSTER_ZERO for a zero cluster, QED_CLUSTER_FOUND for an
- * offset accepted by qed_check_cluster_offset(), and -EINVAL otherwise.  On
- * failure @ret is passed through with offset 0 and length 0.  The user
- * callback is invoked in every case and the lookup state is freed afterwards.
- */
-static void qed_find_cluster_cb(void *opaque, int ret)
-{
-    QEDFindClusterCB *find_cluster_cb = opaque;
-    BDRVQEDState *s = find_cluster_cb->s;
-    QEDRequest *request = find_cluster_cb->request;
-    uint64_t offset = 0;
-    size_t len = 0;
-    unsigned int index;
-    unsigned int n;
-
-    if (ret) {
-        goto out;
-    }
-
-    index = qed_l2_index(s, find_cluster_cb->pos);
-    n = qed_bytes_to_clusters(s,
-                              qed_offset_into_cluster(s, find_cluster_cb->pos) +
-                              find_cluster_cb->len);
-    n = qed_count_contiguous_clusters(s, request->l2_table->table,
-                                      index, n, &offset);
-
-    if (qed_offset_is_unalloc_cluster(offset)) {
-        ret = QED_CLUSTER_L2;
-    } else if (qed_offset_is_zero_cluster(offset)) {
-        ret = QED_CLUSTER_ZERO;
-    } else if (qed_check_cluster_offset(s, offset)) {
-        ret = QED_CLUSTER_FOUND;
-    } else {
-        ret = -EINVAL;
-    }
-
-    /* Widen n before multiplying: the product of two 32-bit values would
-     * otherwise be computed in 32 bits and could wrap for a long run of large
-     * clusters.  The result is capped by the requested length, so it always
-     * fits in len.
-     */
-    len = MIN(find_cluster_cb->len, (uint64_t)n * s->header.cluster_size -
-              qed_offset_into_cluster(s, find_cluster_cb->pos));
-
-out:
-    find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
-    g_free(find_cluster_cb);
-}
-
-/**
- * Find the offset of a data cluster
- *
- * @s: QED state
- * @request: L2 cache entry
- * @pos: Byte position in device
- * @len: Number of bytes
- * @cb: Completion function
- * @opaque: User data for completion function
- *
- * Translates a position in the block device into an offset in the image file
- * and reports the translated offset, or the unallocated range, through @cb.
- *
- * If the L2 table exists, request->l2_table points to the L2 table cache entry
- * and the caller must free the reference when they are finished.  Exposing
- * the cache entry spares callers from reading the L2 table again later during
- * request processing.  If request->l2_table is non-NULL it will be
- * unreferenced before taking on the new cache entry.
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
-                      size_t len, QEDFindClusterFunc *cb, void *opaque)
-{
-    QEDFindClusterCB *ctx;
-    uint64_t l2_offset;
-    uint64_t l2_end;
-
-    /* Requests are broken up at the L2 boundary so that a request acts on
-     * one L2 table at a time: clamp the length to the end of the range
-     * covered by the L2 table that contains pos.
-     */
-    l2_end = ((pos >> s->l1_shift) + 1) << s->l1_shift;
-    len = MIN(len, l2_end - pos);
-
-    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
-
-    /* No L2 table for this range: report it without any I/O */
-    if (qed_offset_is_unalloc_cluster(l2_offset)) {
-        cb(opaque, QED_CLUSTER_L1, 0, len);
-        return;
-    }
-
-    /* Refuse to follow an L1 entry that is not a valid table offset */
-    if (!qed_check_table_offset(s, l2_offset)) {
-        cb(opaque, -EINVAL, 0, 0);
-        return;
-    }
-
-    /* Freed by qed_find_cluster_cb() */
-    ctx = g_malloc(sizeof(*ctx));
-    ctx->s = s;
-    ctx->request = request;
-    ctx->pos = pos;
-    ctx->len = len;
-    ctx->cb = cb;
-    ctx->opaque = opaque;
-
-    qed_read_l2_table(s, request, l2_offset, qed_find_cluster_cb, ctx);
-}
diff --git a/contrib/qemu/block/qed-gencb.c b/contrib/qemu/block/qed-gencb.c
deleted file mode 100644
index 7d7ac1ffc8e..00000000000
--- a/contrib/qemu/block/qed-gencb.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-/**
- * Allocate a callback context that starts with a GenericCB
- *
- * @len: Number of bytes to allocate
- * @cb: Completion function to remember
- * @opaque: User data to remember for @cb
- *
- * The cb and opaque fields are stored at the start of the allocation, so the
- * caller's structure must begin with a GenericCB.  The memory is released by
- * gencb_complete().
- */
-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
-{
-    GenericCB *ctx;
-
-    ctx = g_malloc(len);
-    ctx->opaque = opaque;
-    ctx->cb = cb;
-
-    return ctx;
-}
-
-/**
- * Free a context from gencb_alloc() and invoke its completion function
- *
- * @opaque: Context returned by gencb_alloc()
- * @ret: Result to hand to the completion function
- *
- * The callback and its user data are copied out first because the context is
- * freed before the callback runs.
- */
-void gencb_complete(void *opaque, int ret)
-{
-    GenericCB *ctx = opaque;
-    void *user_data = ctx->opaque;
-    BlockDriverCompletionFunc *done = ctx->cb;
-
-    g_free(ctx);
-    done(user_data, ret);
-}
diff --git a/contrib/qemu/block/qed-l2-cache.c b/contrib/qemu/block/qed-l2-cache.c
deleted file mode 100644
index e9b2aae44d9..00000000000
--- a/contrib/qemu/block/qed-l2-cache.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * QEMU Enhanced Disk Format L2 Cache
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-/*
- * L2 table cache usage is as follows:
- *
- * An open image has one L2 table cache that is used to avoid accessing the
- * image file for recently referenced L2 tables.
- *
- * Cluster offset lookup translates the logical offset within the block device
- * to a cluster offset within the image file. This is done by indexing into
- * the L1 and L2 tables which store cluster offsets. It is here where the L2
- * table cache serves up recently referenced L2 tables.
- *
- * If there is a cache miss, that L2 table is read from the image file and
- * committed to the cache. Subsequent accesses to that L2 table will be served
- * from the cache until the table is evicted from the cache.
- *
- * L2 tables are also committed to the cache when new L2 tables are allocated
- * in the image file. Since the L2 table cache is write-through, the new L2
- * table is first written out to the image file and then committed to the
- * cache.
- *
- * Multiple I/O requests may be using an L2 table cache entry at any given
- * time. That means an entry may be in use across several requests and
- * reference counting is needed to free the entry at the correct time. In
- * particular, an entry evicted from the cache will only be freed once all
- * references are dropped.
- *
- * An in-flight I/O request will hold a reference to a L2 table cache entry for
- * the period during which it needs to access the L2 table. This includes
- * cluster offset lookup, L2 table allocation, and L2 table update when a new
- * data cluster has been allocated.
- *
- * An interesting case occurs when two requests need to access an L2 table that
- * is not in the cache. Since the operation to read the table from the image
- * file takes some time to complete, both requests may see a cache miss and
- * start reading the L2 table from the image file. The first to finish will
- * commit its L2 table into the cache. When the second tries to commit, its
- * table will be deleted in favor of the existing cache entry.
- */
-
-#include "trace.h"
-#include "qed.h"
-
-/* Each L2 holds 2GB so this lets us fully cache a 100GB disk */
-#define MAX_L2_CACHE_SIZE 50
-
-/**
- * Set up an empty L2 table cache
- *
- * @l2_cache: Cache to initialize
- */
-void qed_init_l2_cache(L2TableCache *l2_cache)
-{
-    l2_cache->n_entries = 0;
-    QTAILQ_INIT(&l2_cache->entries);
-}
-
-/**
- * Release every entry held by the L2 cache
- *
- * @l2_cache: Cache to tear down
- *
- * Each entry and its table are freed regardless of the entry's reference
- * count.  The list head and n_entries are not reset here.
- */
-void qed_free_l2_cache(L2TableCache *l2_cache)
-{
-    CachedL2Table *cur;
-    CachedL2Table *following;
-
-    /* The _SAFE variant is needed because cur is freed inside the loop */
-    QTAILQ_FOREACH_SAFE(cur, &l2_cache->entries, node, following) {
-        qemu_vfree(cur->table);
-        g_free(cur);
-    }
-}
-
-/**
- * Allocate an uninitialized entry from the cache
- *
- * @l2_cache: Cache the entry is intended for (only used for tracing here)
- *
- * The returned entry is zero-filled apart from its reference count, which is
- * 1 and belongs to the caller.  The caller must allocate the table field
- * itself, using memory that can be released with qemu_vfree().
- */
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
-{
-    CachedL2Table *fresh = g_malloc0(sizeof(*fresh));
-
-    /* Zeroed by g_malloc0(), so this is the caller's single reference */
-    fresh->ref = 1;
-
-    trace_qed_alloc_l2_cache_entry(l2_cache, fresh);
-
-    return fresh;
-}
-
-/**
- * Drop one reference to an entry, freeing it when the count reaches zero
- *
- * @entry: Entry to unreference, may be NULL (in which case nothing happens)
- */
-void qed_unref_l2_cache_entry(CachedL2Table *entry)
-{
-    if (entry == NULL) {
-        return;
-    }
-
-    entry->ref--;
-    trace_qed_unref_l2_cache_entry(entry, entry->ref);
-
-    if (entry->ref != 0) {
-        return;
-    }
-
-    /* Last reference gone: release the table and the entry itself */
-    qemu_vfree(entry->table);
-    g_free(entry);
-}
-
-/**
- * Look up the cached L2 table stored at a given image file offset
- *
- * @l2_cache: Cache to search
- * @offset: Offset of the L2 table in the image file
- *
- * Returns the matching entry with its reference count increased by one, or
- * NULL on a cache miss, which the caller has to satisfy itself.
- */
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
-{
-    CachedL2Table *candidate;
-
-    QTAILQ_FOREACH(candidate, &l2_cache->entries, node) {
-        if (candidate->offset != offset) {
-            continue;
-        }
-
-        /* The trace records the count before the new reference is taken */
-        trace_qed_find_l2_cache_entry(l2_cache, candidate, offset,
-                                      candidate->ref);
-        candidate->ref++;
-        return candidate;
-    }
-
-    return NULL;
-}
-
-/**
- * Commit an L2 cache entry into the cache.  This is meant to be used as part
- * of satisfying a cache miss: the caller allocates an entry that is not yet
- * in the cache and commits it once the table is valid and present on disk.
- *
- * Since the cache is write-through, this function must not be called until
- * the entry is present on disk and the L1 has been updated to point to it.
- *
- * If an entry for the same offset is already cached, l2_table is dropped in
- * favor of the existing entry.
- *
- * N.B. This function steals a reference to the l2_table from the caller so the
- * caller must obtain a new reference by issuing a call to
- * qed_find_l2_cache_entry().
- */
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
-{
-    CachedL2Table *existing;
-
-    existing = qed_find_l2_cache_entry(l2_cache, l2_table->offset);
-    if (existing != NULL) {
-        /* Undo the reference taken by the lookup, then drop the caller's */
-        qed_unref_l2_cache_entry(existing);
-        qed_unref_l2_cache_entry(l2_table);
-        return;
-    }
-
-    /* Evict unused cache entries so we have space.  If all entries are in
-     * use we can grow the cache temporarily and we try to shrink back down
-     * later.
-     */
-    if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
-        CachedL2Table *victim;
-        CachedL2Table *following;
-
-        QTAILQ_FOREACH_SAFE(victim, &l2_cache->entries, node, following) {
-            /* A count above 1 means someone besides the cache holds it */
-            if (victim->ref <= 1) {
-                QTAILQ_REMOVE(&l2_cache->entries, victim, node);
-                l2_cache->n_entries--;
-                qed_unref_l2_cache_entry(victim);
-
-                /* Stop evicting when we've shrunk back to max size */
-                if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
-                    break;
-                }
-            }
-        }
-    }
-
-    QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node);
-    l2_cache->n_entries++;
-}
diff --git a/contrib/qemu/block/qed-table.c b/contrib/qemu/block/qed-table.c
deleted file mode 100644
index 76d2dcccf81..00000000000
--- a/contrib/qemu/block/qed-table.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Table I/O
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "trace.h"
-#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
-#include "qed.h"
-
-/* State for one asynchronous table read, see qed_read_table() */
-typedef struct {
- /* Must stay first: the context is allocated with gencb_alloc() */
- GenericCB gencb;
- BDRVQEDState *s;
- /* Destination table; byteswapped in place once the read completes */
- QEDTable *table;
-
- /* Single-element I/O vector describing the table buffer */
- struct iovec iov;
- QEMUIOVector qiov;
-} QEDReadTableCB;
-
-/**
- * Completion callback for qed_read_table()
- *
- * @opaque: QEDReadTableCB for the read
- * @ret: Result of the read, non-zero on I/O error
- *
- * On success every offset in the table is converted from little-endian to
- * host byte order in place.  The context is then completed with @ret.
- */
-static void qed_read_table_cb(void *opaque, int ret)
-{
-    QEDReadTableCB *ctx = opaque;
-    QEDTable *table = ctx->table;
-    int noffsets = ctx->qiov.size / sizeof(uint64_t);
-
-    /* Leave the buffer untouched on I/O error */
-    if (ret == 0) {
-        int k;
-
-        /* Byteswap offsets */
-        for (k = 0; k < noffsets; k++) {
-            table->offsets[k] = le64_to_cpu(table->offsets[k]);
-        }
-    }
-
-    /* Completion */
-    trace_qed_read_table_cb(ctx->s, ctx->table, ret);
-    gencb_complete(&ctx->gencb, ret);
-}
-
-/**
- * Read a whole table from the image file
- *
- * @s: QED state
- * @offset: Offset of table in image file, in bytes
- * @table: Destination table
- * @cb: Completion function
- * @opaque: Argument for completion function
- *
- * Starts an asynchronous read of cluster_size * table_size bytes into
- * table->offsets.  qed_read_table_cb() byteswaps the offsets and then invokes
- * @cb.
- */
-static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                           BlockDriverCompletionFunc *cb, void *opaque)
-{
-    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
-                                                cb, opaque);
-    QEMUIOVector *qiov = &read_table_cb->qiov;
-
-    trace_qed_read_table(s, offset, table);
-
-    read_table_cb->s = s;
-    read_table_cb->table = table;
-    /* These two statements used to end in ',' and were chained onto the
-     * following call by the comma operator; they are separate statements.
-     */
-    read_table_cb->iov.iov_base = table->offsets;
-    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size;
-
-    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
-                   qiov->size / BDRV_SECTOR_SIZE,
-                   qed_read_table_cb, read_table_cb);
-}
-
-/* State for one asynchronous table write, see qed_write_table() */
-typedef struct {
- /* Must stay first: the context is allocated with gencb_alloc() */
- GenericCB gencb;
- BDRVQEDState *s;
- /* Table passed in by the caller (host byte order); only used for tracing
-  * in the completion callback */
- QEDTable *orig_table;
- /* Temporary little-endian copy that is written out; freed on completion */
- QEDTable *table;
- bool flush; /* flush after write? */
-
- /* Single-element I/O vector describing the temporary copy */
- struct iovec iov;
- QEMUIOVector qiov;
-} QEDWriteTableCB;
-
-/**
- * Completion callback for qed_write_table()
- *
- * @opaque: QEDWriteTableCB for the write
- * @ret: Result of the preceding write or flush
- *
- * When the write succeeded and a flush was requested, a flush is issued and
- * this function runs a second time once it completes.  Otherwise the
- * temporary table copy is freed and the context is completed with @ret.
- */
-static void qed_write_table_cb(void *opaque, int ret)
-{
-    QEDWriteTableCB *ctx = opaque;
-
-    trace_qed_write_table_cb(ctx->s, ctx->orig_table, ctx->flush, ret);
-
-    if (ret == 0 && ctx->flush) {
-        /* We still need to flush first; clear the flag so that the next
-         * invocation finishes the request.
-         */
-        ctx->flush = false;
-        bdrv_aio_flush(ctx->s->bs, qed_write_table_cb, ctx);
-        return;
-    }
-
-    qemu_vfree(ctx->table);
-    gencb_complete(&ctx->gencb, ret);
-}
-
-/**
- * Write out an updated part or all of a table
- *
- * @s: QED state
- * @offset: Offset of table in image file, in bytes
- * @table: Table
- * @index: Index of first element
- * @n: Number of elements
- * @flush: Whether or not to sync to disk
- * @cb: Completion function
- * @opaque: Argument for completion function
- *
- * The range of elements is widened to whole sectors, copied into a temporary
- * little-endian buffer and written asynchronously.  The buffer is released by
- * qed_write_table_cb().
- */
-static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                            unsigned int index, unsigned int n, bool flush,
-                            BlockDriverCompletionFunc *cb, void *opaque)
-{
-    /* Table elements per sector, minus one: used as an alignment mask */
-    const unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
-    unsigned int first, last, k;
-    size_t nbytes;
-    QEDWriteTableCB *ctx;
-
-    trace_qed_write_table(s, offset, table, index, n);
-
-    /* Round down to the first element of the first sector touched and up to
-     * one past the last element of the last sector touched.
-     */
-    first = index & ~sector_mask;
-    last = (index + n + sector_mask) & ~sector_mask;
-    nbytes = (last - first) * sizeof(uint64_t);
-
-    ctx = gencb_alloc(sizeof(*ctx), cb, opaque);
-    ctx->s = s;
-    ctx->orig_table = table;
-    ctx->flush = flush;
-    ctx->table = qemu_blockalign(s->bs, nbytes);
-    ctx->iov.iov_base = ctx->table->offsets;
-    ctx->iov.iov_len = nbytes;
-    qemu_iovec_init_external(&ctx->qiov, &ctx->iov, 1);
-
-    /* Byteswap table */
-    for (k = first; k < last; k++) {
-        ctx->table->offsets[k - first] = cpu_to_le64(table->offsets[k]);
-    }
-
-    /* Adjust for offset into table */
-    offset += first * sizeof(uint64_t);
-
-    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, &ctx->qiov,
-                    ctx->qiov.size / BDRV_SECTOR_SIZE,
-                    qed_write_table_cb, ctx);
-}
-
-/**
- * Completion function for the *_sync() wrappers
- *
- * @opaque: Pointer to the int that receives the result
- * @ret: Result of the asynchronous operation
- */
-static void qed_sync_cb(void *opaque, int ret)
-{
-    int *result = opaque;
-
-    *result = ret;
-}
-
-/**
- * Read the L1 table and wait for the read to complete
- *
- * @s: QED state
- * @ret: Result reported by the read
- */
-int qed_read_l1_table_sync(BDRVQEDState *s)
-{
-    int status = -EINPROGRESS;
-
-    qed_read_table(s, s->header.l1_table_offset,
-                   s->l1_table, qed_sync_cb, &status);
-
-    /* qed_sync_cb() overwrites status once the read has finished */
-    for (; status == -EINPROGRESS; ) {
-        qemu_aio_wait();
-    }
-
-    return status;
-}
-
-/**
- * Write out part or all of the L1 table, without flushing
- *
- * @s: QED state
- * @index: Index of first element
- * @n: Number of elements
- * @cb: Completion function
- * @opaque: Argument for completion function
- */
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    const bool flush = false;
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
-    qed_write_table(s, s->header.l1_table_offset, s->l1_table,
-                    index, n, flush, cb, opaque);
-}
-
-/**
- * Write out part or all of the L1 table and wait for completion
- *
- * @s: QED state
- * @index: Index of first element
- * @n: Number of elements
- * @ret: Result reported by the write
- */
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
-                            unsigned int n)
-{
-    int status = -EINPROGRESS;
-
-    qed_write_l1_table(s, index, n, qed_sync_cb, &status);
-
-    /* qed_sync_cb() overwrites status once the write has finished */
-    for (; status == -EINPROGRESS; ) {
-        qemu_aio_wait();
-    }
-
-    return status;
-}
-
-/* State for one asynchronous L2 table read, see qed_read_l2_table() */
-typedef struct {
- /* Must stay first: the context is allocated with gencb_alloc() */
- GenericCB gencb;
- BDRVQEDState *s;
- /* Offset of the L2 table in the image file */
- uint64_t l2_offset;
- /* Request whose l2_table field receives the loaded table */
- QEDRequest *request;
-} QEDReadL2TableCB;
-
-/**
- * Completion callback for the table read issued by qed_read_l2_table()
- *
- * @opaque: QEDReadL2TableCB for the read
- * @ret: Result of the read
- *
- * On success the freshly read table is committed to the L2 cache and
- * request->l2_table is replaced by a new reference obtained from the cache.
- * On failure the table is dropped and request->l2_table is set to NULL.
- */
-static void qed_read_l2_table_cb(void *opaque, int ret)
-{
-    QEDReadL2TableCB *ctx = opaque;
-    QEDRequest *request = ctx->request;
-    CachedL2Table *loaded = request->l2_table;
-
-    if (ret == 0) {
-        BDRVQEDState *s = ctx->s;
-        uint64_t l2_offset = ctx->l2_offset;
-
-        loaded->offset = l2_offset;
-        qed_commit_l2_cache_entry(&s->l2_cache, loaded);
-
-        /* This is guaranteed to succeed because we just committed the entry
-         * to the cache.
-         */
-        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
-        assert(request->l2_table != NULL);
-    } else {
-        /* can't trust loaded L2 table anymore */
-        qed_unref_l2_cache_entry(loaded);
-        request->l2_table = NULL;
-    }
-
-    gencb_complete(&ctx->gencb, ret);
-}
-
-/**
- * Make the L2 table at a given offset available in request->l2_table
- *
- * @s: QED state
- * @request: Request that receives the L2 table cache entry
- * @offset: Offset of the L2 table in the image file
- * @cb: Completion function
- * @opaque: Argument for completion function
- *
- * Any entry already held in request->l2_table is unreferenced first.  On a
- * cache hit @cb is called immediately with 0; otherwise a new entry is
- * allocated and the table is read from the image file.
- */
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockDriverCompletionFunc *cb, void *opaque)
-{
-    QEDReadL2TableCB *ctx;
-    CachedL2Table *cached;
-
-    qed_unref_l2_cache_entry(request->l2_table);
-
-    /* Check for cached L2 entry */
-    cached = qed_find_l2_cache_entry(&s->l2_cache, offset);
-    request->l2_table = cached;
-    if (cached != NULL) {
-        cb(opaque, 0);
-        return;
-    }
-
-    /* Cache miss: read the table into a fresh entry owned by the request */
-    request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
-    request->l2_table->table = qed_alloc_table(s);
-
-    ctx = gencb_alloc(sizeof(*ctx), cb, opaque);
-    ctx->request = request;
-    ctx->l2_offset = offset;
-    ctx->s = s;
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
-    qed_read_table(s, offset, request->l2_table->table,
-                   qed_read_l2_table_cb, ctx);
-}
-
-/**
- * Synchronous variant of qed_read_l2_table()
- *
- * @s: QED state
- * @request: Request that receives the L2 table cache entry
- * @offset: Offset of the L2 table in the image file
- * @ret: Result reported by the read
- */
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
-{
-    int status = -EINPROGRESS;
-
-    qed_read_l2_table(s, request, offset, qed_sync_cb, &status);
-
-    /* qed_sync_cb() overwrites status once the read has finished */
-    for (; status == -EINPROGRESS; ) {
-        qemu_aio_wait();
-    }
-
-    return status;
-}
-
-/**
- * Write out part or all of the L2 table held by a request
- *
- * @s: QED state
- * @request: Request whose l2_table is written
- * @index: Index of first element
- * @n: Number of elements
- * @flush: Whether or not to sync to disk
- * @cb: Completion function
- * @opaque: Argument for completion function
- */
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                        unsigned int index, unsigned int n, bool flush,
-                        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    CachedL2Table *l2;
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
-
-    l2 = request->l2_table;
-    qed_write_table(s, l2->offset, l2->table, index, n, flush, cb, opaque);
-}
-
-/**
- * Synchronous variant of qed_write_l2_table()
- *
- * @s: QED state
- * @request: Request whose l2_table is written
- * @index: Index of first element
- * @n: Number of elements
- * @flush: Whether or not to sync to disk
- * @ret: Result reported by the write
- */
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                            unsigned int index, unsigned int n, bool flush)
-{
-    int status = -EINPROGRESS;
-
-    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &status);
-
-    /* qed_sync_cb() overwrites status once the write has finished */
-    for (; status == -EINPROGRESS; ) {
-        qemu_aio_wait();
-    }
-
-    return status;
-}
diff --git a/contrib/qemu/block/qed.c b/contrib/qemu/block/qed.c
deleted file mode 100644
index f767b0528ce..00000000000
--- a/contrib/qemu/block/qed.c
+++ /dev/null
@@ -1,1596 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/timer.h"
-#include "trace.h"
-#include "qed.h"
-#include "qapi/qmp/qerror.h"
-#include "migration/migration.h"
-
-/**
- * Cancel hook for QED requests
- *
- * @blockacb: Request to cancel, actually a QEDAIOCB
- *
- * The request is not aborted: a pointer to a local flag is stored in the
- * request and this function waits until that flag becomes true.
- */
-static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    bool done = false;
-    QEDAIOCB *request = (QEDAIOCB *)blockacb;
-
-    /* Wait for the request to finish */
-    request->finished = &done;
-    for (; !done; ) {
-        qemu_aio_wait();
-    }
-}
-
-/* AIOCB description for QED requests: each request is a QEDAIOCB and is
- * cancelled through qed_aio_cancel().
- */
-static const AIOCBInfo qed_aiocb_info = {
- .aiocb_size = sizeof(QEDAIOCB),
- .cancel = qed_aio_cancel,
-};
-
-/**
- * Probe whether a buffer holds the start of a QED image
- *
- * @buf: First bytes of the image file
- * @buf_size: Number of bytes in @buf
- * @filename: Unused
- * @ret: 100 when the QED magic number is present, 0 otherwise
- */
-static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
-                          const char *filename)
-{
-    const QEDHeader *hdr = (const QEDHeader *)buf;
-
-    /* The magic is only read once the buffer is known to hold a header */
-    if (buf_size < sizeof(*hdr) || le32_to_cpu(hdr->magic) != QED_MAGIC) {
-        return 0;
-    }
-
-    return 100;
-}
-
-/**
- * Tell whether a backing file format name denotes the raw format
- *
- * @fmt: Backing file format, may be NULL
- * @ret: true only if @fmt is exactly "raw"
- */
-static bool qed_fmt_is_raw(const char *fmt)
-{
-    if (fmt == NULL) {
-        return false;
-    }
-
-    return strcmp(fmt, "raw") == 0;
-}
-
-/**
- * Convert an on-disk (little-endian) header to host byte order
- *
- * @le: Header as stored in the image file
- * @cpu: Receives the converted header
- */
-static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
-{
-    /* 32-bit fields */
-    cpu->magic = le32_to_cpu(le->magic);
-    cpu->cluster_size = le32_to_cpu(le->cluster_size);
-    cpu->table_size = le32_to_cpu(le->table_size);
-    cpu->header_size = le32_to_cpu(le->header_size);
-    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
-    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
-
-    /* 64-bit fields */
-    cpu->features = le64_to_cpu(le->features);
-    cpu->compat_features = le64_to_cpu(le->compat_features);
-    cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
-    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
-    cpu->image_size = le64_to_cpu(le->image_size);
-}
-
-/**
- * Convert a header from host byte order to on-disk (little-endian) form
- *
- * @cpu: Header in host byte order
- * @le: Receives the header as it is stored in the image file
- */
-static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
-{
-    /* 32-bit fields */
-    le->magic = cpu_to_le32(cpu->magic);
-    le->cluster_size = cpu_to_le32(cpu->cluster_size);
-    le->table_size = cpu_to_le32(cpu->table_size);
-    le->header_size = cpu_to_le32(cpu->header_size);
-    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
-    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
-
-    /* 64-bit fields */
-    le->features = cpu_to_le64(cpu->features);
-    le->compat_features = cpu_to_le64(cpu->compat_features);
-    le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
-    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
-    le->image_size = cpu_to_le64(cpu->image_size);
-}
-
-/**
- * Write the in-memory header to the start of the image file
- *
- * @s: QED state
- * @ret: 0 on success, -errno on failure
- *
- * Only the QEDHeader structure itself is written, converted to little-endian.
- */
-int qed_write_header_sync(BDRVQEDState *s)
-{
-    QEDHeader le;
-    int ret;
-
-    qed_header_cpu_to_le(&s->header, &le);
-    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
-    if (ret < 0) {
-        return ret;
-    }
-    /* Never hand a positive byte count back to callers, which treat any
-     * non-zero value as an error code: report a short write as -EIO.
-     */
-    if ((size_t)ret != sizeof(le)) {
-        return -EIO;
-    }
-    return 0;
-}
-
-/* State for one asynchronous header update, see qed_write_header() */
-typedef struct {
- /* Must stay first: the context is allocated with gencb_alloc() */
- GenericCB gencb;
- BDRVQEDState *s;
- /* Single-element I/O vector describing buf */
- struct iovec iov;
- QEMUIOVector qiov;
- /* Number of sectors read and then written back */
- int nsectors;
- /* Bounce buffer holding the sectors that contain the header */
- uint8_t *buf;
-} QEDWriteHeaderCB;
-
-/**
- * Final step of qed_write_header(): free the bounce buffer and complete
- *
- * @opaque: QEDWriteHeaderCB for the update
- * @ret: Result to pass to the completion function
- */
-static void qed_write_header_cb(void *opaque, int ret)
-{
-    QEDWriteHeaderCB *ctx = opaque;
-
-    qemu_vfree(ctx->buf);
-
-    /* gencb is the first member, so this is the same address as ctx */
-    gencb_complete(&ctx->gencb, ret);
-}
-
-/**
- * Second step of qed_write_header(): patch the header and write it back
- *
- * @opaque: QEDWriteHeaderCB for the update
- * @ret: Result of reading the sectors that contain the header
- *
- * On read failure the update is completed with @ret.  Otherwise the start of
- * the buffer is overwritten with the current header in little-endian form
- * and the sectors are written back.
- */
-static void qed_write_header_read_cb(void *opaque, int ret)
-{
-    QEDWriteHeaderCB *ctx = opaque;
-    BDRVQEDState *s;
-
-    if (ret != 0) {
-        qed_write_header_cb(ctx, ret);
-        return;
-    }
-
-    s = ctx->s;
-
-    /* Update header */
-    qed_header_cpu_to_le(&s->header, (QEDHeader *)ctx->buf);
-
-    bdrv_aio_writev(s->bs->file, 0, &ctx->qiov, ctx->nsectors,
-                    qed_write_header_cb, ctx);
-}
-
-/**
- * Update header in-place (does not rewrite backing filename or other strings)
- *
- * @s: QED state
- * @cb: Completion function
- * @opaque: Argument for completion function
- *
- * Only the known header fields are updated; extra data that follows the QED
- * header in the same sectors is preserved.
- */
-static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
-                             void *opaque)
-{
-    /* We must write full sectors for O_DIRECT but cannot necessarily generate
-     * the data following the header if an unrecognized compat feature is
-     * active.  Therefore this is a read-modify-write: read the sectors that
-     * contain the header, update them, and write them back.
-     */
-    QEDWriteHeaderCB *ctx;
-    int nsectors;
-    size_t len;
-
-    /* Sectors needed to hold a QEDHeader, rounded up */
-    nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
-    len = nsectors * BDRV_SECTOR_SIZE;
-
-    ctx = gencb_alloc(sizeof(*ctx), cb, opaque);
-    ctx->s = s;
-    ctx->nsectors = nsectors;
-    ctx->buf = qemu_blockalign(s->bs, len);
-    ctx->iov.iov_base = ctx->buf;
-    ctx->iov.iov_len = len;
-    qemu_iovec_init_external(&ctx->qiov, &ctx->iov, 1);
-
-    bdrv_aio_readv(s->bs->file, 0, &ctx->qiov, nsectors,
-                   qed_write_header_read_cb, ctx);
-}
-
-/**
- * Compute the largest image size a given geometry can address
- *
- * @cluster_size: Cluster size in bytes
- * @table_size: Table size in clusters
- * @ret: Maximum image size in bytes, saturated at UINT64_MAX
- *
- * A table holds (table_size * cluster_size) / 8 entries, one L2 table maps
- * that many clusters, and the L1 table points to that many L2 tables.  For
- * large geometries the product does not fit in 64 bits (64 MB clusters with
- * a table size of 16 would give 2^80); instead of wrapping around, which
- * could make the limit come out as 0, the result is clamped to UINT64_MAX.
- */
-static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
-{
-    uint64_t table_entries;
-    uint64_t l2_size;
-
-    /* Widen before multiplying so the 32-bit product cannot wrap */
-    table_entries = ((uint64_t)table_size * cluster_size) / sizeof(uint64_t);
-
-    if (cluster_size != 0 && table_entries > UINT64_MAX / cluster_size) {
-        return UINT64_MAX;
-    }
-    l2_size = table_entries * cluster_size;
-
-    if (table_entries != 0 && l2_size > UINT64_MAX / table_entries) {
-        return UINT64_MAX;
-    }
-    return l2_size * table_entries;
-}
-
-/**
- * Check that a cluster size is within the supported range and a power of 2
- *
- * @cluster_size: Cluster size in bytes
- */
-static bool qed_is_cluster_size_valid(uint32_t cluster_size)
-{
-    bool in_range = cluster_size >= QED_MIN_CLUSTER_SIZE &&
-                    cluster_size <= QED_MAX_CLUSTER_SIZE;
-
-    /* A power of 2 has no bits in common with its predecessor */
-    return in_range && (cluster_size & (cluster_size - 1)) == 0;
-}
-
-/**
- * Check that a table size is within the supported range and a power of 2
- *
- * @table_size: Table size in clusters
- */
-static bool qed_is_table_size_valid(uint32_t table_size)
-{
-    bool in_range = table_size >= QED_MIN_TABLE_SIZE &&
-                    table_size <= QED_MAX_TABLE_SIZE;
-
-    /* A power of 2 has no bits in common with its predecessor */
-    return in_range && (table_size & (table_size - 1)) == 0;
-}
-
-/**
- * Check that an image size is usable with a given geometry
- *
- * @image_size: Image size in bytes
- * @cluster_size: Cluster size in bytes
- * @table_size: Table size in clusters
- *
- * The size must be a multiple of the sector size and must not exceed what
- * qed_max_image_size() reports for the geometry.
- */
-static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
-                                    uint32_t table_size)
-{
-    bool sector_aligned = (image_size % BDRV_SECTOR_SIZE) == 0;
-
-    return sector_aligned &&
-           image_size <= qed_max_image_size(cluster_size, table_size);
-}
-
-/**
- * Read a string of known length from the image file
- *
- * @file: Image file
- * @offset: File offset to start of string, in bytes
- * @n: String length in bytes
- * @buf: Destination buffer
- * @buflen: Destination buffer length in bytes
- * @ret: 0 on success, -errno on failure
- *
- * The string is NUL-terminated.  -EINVAL is returned without reading if the
- * buffer cannot hold @n bytes plus the terminator.
- */
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
-                           char *buf, size_t buflen)
-{
-    int ret;
-
-    /* Need room for n bytes and the terminating NUL */
-    if (buflen <= n) {
-        return -EINVAL;
-    }
-
-    ret = bdrv_pread(file, offset, buf, n);
-    if (ret >= 0) {
-        buf[n] = '\0';
-        ret = 0;
-    }
-    return ret;
-}
-
-/**
- * Allocate new clusters
- *
- * @s: QED state
- * @n: Number of contiguous clusters to allocate
- * @ret: Offset of first allocated cluster
- *
- * This function only produces the offset where the new clusters should be
- * written.  It updates BDRVQEDState but does not make any changes to the image
- * file.
- */
-static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
-{
-    uint64_t offset = s->file_size;
-
-    /* Widen n before multiplying: the product of two 32-bit values would
-     * otherwise wrap before being added to the 64-bit file size.
-     */
-    s->file_size += (uint64_t)n * s->header.cluster_size;
-    return offset;
-}
-
-/**
- * Allocate an uninitialized buffer large enough for one table
- *
- * @s: QED state
- *
- * The buffer is cluster_size * table_size bytes long.
- */
-QEDTable *qed_alloc_table(BDRVQEDState *s)
-{
-    uint32_t table_bytes = s->header.cluster_size * s->header.table_size;
-
-    /* Honor O_DIRECT memory alignment requirements */
-    return qemu_blockalign(s->bs, table_bytes);
-}
-
-/**
- * Allocate a new zeroed L2 table
- *
- * @s: QED state
- *
- * The returned cache entry has a table buffer filled with zeroes and an
- * offset obtained from qed_alloc_clusters().  Nothing is written to the image
- * file here.
- */
-static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
-{
-    CachedL2Table *fresh = qed_alloc_l2_cache_entry(&s->l2_cache);
-
-    fresh->table = qed_alloc_table(s);
-    memset(fresh->table->offsets, 0,
-           s->header.cluster_size * s->header.table_size);
-
-    /* Reserve table_size clusters at the end of the file for the table */
-    fresh->offset = qed_alloc_clusters(s, s->header.table_size);
-
-    return fresh;
-}
-
-static void qed_aio_next_io(void *opaque, int ret);
-
-/**
- * Set the "allocating write requests plugged" flag
- *
- * @s: QED state
- *
- * Must not be called while the flag is already set.
- */
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
-{
-    assert(s->allocating_write_reqs_plugged == false);
-    s->allocating_write_reqs_plugged = true;
-}
-
-/**
- * Clear the "allocating write requests plugged" flag and resume the queue
- *
- * @s: QED state
- *
- * Must only be called while the flag is set.  If a request is waiting at the
- * head of the allocating write queue, it is continued.
- */
-static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
-{
-    QEDAIOCB *head;
-
-    assert(s->allocating_write_reqs_plugged);
-    s->allocating_write_reqs_plugged = false;
-
-    head = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
-    if (head != NULL) {
-        qed_aio_next_io(head, 0);
-    }
-}
-
-/**
- * Completion function for the flush issued after clearing the check bit
- *
- * Nothing is waiting for that flush, so both arguments are ignored.
- */
-static void qed_finish_clear_need_check(void *opaque, int ret)
-{
-    /* Intentionally empty */
-}
-
-/**
- * Called once the header with the check bit cleared has been written
- *
- * @opaque: QED state
- * @ret: Result of the header write (not examined)
- *
- * Starts a flush and unplugs allocating write requests straight away.
- */
-static void qed_flush_after_clear_need_check(void *opaque, int ret)
-{
-    BDRVQEDState *state = opaque;
-
-    bdrv_aio_flush(state->bs, qed_finish_clear_need_check, state);
-
-    /* No need to wait until flush completes */
-    qed_unplug_allocating_write_reqs(state);
-}
-
-/**
- * Called once the flush started by the need-check timer has completed
- *
- * @opaque: QED state
- * @ret: Result of the flush
- *
- * On success the check bit is cleared in the in-memory header and the header
- * is written out.  On failure the bit is left alone and allocating write
- * requests are unplugged again.
- */
-static void qed_clear_need_check(void *opaque, int ret)
-{
-    BDRVQEDState *state = opaque;
-
-    if (ret == 0) {
-        state->header.features &= ~QED_F_NEED_CHECK;
-        qed_write_header(state, qed_flush_after_clear_need_check, state);
-        return;
-    }
-
-    qed_unplug_allocating_write_reqs(state);
-}
-
-/**
- * Timer callback that begins clearing the check bit
- *
- * @opaque: QED state
- *
- * Plugs allocating write requests and flushes the image;
- * qed_clear_need_check() continues once the flush has completed.
- */
-static void qed_need_check_timer_cb(void *opaque)
-{
-    BDRVQEDState *state = opaque;
-
-    /* The timer should only fire when allocating writes have drained */
-    assert(QSIMPLEQ_FIRST(&state->allocating_write_reqs) == NULL);
-
-    trace_qed_need_check_timer_cb(state);
-
-    qed_plug_allocating_write_reqs(state);
-
-    /* Ensure writes are on disk before clearing flag */
-    bdrv_aio_flush(state->bs, qed_clear_need_check, state);
-}
-
-/**
- * Arm the need-check timer
- *
- * @s: QED state
- *
- * The timer is set to expire QED_NEED_CHECK_TIMEOUT seconds from now.
- */
-static void qed_start_need_check_timer(BDRVQEDState *s)
-{
-    int64_t deadline;
-
-    trace_qed_start_need_check_timer(s);
-
-    /* Use vm_clock so we don't alter the image file while suspended for
-     * migration.
-     */
-    deadline = qemu_get_clock_ns(vm_clock) +
-               get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT;
-    qemu_mod_timer(s->need_check_timer, deadline);
-}
-
-/**
- * Disarm the need-check timer
- *
- * @s: QED state
- *
- * It's okay to call this multiple times or when no timer is started.
- */
-static void qed_cancel_need_check_timer(BDRVQEDState *s)
-{
-    trace_qed_cancel_need_check_timer(s);
-
-    qemu_del_timer(s->need_check_timer);
-}
-
-/**
- * Point the QED state back at the BlockDriverState that owns it
- *
- * @bs: Block driver state whose opaque pointer is the QED state
- */
-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
-    BDRVQEDState *state = bs->opaque;
-
-    state->bs = bs;
-}
-
-/**
- * Open a QED image
- *
- * @bs: Block driver state; bs->opaque is the BDRVQEDState to fill in
- * @options: Not used by this function
- * @flags: BDRV_O_* open flags
- * @ret: 0 on success, an error code otherwise
- *
- * Reads and validates the header, reads the backing file name if the image
- * has one, resets unknown autoclear feature bits, loads the L1 table, runs a
- * consistency check when the image was not closed cleanly, and creates the
- * need-check timer.
- */
-static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQEDState *s = bs->opaque;
- QEDHeader le_header;
- int64_t file_size;
- int ret;
-
- s->bs = bs;
- QSIMPLEQ_INIT(&s->allocating_write_reqs);
-
- ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
- if (ret < 0) {
- return ret;
- }
- qed_header_le_to_cpu(&le_header, &s->header);
-
- if (s->header.magic != QED_MAGIC) {
- return -EMEDIUMTYPE;
- }
- if (s->header.features & ~QED_FEATURE_MASK) {
- /* image uses unsupported feature bits */
- char buf[64];
- snprintf(buf, sizeof(buf), "%" PRIx64,
- s->header.features & ~QED_FEATURE_MASK);
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "QED", buf);
- return -ENOTSUP;
- }
- if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
- return -EINVAL;
- }
-
- /* Round down file size to the last cluster */
- file_size = bdrv_getlength(bs->file);
- if (file_size < 0) {
- return file_size;
- }
- s->file_size = qed_start_of_cluster(s, file_size);
-
- if (!qed_is_table_size_valid(s->header.table_size)) {
- return -EINVAL;
- }
- if (!qed_is_image_size_valid(s->header.image_size,
- s->header.cluster_size,
- s->header.table_size)) {
- return -EINVAL;
- }
- if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
- return -EINVAL;
- }
-
- /* Derive the table geometry from the validated header fields */
- s->table_nelems = (s->header.cluster_size * s->header.table_size) /
- sizeof(uint64_t);
- s->l2_shift = ffs(s->header.cluster_size) - 1;
- s->l2_mask = s->table_nelems - 1;
- s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
-
- if ((s->header.features & QED_F_BACKING_FILE)) {
- /* The backing file name must lie within the header clusters */
- if ((uint64_t)s->header.backing_filename_offset +
- s->header.backing_filename_size >
- s->header.cluster_size * s->header.header_size) {
- return -EINVAL;
- }
-
- ret = qed_read_string(bs->file, s->header.backing_filename_offset,
- s->header.backing_filename_size, bs->backing_file,
- sizeof(bs->backing_file));
- if (ret < 0) {
- return ret;
- }
-
- if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
- pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
- }
- }
-
- /* Reset unknown autoclear feature bits. This is a backwards
- * compatibility mechanism that allows images to be opened by older
- * programs, which "knock out" unknown feature bits. When an image is
- * opened by a newer program again it can detect that the autoclear
- * feature is no longer valid.
- */
- if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
- s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
-
- ret = qed_write_header_sync(s);
- if (ret) {
- return ret;
- }
-
- /* From here on only known autoclear feature bits are valid */
- bdrv_flush(bs->file);
- }
-
- /* From here on failures must go through "out" so that the L1 table and
- * the L2 cache are released.
- */
- s->l1_table = qed_alloc_table(s);
- qed_init_l2_cache(&s->l2_cache);
-
- ret = qed_read_l1_table_sync(s);
- if (ret) {
- goto out;
- }
-
- /* If image was not closed cleanly, check consistency */
- if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
- /* Read-only images cannot be fixed. There is no risk of corruption
- * since write operations are not possible. Therefore, allow
- * potentially inconsistent images to be opened read-only. This can
- * aid data recovery from an otherwise inconsistent image.
- */
- if (!bdrv_is_read_only(bs->file) &&
- !(flags & BDRV_O_INCOMING)) {
- BdrvCheckResult result = {0};
-
- ret = qed_check(s, &result, true);
- if (ret) {
- goto out;
- }
- }
- }
-
- s->need_check_timer = qemu_new_timer_ns(vm_clock,
- qed_need_check_timer_cb, s);
-
-out:
- /* Reached on success as well; only clean up when ret signals failure */
- if (ret) {
- qed_free_l2_cache(&s->l2_cache);
- qemu_vfree(s->l1_table);
- }
- return ret;
-}
-
-/* We have nothing to do for QED reopen, stubs just return
- * success */
-static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static void bdrv_qed_close(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
-
- qed_cancel_need_check_timer(s);
- qemu_free_timer(s->need_check_timer);
-
- /* Ensure writes reach stable storage */
- bdrv_flush(bs->file);
-
- /* Clean shutdown, no check required on next open */
- if (s->header.features & QED_F_NEED_CHECK) {
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header_sync(s);
- }
-
- qed_free_l2_cache(&s->l2_cache);
- qemu_vfree(s->l1_table);
-}
-
-static int qed_create(const char *filename, uint32_t cluster_size,
- uint64_t image_size, uint32_t table_size,
- const char *backing_file, const char *backing_fmt)
-{
- QEDHeader header = {
- .magic = QED_MAGIC,
- .cluster_size = cluster_size,
- .table_size = table_size,
- .header_size = 1,
- .features = 0,
- .compat_features = 0,
- .l1_table_offset = cluster_size,
- .image_size = image_size,
- };
- QEDHeader le_header;
- uint8_t *l1_table = NULL;
- size_t l1_size = header.cluster_size * header.table_size;
- int ret = 0;
- BlockDriverState *bs = NULL;
-
- ret = bdrv_create_file(filename, NULL);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
- if (ret < 0) {
- return ret;
- }
-
- /* File must start empty and grow, check truncate is supported */
- ret = bdrv_truncate(bs, 0);
- if (ret < 0) {
- goto out;
- }
-
- if (backing_file) {
- header.features |= QED_F_BACKING_FILE;
- header.backing_filename_offset = sizeof(le_header);
- header.backing_filename_size = strlen(backing_file);
-
- if (qed_fmt_is_raw(backing_fmt)) {
- header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
- }
- }
-
- qed_header_cpu_to_le(&header, &le_header);
- ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
- if (ret < 0) {
- goto out;
- }
- ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
- header.backing_filename_size);
- if (ret < 0) {
- goto out;
- }
-
- l1_table = g_malloc0(l1_size);
- ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
- if (ret < 0) {
- goto out;
- }
-
- ret = 0; /* success */
-out:
- g_free(l1_table);
- bdrv_delete(bs);
- return ret;
-}
-
-static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
-{
- uint64_t image_size = 0;
- uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
- uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
-
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- image_size = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
- if (options->value.n) {
- table_size = options->value.n;
- }
- }
- options++;
- }
-
- if (!qed_is_cluster_size_valid(cluster_size)) {
- fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
- QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
- return -EINVAL;
- }
- if (!qed_is_table_size_valid(table_size)) {
- fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
- QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
- return -EINVAL;
- }
- if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
- fprintf(stderr, "QED image size must be a non-zero multiple of "
- "cluster size and less than %" PRIu64 " bytes\n",
- qed_max_image_size(cluster_size, table_size));
- return -EINVAL;
- }
-
- return qed_create(filename, cluster_size, image_size, table_size,
- backing_file, backing_fmt);
-}
-
-typedef struct {
- Coroutine *co;
- int is_allocated;
- int *pnum;
-} QEDIsAllocatedCB;
-
-static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
-{
- QEDIsAllocatedCB *cb = opaque;
- *cb->pnum = len / BDRV_SECTOR_SIZE;
- cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
- if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
- }
-}
-
-static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- BDRVQEDState *s = bs->opaque;
- uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
- size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
- QEDIsAllocatedCB cb = {
- .is_allocated = -1,
- .pnum = pnum,
- };
- QEDRequest request = { .l2_table = NULL };
-
- qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
-
- /* Now sleep if the callback wasn't invoked immediately */
- while (cb.is_allocated == -1) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
-
- qed_unref_l2_cache_entry(request.l2_table);
-
- return cb.is_allocated;
-}
-
-static int bdrv_qed_make_empty(BlockDriverState *bs)
-{
- return -ENOTSUP;
-}
-
-static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
-{
- return acb->common.bs->opaque;
-}
-
-/**
- * Read from the backing file or zero-fill if no backing file
- *
- * @s: QED state
- * @pos: Byte position in device
- * @qiov: Destination I/O vector
- * @cb: Completion function
- * @opaque: User data for completion function
- *
- * This function reads qiov->size bytes starting at pos from the backing file.
- * If there is no backing file then zeroes are read.
- */
-static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
- QEMUIOVector *qiov,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- uint64_t backing_length = 0;
- size_t size;
-
- /* If there is a backing file, get its length. Treat the absence of a
- * backing file like a zero length backing file.
- */
- if (s->bs->backing_hd) {
- int64_t l = bdrv_getlength(s->bs->backing_hd);
- if (l < 0) {
- cb(opaque, l);
- return;
- }
- backing_length = l;
- }
-
- /* Zero all sectors if reading beyond the end of the backing file */
- if (pos >= backing_length ||
- pos + qiov->size > backing_length) {
- qemu_iovec_memset(qiov, 0, 0, qiov->size);
- }
-
- /* Complete now if there are no backing file sectors to read */
- if (pos >= backing_length) {
- cb(opaque, 0);
- return;
- }
-
- /* If the read straddles the end of the backing file, shorten it */
- size = MIN((uint64_t)backing_length - pos, qiov->size);
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
- qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEMUIOVector qiov;
- struct iovec iov;
- uint64_t offset;
-} CopyFromBackingFileCB;
-
-static void qed_copy_from_backing_file_cb(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- qemu_vfree(copy_cb->iov.iov_base);
- gencb_complete(&copy_cb->gencb, ret);
-}
-
-static void qed_copy_from_backing_file_write(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- BDRVQEDState *s = copy_cb->s;
-
- if (ret) {
- qed_copy_from_backing_file_cb(copy_cb, ret);
- return;
- }
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
- bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
- &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
- qed_copy_from_backing_file_cb, copy_cb);
-}
-
-/**
- * Copy data from backing file into the image
- *
- * @s: QED state
- * @pos: Byte position in device
- * @len: Number of bytes
- * @offset: Byte offset in image file
- * @cb: Completion function
- * @opaque: User data for completion function
- */
-static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
- uint64_t len, uint64_t offset,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- CopyFromBackingFileCB *copy_cb;
-
- /* Skip copy entirely if there is no work to do */
- if (len == 0) {
- cb(opaque, 0);
- return;
- }
-
- copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
- copy_cb->s = s;
- copy_cb->offset = offset;
- copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
- copy_cb->iov.iov_len = len;
- qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
-
- qed_read_backing_file(s, pos, &copy_cb->qiov,
- qed_copy_from_backing_file_write, copy_cb);
-}
-
-/**
- * Link one or more contiguous clusters into a table
- *
- * @s: QED state
- * @table: L2 table
- * @index: First cluster index
- * @n: Number of contiguous clusters
- * @cluster: First cluster offset
- *
- * The cluster offset may be an allocated byte offset in the image file, the
- * zero cluster marker, or the unallocated cluster marker.
- */
-static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
- unsigned int n, uint64_t cluster)
-{
- int i;
- for (i = index; i < index + n; i++) {
- table->offsets[i] = cluster;
- if (!qed_offset_is_unalloc_cluster(cluster) &&
- !qed_offset_is_zero_cluster(cluster)) {
- cluster += s->header.cluster_size;
- }
- }
-}
-
-static void qed_aio_complete_bh(void *opaque)
-{
- QEDAIOCB *acb = opaque;
- BlockDriverCompletionFunc *cb = acb->common.cb;
- void *user_opaque = acb->common.opaque;
- int ret = acb->bh_ret;
- bool *finished = acb->finished;
-
- qemu_bh_delete(acb->bh);
- qemu_aio_release(acb);
-
- /* Invoke callback */
- cb(user_opaque, ret);
-
- /* Signal cancel completion */
- if (finished) {
- *finished = true;
- }
-}
-
-static void qed_aio_complete(QEDAIOCB *acb, int ret)
-{
- BDRVQEDState *s = acb_to_s(acb);
-
- trace_qed_aio_complete(s, acb, ret);
-
- /* Free resources */
- qemu_iovec_destroy(&acb->cur_qiov);
- qed_unref_l2_cache_entry(acb->request.l2_table);
-
- /* Free the buffer we may have allocated for zero writes */
- if (acb->flags & QED_AIOCB_ZERO) {
- qemu_vfree(acb->qiov->iov[0].iov_base);
- acb->qiov->iov[0].iov_base = NULL;
- }
-
- /* Arrange for a bh to invoke the completion function */
- acb->bh_ret = ret;
- acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
- qemu_bh_schedule(acb->bh);
-
- /* Start next allocating write request waiting behind this one. Note that
- * requests enqueue themselves when they first hit an unallocated cluster
- * but they wait until the entire request is finished before waking up the
- * next request in the queue. This ensures that we don't cycle through
- * requests multiple times but rather finish one at a time completely.
- */
- if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
- acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
- if (acb) {
- qed_aio_next_io(acb, 0);
- } else if (s->header.features & QED_F_NEED_CHECK) {
- qed_start_need_check_timer(s);
- }
- }
-}
-
-/**
- * Commit the current L2 table to the cache
- */
-static void qed_commit_l2_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- CachedL2Table *l2_table = acb->request.l2_table;
- uint64_t l2_offset = l2_table->offset;
-
- qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
- /* This is guaranteed to succeed because we just committed the entry to the
- * cache.
- */
- acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
- assert(acb->request.l2_table != NULL);
-
- qed_aio_next_io(opaque, ret);
-}
-
-/**
- * Update L1 table with new L2 table offset and write it out
- */
-static void qed_aio_write_l1_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- int index;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- index = qed_l1_index(s, acb->cur_pos);
- s->l1_table->offsets[index] = acb->request.l2_table->offset;
-
- qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
-}
-
-/**
- * Update L2 table with new cluster offsets and write them out
- */
-static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
-{
- BDRVQEDState *s = acb_to_s(acb);
- bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
- int index;
-
- if (ret) {
- goto err;
- }
-
- if (need_alloc) {
- qed_unref_l2_cache_entry(acb->request.l2_table);
- acb->request.l2_table = qed_new_l2_table(s);
- }
-
- index = qed_l2_index(s, acb->cur_pos);
- qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
- offset);
-
- if (need_alloc) {
- /* Write out the whole new L2 table */
- qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
- qed_aio_write_l1_update, acb);
- } else {
- /* Write out only the updated part of the L2 table */
- qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
- qed_aio_next_io, acb);
- }
- return;
-
-err:
- qed_aio_complete(acb, ret);
-}
-
-static void qed_aio_write_l2_update_cb(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
-}
-
-/**
- * Flush new data clusters before updating the L2 table
- *
- * This flush is necessary when a backing file is in use. A crash during an
- * allocating write could result in empty clusters in the image. If the write
- * only touched a subregion of the cluster, then backing image sectors have
- * been lost in the untouched region. The solution is to flush after writing a
- * new data cluster and before updating the L2 table.
- */
-static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
-
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
- qed_aio_complete(acb, -EIO);
- }
-}
-
-/**
- * Write data to the image file
- */
-static void qed_aio_write_main(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t offset = acb->cur_cluster +
- qed_offset_into_cluster(s, acb->cur_pos);
- BlockDriverCompletionFunc *next_fn;
-
- trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
- next_fn = qed_aio_next_io;
- } else {
- if (s->bs->backing_hd) {
- next_fn = qed_aio_write_flush_before_l2_update;
- } else {
- next_fn = qed_aio_write_l2_update_cb;
- }
- }
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- next_fn, acb);
-}
-
-/**
- * Populate back untouched region of new data cluster
- */
-static void qed_aio_write_postfill(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = acb->cur_pos + acb->cur_qiov.size;
- uint64_t len =
- qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
- uint64_t offset = acb->cur_cluster +
- qed_offset_into_cluster(s, acb->cur_pos) +
- acb->cur_qiov.size;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- trace_qed_aio_write_postfill(s, acb, start, len, offset);
- qed_copy_from_backing_file(s, start, len, offset,
- qed_aio_write_main, acb);
-}
-
-/**
- * Populate front untouched region of new data cluster
- */
-static void qed_aio_write_prefill(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
- uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
-
- trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
- qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
- qed_aio_write_postfill, acb);
-}
-
-/**
- * Check if the QED_F_NEED_CHECK bit should be set during allocating write
- */
-static bool qed_should_set_need_check(BDRVQEDState *s)
-{
- /* The flush before L2 update path ensures consistency */
- if (s->bs->backing_hd) {
- return false;
- }
-
- return !(s->header.features & QED_F_NEED_CHECK);
-}
-
-static void qed_aio_write_zero_cluster(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- qed_aio_write_l2_update(acb, 0, 1);
-}
-
-/**
- * Write new data cluster
- *
- * @acb: Write request
- * @len: Length in bytes
- *
- * This path is taken when writing to previously unallocated clusters.
- */
-static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
-{
- BDRVQEDState *s = acb_to_s(acb);
- BlockDriverCompletionFunc *cb;
-
- /* Cancel timer when the first allocating request comes in */
- if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
- qed_cancel_need_check_timer(s);
- }
-
- /* Freeze this request if another allocating write is in progress */
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
- }
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
- s->allocating_write_reqs_plugged) {
- return; /* wait for existing request to finish */
- }
-
- acb->cur_nclusters = qed_bytes_to_clusters(s,
- qed_offset_into_cluster(s, acb->cur_pos) + len);
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- if (acb->flags & QED_AIOCB_ZERO) {
- /* Skip ahead if the clusters are already zero */
- if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
- qed_aio_next_io(acb, 0);
- return;
- }
-
- cb = qed_aio_write_zero_cluster;
- } else {
- cb = qed_aio_write_prefill;
- acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
- }
-
- if (qed_should_set_need_check(s)) {
- s->header.features |= QED_F_NEED_CHECK;
- qed_write_header(s, cb, acb);
- } else {
- cb(acb, 0);
- }
-}
-
-/**
- * Write data cluster in place
- *
- * @acb: Write request
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * This path is taken when writing to already allocated clusters.
- */
-static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
-{
- /* Allocate buffer for zero writes */
- if (acb->flags & QED_AIOCB_ZERO) {
- struct iovec *iov = acb->qiov->iov;
-
- if (!iov->iov_base) {
- iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
- memset(iov->iov_base, 0, iov->iov_len);
- }
- }
-
- /* Calculate the I/O vector */
- acb->cur_cluster = offset;
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- /* Do the actual write */
- qed_aio_write_main(acb, 0);
-}
-
-/**
- * Write data cluster
- *
- * @opaque: Write request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_write_data(void *opaque, int ret,
- uint64_t offset, size_t len)
-{
- QEDAIOCB *acb = opaque;
-
- trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
-
- acb->find_cluster_ret = ret;
-
- switch (ret) {
- case QED_CLUSTER_FOUND:
- qed_aio_write_inplace(acb, offset, len);
- break;
-
- case QED_CLUSTER_L2:
- case QED_CLUSTER_L1:
- case QED_CLUSTER_ZERO:
- qed_aio_write_alloc(acb, len);
- break;
-
- default:
- qed_aio_complete(acb, ret);
- break;
- }
-}
-
-/**
- * Read data cluster
- *
- * @opaque: Read request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_read_data(void *opaque, int ret,
- uint64_t offset, size_t len)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- BlockDriverState *bs = acb->common.bs;
-
- /* Adjust offset into cluster */
- offset += qed_offset_into_cluster(s, acb->cur_pos);
-
- trace_qed_aio_read_data(s, acb, ret, offset, len);
-
- if (ret < 0) {
- goto err;
- }
-
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- /* Handle zero cluster and backing file reads */
- if (ret == QED_CLUSTER_ZERO) {
- qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
- qed_aio_next_io(acb, 0);
- return;
- } else if (ret != QED_CLUSTER_FOUND) {
- qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
- qed_aio_next_io, acb);
- return;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- qed_aio_next_io, acb);
- return;
-
-err:
- qed_aio_complete(acb, ret);
-}
-
-/**
- * Begin next I/O or complete the request
- */
-static void qed_aio_next_io(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
- qed_aio_write_data : qed_aio_read_data;
-
- trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
-
- /* Handle I/O error */
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- acb->qiov_offset += acb->cur_qiov.size;
- acb->cur_pos += acb->cur_qiov.size;
- qemu_iovec_reset(&acb->cur_qiov);
-
- /* Complete request */
- if (acb->cur_pos >= acb->end_pos) {
- qed_aio_complete(acb, 0);
- return;
- }
-
- /* Find next cluster and start I/O */
- qed_find_cluster(s, &acb->request,
- acb->cur_pos, acb->end_pos - acb->cur_pos,
- io_fn, acb);
-}
-
-static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque, int flags)
-{
- QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
-
- trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
- opaque, flags);
-
- acb->flags = flags;
- acb->finished = NULL;
- acb->qiov = qiov;
- acb->qiov_offset = 0;
- acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
- acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
- acb->request.l2_table = NULL;
- qemu_iovec_init(&acb->cur_qiov, qiov->niov);
-
- /* Start request */
- qed_aio_next_io(acb, 0);
- return &acb->common;
-}
-
-static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
- opaque, QED_AIOCB_WRITE);
-}
-
-typedef struct {
- Coroutine *co;
- int ret;
- bool done;
-} QEDWriteZeroesCB;
-
-static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
-{
- QEDWriteZeroesCB *cb = opaque;
-
- cb->done = true;
- cb->ret = ret;
- if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
- }
-}
-
-static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors)
-{
- BlockDriverAIOCB *blockacb;
- BDRVQEDState *s = bs->opaque;
- QEDWriteZeroesCB cb = { .done = false };
- QEMUIOVector qiov;
- struct iovec iov;
-
- /* Refuse if there are untouched backing file sectors */
- if (bs->backing_hd) {
- if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- }
-
- /* Zero writes start without an I/O buffer. If a buffer becomes necessary
- * then it will be allocated during request processing.
- */
- iov.iov_base = NULL,
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
- qed_co_write_zeroes_cb, &cb,
- QED_AIOCB_WRITE | QED_AIOCB_ZERO);
- if (!blockacb) {
- return -EIO;
- }
- if (!cb.done) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
- assert(cb.done);
- return cb.ret;
-}
-
-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
-{
- BDRVQEDState *s = bs->opaque;
- uint64_t old_image_size;
- int ret;
-
- if (!qed_is_image_size_valid(offset, s->header.cluster_size,
- s->header.table_size)) {
- return -EINVAL;
- }
-
- /* Shrinking is currently not supported */
- if ((uint64_t)offset < s->header.image_size) {
- return -ENOTSUP;
- }
-
- old_image_size = s->header.image_size;
- s->header.image_size = offset;
- ret = qed_write_header_sync(s);
- if (ret < 0) {
- s->header.image_size = old_image_size;
- }
- return ret;
-}
-
-static int64_t bdrv_qed_getlength(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- return s->header.image_size;
-}
-
-static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQEDState *s = bs->opaque;
-
- memset(bdi, 0, sizeof(*bdi));
- bdi->cluster_size = s->header.cluster_size;
- bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
- return 0;
-}
-
-static int bdrv_qed_change_backing_file(BlockDriverState *bs,
- const char *backing_file,
- const char *backing_fmt)
-{
- BDRVQEDState *s = bs->opaque;
- QEDHeader new_header, le_header;
- void *buffer;
- size_t buffer_len, backing_file_len;
- int ret;
-
- /* Refuse to set backing filename if unknown compat feature bits are
- * active. If the image uses an unknown compat feature then we may not
- * know the layout of data following the header structure and cannot safely
- * add a new string.
- */
- if (backing_file && (s->header.compat_features &
- ~QED_COMPAT_FEATURE_MASK)) {
- return -ENOTSUP;
- }
-
- memcpy(&new_header, &s->header, sizeof(new_header));
-
- new_header.features &= ~(QED_F_BACKING_FILE |
- QED_F_BACKING_FORMAT_NO_PROBE);
-
- /* Adjust feature flags */
- if (backing_file) {
- new_header.features |= QED_F_BACKING_FILE;
-
- if (qed_fmt_is_raw(backing_fmt)) {
- new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
- }
- }
-
- /* Calculate new header size */
- backing_file_len = 0;
-
- if (backing_file) {
- backing_file_len = strlen(backing_file);
- }
-
- buffer_len = sizeof(new_header);
- new_header.backing_filename_offset = buffer_len;
- new_header.backing_filename_size = backing_file_len;
- buffer_len += backing_file_len;
-
- /* Make sure we can rewrite header without failing */
- if (buffer_len > new_header.header_size * new_header.cluster_size) {
- return -ENOSPC;
- }
-
- /* Prepare new header */
- buffer = g_malloc(buffer_len);
-
- qed_header_cpu_to_le(&new_header, &le_header);
- memcpy(buffer, &le_header, sizeof(le_header));
- buffer_len = sizeof(le_header);
-
- if (backing_file) {
- memcpy(buffer + buffer_len, backing_file, backing_file_len);
- buffer_len += backing_file_len;
- }
-
- /* Write new header */
- ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
- g_free(buffer);
- if (ret == 0) {
- memcpy(&s->header, &new_header, sizeof(new_header));
- }
- return ret;
-}
-
-static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
-
- bdrv_qed_close(bs);
- memset(s, 0, sizeof(BDRVQEDState));
- bdrv_qed_open(bs, NULL, bs->open_flags);
-}
-
-static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
- BdrvCheckMode fix)
-{
- BDRVQEDState *s = bs->opaque;
-
- return qed_check(s, result, !!fix);
-}
-
-static QEMUOptionParameter qed_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size (in bytes)"
- }, {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- }, {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- }, {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "Cluster size (in bytes)",
- .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
- }, {
- .name = BLOCK_OPT_TABLE_SIZE,
- .type = OPT_SIZE,
- .help = "L1/L2 table size (in clusters)"
- },
- { /* end of list */ }
-};
-
-static BlockDriver bdrv_qed = {
- .format_name = "qed",
- .instance_size = sizeof(BDRVQEDState),
- .create_options = qed_create_options,
-
- .bdrv_probe = bdrv_qed_probe,
- .bdrv_rebind = bdrv_qed_rebind,
- .bdrv_open = bdrv_qed_open,
- .bdrv_close = bdrv_qed_close,
- .bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
- .bdrv_create = bdrv_qed_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
- .bdrv_make_empty = bdrv_qed_make_empty,
- .bdrv_aio_readv = bdrv_qed_aio_readv,
- .bdrv_aio_writev = bdrv_qed_aio_writev,
- .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
- .bdrv_truncate = bdrv_qed_truncate,
- .bdrv_getlength = bdrv_qed_getlength,
- .bdrv_get_info = bdrv_qed_get_info,
- .bdrv_change_backing_file = bdrv_qed_change_backing_file,
- .bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
- .bdrv_check = bdrv_qed_check,
-};
-
-static void bdrv_qed_init(void)
-{
- bdrv_register(&bdrv_qed);
-}
-
-block_init(bdrv_qed_init);
diff --git a/contrib/qemu/block/qed.h b/contrib/qemu/block/qed.h
deleted file mode 100644
index 2b4ddedf313..00000000000
--- a/contrib/qemu/block/qed.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef BLOCK_QED_H
-#define BLOCK_QED_H
-
-#include "block/block_int.h"
-
-/* The layout of a QED file is as follows:
- *
- * +--------+----------+----------+----------+-----+
- * | header | L1 table | cluster0 | cluster1 | ... |
- * +--------+----------+----------+----------+-----+
- *
- * There is a 2-level pagetable for cluster allocation:
- *
- * +----------+
- * | L1 table |
- * +----------+
- * ,------' | '------.
- * +----------+ | +----------+
- * | L2 table | ... | L2 table |
- * +----------+ +----------+
- * ,------' | '------.
- * +----------+ | +----------+
- * | Data | ... | Data |
- * +----------+ +----------+
- *
- * The L1 table is fixed size and always present. L2 tables are allocated on
- * demand. The L1 table size determines the maximum possible image size; it
- * can be influenced using the cluster_size and table_size values.
- *
- * All fields are little-endian on disk.
- */
-
-enum {
- QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
-
- /* The image supports a backing file */
- QED_F_BACKING_FILE = 0x01,
-
- /* The image needs a consistency check before use */
- QED_F_NEED_CHECK = 0x02,
-
- /* The backing file format must not be probed, treat as raw image */
- QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
-
- /* Feature bits must be used when the on-disk format changes */
- QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
- QED_F_NEED_CHECK |
- QED_F_BACKING_FORMAT_NO_PROBE,
- QED_COMPAT_FEATURE_MASK = 0, /* supported compat feature bits */
- QED_AUTOCLEAR_FEATURE_MASK = 0, /* supported autoclear feature bits */
-
- /* Data is stored in groups of sectors called clusters. Cluster size must
- * be large to avoid keeping too much metadata. I/O requests that have
- * sub-cluster size will require read-modify-write.
- */
- QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
- QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
- QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
-
- /* Allocated clusters are tracked using a 2-level pagetable. Table size is
- * a multiple of clusters so large maximum image sizes can be supported
- * without jacking up the cluster size too much.
- */
- QED_MIN_TABLE_SIZE = 1, /* in clusters */
- QED_MAX_TABLE_SIZE = 16,
- QED_DEFAULT_TABLE_SIZE = 4,
-
- /* Delay to flush and clean image after last allocating write completes */
- QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */
-};
-
-typedef struct {
- uint32_t magic; /* QED\0 */
-
- uint32_t cluster_size; /* in bytes */
- uint32_t table_size; /* for L1 and L2 tables, in clusters */
- uint32_t header_size; /* in clusters */
-
- uint64_t features; /* format feature bits */
- uint64_t compat_features; /* compatible feature bits */
- uint64_t autoclear_features; /* self-resetting feature bits */
-
- uint64_t l1_table_offset; /* in bytes */
- uint64_t image_size; /* total logical image size, in bytes */
-
- /* if (features & QED_F_BACKING_FILE) */
- uint32_t backing_filename_offset; /* in bytes from start of header */
- uint32_t backing_filename_size; /* in bytes */
-} QEDHeader;
-
-typedef struct {
- uint64_t offsets[0]; /* in bytes */
-} QEDTable;
-
-/* The L2 cache is a simple write-through cache for L2 structures */
-typedef struct CachedL2Table {
- QEDTable *table;
- uint64_t offset; /* offset=0 indicates an invalidate entry */
- QTAILQ_ENTRY(CachedL2Table) node;
- int ref;
-} CachedL2Table;
-
-typedef struct {
- QTAILQ_HEAD(, CachedL2Table) entries;
- unsigned int n_entries;
-} L2TableCache;
-
-typedef struct QEDRequest {
- CachedL2Table *l2_table;
-} QEDRequest;
-
-enum {
- QED_AIOCB_WRITE = 0x0001, /* read or write? */
- QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */
-};
-
-typedef struct QEDAIOCB {
- BlockDriverAIOCB common;
- QEMUBH *bh;
- int bh_ret; /* final return status for completion bh */
- QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
- int flags; /* QED_AIOCB_* bits ORed together */
- bool *finished; /* signal for cancel completion */
- uint64_t end_pos; /* request end on block device, in bytes */
-
- /* User scatter-gather list */
- QEMUIOVector *qiov;
- size_t qiov_offset; /* byte count already processed */
-
- /* Current cluster scatter-gather list */
- QEMUIOVector cur_qiov;
- uint64_t cur_pos; /* position on block device, in bytes */
- uint64_t cur_cluster; /* cluster offset in image file */
- unsigned int cur_nclusters; /* number of clusters being accessed */
- int find_cluster_ret; /* used for L1/L2 update */
-
- QEDRequest request;
-} QEDAIOCB;
-
-typedef struct {
- BlockDriverState *bs; /* device */
- uint64_t file_size; /* length of image file, in bytes */
-
- QEDHeader header; /* always cpu-endian */
- QEDTable *l1_table;
- L2TableCache l2_cache; /* l2 table cache */
- uint32_t table_nelems;
- uint32_t l1_shift;
- uint32_t l2_shift;
- uint32_t l2_mask;
-
- /* Allocating write request queue */
- QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
- bool allocating_write_reqs_plugged;
-
- /* Periodic flush and clear need check flag */
- QEMUTimer *need_check_timer;
-} BDRVQEDState;
-
-enum {
- QED_CLUSTER_FOUND, /* cluster found */
- QED_CLUSTER_ZERO, /* zero cluster found */
- QED_CLUSTER_L2, /* cluster missing in L2 */
- QED_CLUSTER_L1, /* cluster missing in L1 */
-};
-
-/**
- * qed_find_cluster() completion callback
- *
- * @opaque: User data for completion callback
- * @ret: QED_CLUSTER_FOUND Success
- * QED_CLUSTER_L2 Data cluster unallocated in L2
- * QED_CLUSTER_L1 L2 unallocated in L1
- * -errno POSIX error occurred
- * @offset: Data cluster offset
- * @len: Contiguous bytes starting from cluster offset
- *
- * This function is invoked when qed_find_cluster() completes.
- *
- * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
- * in the image file.
- *
- * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
- * table offset, respectively. len is number of contiguous unallocated bytes.
- */
-typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
-
-/**
- * Generic callback for chaining async callbacks
- */
-typedef struct {
- BlockDriverCompletionFunc *cb;
- void *opaque;
-} GenericCB;
-
-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
-void gencb_complete(void *opaque, int ret);
-
-/**
- * Header functions
- */
-int qed_write_header_sync(BDRVQEDState *s);
-
-/**
- * L2 cache functions
- */
-void qed_init_l2_cache(L2TableCache *l2_cache);
-void qed_free_l2_cache(L2TableCache *l2_cache);
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
-void qed_unref_l2_cache_entry(CachedL2Table *entry);
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
-
-/**
- * Table I/O functions
- */
-int qed_read_l1_table_sync(BDRVQEDState *s);
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
- BlockDriverCompletionFunc *cb, void *opaque);
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
- unsigned int n);
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- uint64_t offset);
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
- BlockDriverCompletionFunc *cb, void *opaque);
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush,
- BlockDriverCompletionFunc *cb, void *opaque);
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush);
-
-/**
- * Cluster functions
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
- size_t len, QEDFindClusterFunc *cb, void *opaque);
-
-/**
- * Consistency check
- */
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
-
-QEDTable *qed_alloc_table(BDRVQEDState *s);
-
-/**
- * Round down to the start of a cluster
- */
-static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
-{
- return offset & ~(uint64_t)(s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
-{
- return offset & (s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
-{
- return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
- (s->header.cluster_size - 1);
-}
-
-static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
-{
- return pos >> s->l1_shift;
-}
-
-static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
-{
- return (pos >> s->l2_shift) & s->l2_mask;
-}
-
-/**
- * Test if a cluster offset is valid
- */
-static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
-{
- uint64_t header_size = (uint64_t)s->header.header_size *
- s->header.cluster_size;
-
- if (offset & (s->header.cluster_size - 1)) {
- return false;
- }
- return offset >= header_size && offset < s->file_size;
-}
-
-/**
- * Test if a table offset is valid
- */
-static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
-{
- uint64_t end_offset = offset + (s->header.table_size - 1) *
- s->header.cluster_size;
-
- /* Overflow check */
- if (end_offset <= offset) {
- return false;
- }
-
- return qed_check_cluster_offset(s, offset) &&
- qed_check_cluster_offset(s, end_offset);
-}
-
-static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
- uint64_t offset)
-{
- if (qed_offset_into_cluster(s, offset)) {
- return false;
- }
- return true;
-}
-
-static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
-{
- if (offset == 0) {
- return true;
- }
- return false;
-}
-
-static inline bool qed_offset_is_zero_cluster(uint64_t offset)
-{
- if (offset == 1) {
- return true;
- }
- return false;
-}
-
-#endif /* BLOCK_QED_H */
diff --git a/contrib/qemu/block/snapshot.c b/contrib/qemu/block/snapshot.c
deleted file mode 100644
index 6c6d9deea1f..00000000000
--- a/contrib/qemu/block/snapshot.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Block layer snapshot related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/snapshot.h"
-#include "block/block_int.h"
-
-int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
- const char *name)
-{
- QEMUSnapshotInfo *sn_tab, *sn;
- int nb_sns, i, ret;
-
- ret = -ENOENT;
- nb_sns = bdrv_snapshot_list(bs, &sn_tab);
- if (nb_sns < 0) {
- return ret;
- }
- for (i = 0; i < nb_sns; i++) {
- sn = &sn_tab[i];
- if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
- *sn_info = *sn;
- ret = 0;
- break;
- }
- }
- g_free(sn_tab);
- return ret;
-}
-
-int bdrv_can_snapshot(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
- return 0;
- }
-
- if (!drv->bdrv_snapshot_create) {
- if (bs->file != NULL) {
- return bdrv_can_snapshot(bs->file);
- }
- return 0;
- }
-
- return 1;
-}
-
-int bdrv_snapshot_create(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_create) {
- return drv->bdrv_snapshot_create(bs, sn_info);
- }
- if (bs->file) {
- return bdrv_snapshot_create(bs->file, sn_info);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_goto(BlockDriverState *bs,
- const char *snapshot_id)
-{
- BlockDriver *drv = bs->drv;
- int ret, open_ret;
-
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_goto) {
- return drv->bdrv_snapshot_goto(bs, snapshot_id);
- }
-
- if (bs->file) {
- drv->bdrv_close(bs);
- ret = bdrv_snapshot_goto(bs->file, snapshot_id);
- open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
- if (open_ret < 0) {
- bdrv_delete(bs->file);
- bs->drv = NULL;
- return open_ret;
- }
- return ret;
- }
-
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_delete) {
- return drv->bdrv_snapshot_delete(bs, snapshot_id);
- }
- if (bs->file) {
- return bdrv_snapshot_delete(bs->file, snapshot_id);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_list(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_list) {
- return drv->bdrv_snapshot_list(bs, psn_info);
- }
- if (bs->file) {
- return bdrv_snapshot_list(bs->file, psn_info);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_load_tmp(BlockDriverState *bs,
- const char *snapshot_name)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (!bs->read_only) {
- return -EINVAL;
- }
- if (drv->bdrv_snapshot_load_tmp) {
- return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
- }
- return -ENOTSUP;
-}
diff --git a/contrib/qemu/config-host.h b/contrib/qemu/config-host.h
deleted file mode 100644
index 6b5c8da1243..00000000000
--- a/contrib/qemu/config-host.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* Automatically generated by create_config - do not modify */
-#define CONFIG_QEMU_CONFDIR "/usr/local/etc/qemu"
-#define CONFIG_QEMU_DATADIR "/usr/local/share/qemu"
-#define CONFIG_QEMU_DOCDIR "/usr/local/share/doc/qemu"
-#define CONFIG_QEMU_LOCALSTATEDIR "/usr/local/var"
-#define CONFIG_QEMU_HELPERDIR "/usr/local/libexec"
-#define CONFIG_QEMU_LOCALEDIR "/usr/local/share/locale"
-#define HOST_X86_64 1
-#define CONFIG_QEMU_LDST_OPTIMIZATION 1
-#define CONFIG_POSIX 1
-#define CONFIG_LINUX 1
-#define CONFIG_SLIRP 1
-#define CONFIG_SMBD_COMMAND "/usr/sbin/smbd"
-#define CONFIG_AUDIO_DRIVERS \
- &oss_audio_driver,\
-
-#define CONFIG_OSS 1
-#define CONFIG_BDRV_RW_WHITELIST\
- NULL
-#define CONFIG_BDRV_RO_WHITELIST\
- NULL
-#define CONFIG_VNC 1
-#define CONFIG_VNC_TLS 1
-#define CONFIG_VNC_SASL 1
-#define CONFIG_VNC_WS 1
-#define CONFIG_FNMATCH 1
-#define CONFIG_UUID 1
-#define CONFIG_XFS 1
-#define QEMU_VERSION "1.5.50"
-#define QEMU_PKGVERSION ""
-#define CONFIG_CURSES 1
-#define CONFIG_UTIMENSAT 1
-#define CONFIG_PIPE2 1
-#define CONFIG_ACCEPT4 1
-#define CONFIG_SPLICE 1
-#define CONFIG_EVENTFD 1
-#define CONFIG_FALLOCATE 1
-#define CONFIG_FALLOCATE_PUNCH_HOLE 1
-#define CONFIG_SYNC_FILE_RANGE 1
-#define CONFIG_FIEMAP 1
-#define CONFIG_DUP3 1
-#define CONFIG_EPOLL 1
-#define CONFIG_EPOLL_CREATE1 1
-#define CONFIG_EPOLL_PWAIT 1
-#define CONFIG_SENDFILE 1
-#define CONFIG_INOTIFY 1
-#define CONFIG_INOTIFY1 1
-#define CONFIG_BYTESWAP_H 1
-#define CONFIG_CURL 1
-#define CONFIG_LINUX_AIO 1
-#define CONFIG_ATTR 1
-#define CONFIG_VHOST_SCSI 1
-#define CONFIG_IOVEC 1
-#define CONFIG_PREADV 1
-#define CONFIG_FDT 1
-#define CONFIG_SIGNALFD 1
-#define CONFIG_FDATASYNC 1
-#define CONFIG_MADVISE 1
-#define CONFIG_POSIX_MADVISE 1
-#define CONFIG_SIGEV_THREAD_ID 1
-#define CONFIG_UNAME_RELEASE ""
-#define CONFIG_QOM_CAST_DEBUG 1
-#define CONFIG_COROUTINE_BACKEND ucontext
-#define CONFIG_OPEN_BY_HANDLE 1
-#define CONFIG_LINUX_MAGIC_H 1
-#define CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE 1
-#define CONFIG_HAS_ENVIRON 1
-#define CONFIG_CPUID_H 1
-#define CONFIG_VIRTIO_BLK_DATA_PLANE $(CONFIG_VIRTIO)
-#define CONFIG_TRACE_NOP 1
-#define CONFIG_TRACE_FILE trace
-#define CONFIG_TRACE_DEFAULT 1
diff --git a/contrib/qemu/coroutine-ucontext.c b/contrib/qemu/coroutine-ucontext.c
deleted file mode 100644
index 4bf2cde279b..00000000000
--- a/contrib/qemu/coroutine-ucontext.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * ucontext coroutine initialization code
- *
- * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
- * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.0 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
-#ifdef _FORTIFY_SOURCE
-#undef _FORTIFY_SOURCE
-#endif
-#include <stdlib.h>
-#include <setjmp.h>
-#include <stdint.h>
-#include <pthread.h>
-#include <ucontext.h>
-#include "qemu-common.h"
-#include "block/coroutine_int.h"
-
-#ifdef CONFIG_VALGRIND_H
-#include <valgrind/valgrind.h>
-#endif
-
-typedef struct {
- Coroutine base;
- void *stack;
- sigjmp_buf env;
-
-#ifdef CONFIG_VALGRIND_H
- unsigned int valgrind_stack_id;
-#endif
-
-} CoroutineUContext;
-
-/**
- * Per-thread coroutine bookkeeping
- */
-typedef struct {
- /** Currently executing coroutine */
- Coroutine *current;
-
- /** The default coroutine */
- CoroutineUContext leader;
-} CoroutineThreadState;
-
-static pthread_key_t thread_state_key;
-
-/*
- * va_args to makecontext() must be type 'int', so passing
- * the pointer we need may require several int args. This
- * union is a quick hack to let us do that
- */
-union cc_arg {
- void *p;
- int i[2];
-};
-
-static CoroutineThreadState *coroutine_get_thread_state(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- if (!s) {
- s = g_malloc0(sizeof(*s));
- s->current = &s->leader.base;
- pthread_setspecific(thread_state_key, s);
- }
- return s;
-}
-
-static void qemu_coroutine_thread_cleanup(void *opaque)
-{
- CoroutineThreadState *s = opaque;
-
- g_free(s);
-}
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
- int ret;
-
- ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
- if (ret != 0) {
- fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
- abort();
- }
-}
-
-static void coroutine_trampoline(int i0, int i1)
-{
- union cc_arg arg;
- CoroutineUContext *self;
- Coroutine *co;
-
- arg.i[0] = i0;
- arg.i[1] = i1;
- self = arg.p;
- co = &self->base;
-
- /* Initialize longjmp environment and switch back the caller */
- if (!sigsetjmp(self->env, 0)) {
- siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
- }
-
- while (true) {
- co->entry(co->entry_arg);
- qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
- }
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
- const size_t stack_size = 1 << 20;
- CoroutineUContext *co;
- ucontext_t old_uc, uc;
- sigjmp_buf old_env;
- union cc_arg arg = {0};
-
- /* The ucontext functions preserve signal masks which incurs a
- * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
- * preserve signal masks but only works on the current stack.
- * Since we need a way to create and switch to a new stack, use
- * the ucontext functions for that but sigsetjmp()/siglongjmp() for
- * everything else.
- */
-
- if (getcontext(&uc) == -1) {
- abort();
- }
-
- co = g_malloc0(sizeof(*co));
- co->stack = g_malloc(stack_size);
- co->base.entry_arg = &old_env; /* stash away our jmp_buf */
-
- uc.uc_link = &old_uc;
- uc.uc_stack.ss_sp = co->stack;
- uc.uc_stack.ss_size = stack_size;
- uc.uc_stack.ss_flags = 0;
-
-#ifdef CONFIG_VALGRIND_H
- co->valgrind_stack_id =
- VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
-#endif
-
- arg.p = co;
-
- makecontext(&uc, (void (*)(void))coroutine_trampoline,
- 2, arg.i[0], arg.i[1]);
-
- /* swapcontext() in, siglongjmp() back out */
- if (!sigsetjmp(old_env, 0)) {
- swapcontext(&old_uc, &uc);
- }
- return &co->base;
-}
-
-#ifdef CONFIG_VALGRIND_H
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-/* Work around an unused variable in the valgrind.h macro... */
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-static inline void valgrind_stack_deregister(CoroutineUContext *co)
-{
- VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
-}
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-#pragma GCC diagnostic pop
-#endif
-#endif
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
- CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
-
-#ifdef CONFIG_VALGRIND_H
- valgrind_stack_deregister(co);
-#endif
-
- g_free(co->stack);
- g_free(co);
-}
-
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
- CoroutineAction action)
-{
- CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
- CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
- CoroutineThreadState *s = coroutine_get_thread_state();
- int ret;
-
- s->current = to_;
-
- ret = sigsetjmp(from->env, 0);
- if (ret == 0) {
- siglongjmp(to->env, action);
- }
- return ret;
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
- CoroutineThreadState *s = coroutine_get_thread_state();
-
- return s->current;
-}
-
-bool qemu_in_coroutine(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- return s && s->current->caller;
-}
diff --git a/contrib/qemu/include/block/aio.h b/contrib/qemu/include/block/aio.h
deleted file mode 100644
index 183679374fa..00000000000
--- a/contrib/qemu/include/block/aio.h
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * QEMU aio implementation
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_AIO_H
-#define QEMU_AIO_H
-
-#include "qemu-common.h"
-#include "qemu/queue.h"
-#include "qemu/event_notifier.h"
-
-typedef struct BlockDriverAIOCB BlockDriverAIOCB;
-typedef void BlockDriverCompletionFunc(void *opaque, int ret);
-
-typedef struct AIOCBInfo {
- void (*cancel)(BlockDriverAIOCB *acb);
- size_t aiocb_size;
-} AIOCBInfo;
-
-struct BlockDriverAIOCB {
- const AIOCBInfo *aiocb_info;
- BlockDriverState *bs;
- BlockDriverCompletionFunc *cb;
- void *opaque;
-};
-
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
-void qemu_aio_release(void *p);
-
-typedef struct AioHandler AioHandler;
-typedef void QEMUBHFunc(void *opaque);
-typedef void IOHandler(void *opaque);
-
-typedef struct AioContext {
- GSource source;
-
- /* The list of registered AIO handlers */
- QLIST_HEAD(, AioHandler) aio_handlers;
-
- /* This is a simple lock used to protect the aio_handlers list.
- * Specifically, it's used to ensure that no callbacks are removed while
- * we're walking and dispatching callbacks.
- */
- int walking_handlers;
-
- /* Anchor of the list of Bottom Halves belonging to the context */
- struct QEMUBH *first_bh;
-
- /* A simple lock used to protect the first_bh list, and ensure that
- * no callbacks are removed while we're walking and dispatching callbacks.
- */
- int walking_bh;
-
- /* Used for aio_notify. */
- EventNotifier notifier;
-
- /* GPollFDs for aio_poll() */
- GArray *pollfds;
-
- /* Thread pool for performing work and receiving completion callbacks */
- struct ThreadPool *thread_pool;
-} AioContext;
-
-/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
-typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
-
-/**
- * aio_context_new: Allocate a new AioContext.
- *
- * AioContext provide a mini event-loop that can be waited on synchronously.
- * They also provide bottom halves, a service to execute a piece of code
- * as soon as possible.
- */
-AioContext *aio_context_new(void);
-
-/**
- * aio_context_ref:
- * @ctx: The AioContext to operate on.
- *
- * Add a reference to an AioContext.
- */
-void aio_context_ref(AioContext *ctx);
-
-/**
- * aio_context_unref:
- * @ctx: The AioContext to operate on.
- *
- * Drop a reference to an AioContext.
- */
-void aio_context_unref(AioContext *ctx);
-
-/**
- * aio_bh_new: Allocate a new bottom half structure.
- *
- * Bottom halves are lightweight callbacks whose invocation is guaranteed
- * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
- * is opaque and must be allocated prior to its use.
- */
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
-
-/**
- * aio_notify: Force processing of pending events.
- *
- * Similar to signaling a condition variable, aio_notify forces
- * aio_wait to exit, so that the next call will re-examine pending events.
- * The caller of aio_notify will usually call aio_wait again very soon,
- * or go through another iteration of the GLib main loop. Hence, aio_notify
- * also has the side effect of recalculating the sets of file descriptors
- * that the main loop waits for.
- *
- * Calling aio_notify is rarely necessary, because for example scheduling
- * a bottom half calls it already.
- */
-void aio_notify(AioContext *ctx);
-
-/**
- * aio_bh_poll: Poll bottom halves for an AioContext.
- *
- * These are internal functions used by the QEMU main loop.
- */
-int aio_bh_poll(AioContext *ctx);
-
-/**
- * qemu_bh_schedule: Schedule a bottom half.
- *
- * Scheduling a bottom half interrupts the main loop and causes the
- * execution of the callback that was passed to qemu_bh_new.
- *
- * Bottom halves that are scheduled from a bottom half handler are instantly
- * invoked. This can create an infinite loop if a bottom half handler
- * schedules itself.
- *
- * @bh: The bottom half to be scheduled.
- */
-void qemu_bh_schedule(QEMUBH *bh);
-
-/**
- * qemu_bh_cancel: Cancel execution of a bottom half.
- *
- * Canceling execution of a bottom half undoes the effect of calls to
- * qemu_bh_schedule without freeing its resources yet. While cancellation
- * itself is also wait-free and thread-safe, it can of course race with the
- * loop that executes bottom halves unless you are holding the iothread
- * mutex. This makes it mostly useless if you are not holding the mutex.
- *
- * @bh: The bottom half to be canceled.
- */
-void qemu_bh_cancel(QEMUBH *bh);
-
-/**
- *qemu_bh_delete: Cancel execution of a bottom half and free its resources.
- *
- * Deleting a bottom half frees the memory that was allocated for it by
- * qemu_bh_new. It also implies canceling the bottom half if it was
- * scheduled.
- *
- * @bh: The bottom half to be deleted.
- */
-void qemu_bh_delete(QEMUBH *bh);
-
-/* Return whether there are any pending callbacks from the GSource
- * attached to the AioContext.
- *
- * This is used internally in the implementation of the GSource.
- */
-bool aio_pending(AioContext *ctx);
-
-/* Progress in completing AIO work to occur. This can issue new pending
- * aio as a result of executing I/O completion or bh callbacks.
- *
- * If there is no pending AIO operation or completion (bottom half),
- * return false. If there are pending AIO operations of bottom halves,
- * return true.
- *
- * If there are no pending bottom halves, but there are pending AIO
- * operations, it may not be possible to make any progress without
- * blocking. If @blocking is true, this function will wait until one
- * or more AIO events have completed, to ensure something has moved
- * before returning.
- */
-bool aio_poll(AioContext *ctx, bool blocking);
-
-#ifdef CONFIG_POSIX
-/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
-typedef int (AioFlushHandler)(void *opaque);
-
-/* Register a file descriptor and associated callbacks. Behaves very similarly
- * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
- * be invoked when using qemu_aio_wait().
- *
- * Code that invokes AIO completion functions should rely on this function
- * instead of qemu_set_fd_handler[2].
- */
-void aio_set_fd_handler(AioContext *ctx,
- int fd,
- IOHandler *io_read,
- IOHandler *io_write,
- AioFlushHandler *io_flush,
- void *opaque);
-#endif
-
-/* Register an event notifier and associated callbacks. Behaves very similarly
- * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
- * will be invoked when using qemu_aio_wait().
- *
- * Code that invokes AIO completion functions should rely on this function
- * instead of event_notifier_set_handler.
- */
-void aio_set_event_notifier(AioContext *ctx,
- EventNotifier *notifier,
- EventNotifierHandler *io_read,
- AioFlushEventNotifierHandler *io_flush);
-
-/* Return a GSource that lets the main loop poll the file descriptors attached
- * to this AioContext.
- */
-GSource *aio_get_g_source(AioContext *ctx);
-
-/* Return the ThreadPool bound to this AioContext */
-struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
-
-/* Functions to operate on the main QEMU AioContext. */
-
-bool qemu_aio_wait(void);
-void qemu_aio_set_event_notifier(EventNotifier *notifier,
- EventNotifierHandler *io_read,
- AioFlushEventNotifierHandler *io_flush);
-
-#ifdef CONFIG_POSIX
-void qemu_aio_set_fd_handler(int fd,
- IOHandler *io_read,
- IOHandler *io_write,
- AioFlushHandler *io_flush,
- void *opaque);
-#endif
-
-#endif
diff --git a/contrib/qemu/include/block/block.h b/contrib/qemu/include/block/block.h
deleted file mode 100644
index b6b9014a9ce..00000000000
--- a/contrib/qemu/include/block/block.h
+++ /dev/null
@@ -1,443 +0,0 @@
-#ifndef BLOCK_H
-#define BLOCK_H
-
-#include "block/aio.h"
-#include "qemu-common.h"
-#include "qemu/option.h"
-#include "block/coroutine.h"
-#include "qapi/qmp/qobject.h"
-#include "qapi-types.h"
-
-/* block.c */
-typedef struct BlockDriver BlockDriver;
-typedef struct BlockJob BlockJob;
-
-typedef struct BlockDriverInfo { /* image details; bdrv_get_info() takes a pointer to one */
- /* in bytes, 0 if irrelevant */
- int cluster_size;
- /* offset at which the VM state can be saved (0 if not possible) */
- int64_t vm_state_offset;
- bool is_dirty; /* NOTE(review): undocumented -- presumably the image's dirty flag; confirm */
-} BlockDriverInfo;
-
-typedef struct BlockFragInfo { /* cluster counters; embedded in BdrvCheckResult as "bfi" */
- uint64_t allocated_clusters;
- uint64_t total_clusters;
- uint64_t fragmented_clusters;
- uint64_t compressed_clusters;
-} BlockFragInfo;
-
-/* Callbacks for block device models; a table of these is handed to bdrv_set_dev_ops() */
-typedef struct BlockDevOps {
- /*
- * Runs when virtual media changed (monitor commands eject, change)
- * Argument load is true on load and false on eject.
- * Beware: doesn't run when a host device's physical media
- * changes. Sure would be useful if it did.
- * Device models with removable media must implement this callback.
- */
- void (*change_media_cb)(void *opaque, bool load);
- /*
- * Runs when an eject request is issued from the monitor, the tray
- * is closed, and the medium is locked.
- * Device models that do not implement is_medium_locked will not need
- * this callback. Device models that can lock the medium or tray might
- * want to implement the callback and unlock the tray when "force" is
- * true, even if they do not support eject requests.
- */
- void (*eject_request_cb)(void *opaque, bool force);
- /*
- * Is the virtual tray open?
- * Device models implement this only when the device has a tray.
- */
- bool (*is_tray_open)(void *opaque);
- /*
- * Is the virtual medium locked into the device?
- * Device models implement this only when the device has such a lock.
- */
- bool (*is_medium_locked)(void *opaque);
- /*
- * Runs when the size changed (e.g. monitor command block_resize)
- */
- void (*resize_cb)(void *opaque);
-} BlockDevOps;
-
-#define BDRV_O_RDWR 0x0002
-#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
-#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
-#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
-#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
-#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
-#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
-#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
-#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
-#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
-#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
-
-#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
-
-#define BDRV_SECTOR_BITS 9
-#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
-#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
-
-typedef enum { /* returned by bdrv_get_error_action(); taken by bdrv_error_action() */
- BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
-} BlockErrorAction;
-
-typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
-
-typedef struct BDRVReopenState { /* argument of bdrv_reopen_prepare(), _commit() and _abort() */
- BlockDriverState *bs;
- int flags; /* presumably the BDRV_O_* flags to reopen with -- TODO confirm */
- void *opaque;
-} BDRVReopenState;
-
-
-void bdrv_iostatus_enable(BlockDriverState *bs);
-void bdrv_iostatus_reset(BlockDriverState *bs);
-void bdrv_iostatus_disable(BlockDriverState *bs);
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
-void bdrv_info_print(Monitor *mon, const QObject *data);
-void bdrv_info(Monitor *mon, QObject **ret_data);
-void bdrv_stats_print(Monitor *mon, const QObject *data);
-void bdrv_info_stats(Monitor *mon, QObject **ret_data);
-
-/* disk I/O throttling */
-void bdrv_io_limits_enable(BlockDriverState *bs);
-void bdrv_io_limits_disable(BlockDriverState *bs);
-bool bdrv_io_limits_enabled(BlockDriverState *bs);
-
-void bdrv_init(void);
-void bdrv_init_with_whitelist(void);
-BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix);
-BlockDriver *bdrv_find_format(const char *format_name);
-BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
- bool readonly);
-int bdrv_create(BlockDriver *drv, const char* filename,
- QEMUOptionParameter *options);
-int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
-BlockDriverState *bdrv_new(const char *device_name);
-void bdrv_make_anon(BlockDriverState *bs);
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
-void bdrv_delete(BlockDriverState *bs);
-int bdrv_parse_cache_flags(const char *mode, int *flags);
-int bdrv_parse_discard_flags(const char *mode, int *flags);
-int bdrv_file_open(BlockDriverState **pbs, const char *filename,
- QDict *options, int flags);
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *options);
-int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
- int flags, BlockDriver *drv);
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, int flags);
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
-void bdrv_reopen_commit(BDRVReopenState *reopen_state);
-void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-void bdrv_close(BlockDriverState *bs);
-void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
-int bdrv_attach_dev(BlockDriverState *bs, void *dev);
-void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
-void bdrv_detach_dev(BlockDriverState *bs, void *dev);
-void *bdrv_get_attached_dev(BlockDriverState *bs);
-void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
- void *opaque);
-void bdrv_dev_eject_request(BlockDriverState *bs, bool force);
-bool bdrv_dev_has_removable_media(BlockDriverState *bs);
-bool bdrv_dev_is_tray_open(BlockDriverState *bs);
-bool bdrv_dev_is_medium_locked(BlockDriverState *bs);
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
-int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
-int bdrv_pread(BlockDriverState *bs, int64_t offset,
- void *buf, int count);
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int count);
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
- const void *buf, int count);
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
-/*
- * Efficiently zero a region of the disk image. Note that this is a regular
- * I/O request like read or write and should have a reasonable size. This
- * function is not suitable for zeroing the entire image in a single request
- * because it may allocate memory for the entire region.
- */
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors);
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum);
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- int64_t sector_num,
- int nb_sectors, int *pnum);
-BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
- const char *backing_file);
-int bdrv_get_backing_file_depth(BlockDriverState *bs);
-int bdrv_truncate(BlockDriverState *bs, int64_t offset);
-int64_t bdrv_getlength(BlockDriverState *bs);
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-int bdrv_commit(BlockDriverState *bs);
-int bdrv_commit_all(void);
-int bdrv_change_backing_file(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt);
-void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
- BlockDriverState *base);
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
- BlockDriverState *bs);
-BlockDriverState *bdrv_find_base(BlockDriverState *bs);
-
-
-typedef struct BdrvCheckResult { /* result structure whose address is passed to bdrv_check() */
- int corruptions;
- int leaks;
- int check_errors;
- int corruptions_fixed;
- int leaks_fixed;
- int64_t image_end_offset;
- BlockFragInfo bfi; /* cluster counters, see BlockFragInfo */
-} BdrvCheckResult;
-
-typedef enum { /* type of the "fix" argument of bdrv_check() */
- BDRV_FIX_LEAKS = 1,
- BDRV_FIX_ERRORS = 2,
-} BdrvCheckMode; /* values are distinct bits; presumably OR-able -- TODO confirm */
-
-int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
-
-/* async block I/O */
-typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
- int sector_num);
-BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
-BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-void bdrv_aio_cancel(BlockDriverAIOCB *acb);
-
-typedef struct BlockRequest { /* element type of the reqs array given to bdrv_aio_multiwrite() */
- /* Fields to be filled by multiwrite caller */
- int64_t sector;
- int nb_sectors;
- QEMUIOVector *qiov;
- BlockDriverCompletionFunc *cb;
- void *opaque;
-
- /* Filled by multiwrite implementation */
- int error;
-} BlockRequest;
-
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs);
-
-/* sg packet commands */
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
-BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockDriverCompletionFunc *cb, void *opaque);
-
-/* Invalidate any cached metadata used by image formats */
-void bdrv_invalidate_cache(BlockDriverState *bs);
-void bdrv_invalidate_cache_all(void);
-
-void bdrv_clear_incoming_migration_all(void);
-
-/* Ensure contents are flushed to disk. */
-int bdrv_flush(BlockDriverState *bs);
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
-int bdrv_flush_all(void);
-void bdrv_close_all(void);
-void bdrv_drain_all(void);
-
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
-int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
-int bdrv_has_zero_init_1(BlockDriverState *bs);
-int bdrv_has_zero_init(BlockDriverState *bs);
-int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum);
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- int64_t sector_num, int nb_sectors, int *pnum);
-
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error);
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
- bool is_read, int error);
-int bdrv_is_read_only(BlockDriverState *bs);
-int bdrv_is_sg(BlockDriverState *bs);
-int bdrv_enable_write_cache(BlockDriverState *bs);
-void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
-int bdrv_is_inserted(BlockDriverState *bs);
-int bdrv_media_changed(BlockDriverState *bs);
-void bdrv_lock_medium(BlockDriverState *bs, bool locked);
-void bdrv_eject(BlockDriverState *bs, bool eject_flag);
-const char *bdrv_get_format_name(BlockDriverState *bs);
-BlockDriverState *bdrv_find(const char *name);
-BlockDriverState *bdrv_next(BlockDriverState *bs);
-void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
- void *opaque);
-int bdrv_is_encrypted(BlockDriverState *bs);
-int bdrv_key_required(BlockDriverState *bs);
-int bdrv_set_key(BlockDriverState *bs, const char *key);
-int bdrv_query_missing_keys(void);
-void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
- void *opaque);
-const char *bdrv_get_device_name(BlockDriverState *bs);
-int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors);
-
-const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
-void bdrv_get_backing_filename(BlockDriverState *bs,
- char *filename, int filename_size);
-void bdrv_get_full_backing_filename(BlockDriverState *bs,
- char *dest, size_t sz);
-int bdrv_is_snapshot(BlockDriverState *bs);
-
-int path_is_absolute(const char *path);
-void path_combine(char *dest, int dest_size,
- const char *base_path,
- const char *filename);
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size);
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
-
-void bdrv_img_create(const char *filename, const char *fmt,
- const char *base_filename, const char *base_fmt,
- char *options, uint64_t img_size, int flags,
- Error **errp, bool quiet);
-
-void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
-void *qemu_blockalign(BlockDriverState *bs, size_t size);
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-
-struct HBitmapIter;
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
-int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
-int64_t bdrv_get_dirty_count(BlockDriverState *bs);
-
-void bdrv_enable_copy_on_read(BlockDriverState *bs);
-void bdrv_disable_copy_on_read(BlockDriverState *bs);
-
-void bdrv_set_in_use(BlockDriverState *bs, int in_use);
-int bdrv_in_use(BlockDriverState *bs);
-
-#ifdef CONFIG_LINUX_AIO
-int raw_get_aio_fd(BlockDriverState *bs);
-#else
-static inline int raw_get_aio_fd(BlockDriverState *bs) /* stub used when CONFIG_LINUX_AIO is not defined */
-{
- return -ENOTSUP; /* always fails; bs is not examined */
-}
-#endif
-
-enum BlockAcctType { /* kind of I/O being accounted; see bdrv_acct_start() */
- BDRV_ACCT_READ,
- BDRV_ACCT_WRITE,
- BDRV_ACCT_FLUSH,
- BDRV_MAX_IOTYPE, /* last enumerator: equals the number of types listed above */
-};
-
-typedef struct BlockAcctCookie { /* passed to bdrv_acct_start() and then bdrv_acct_done() */
- int64_t bytes;
- int64_t start_time_ns;
- enum BlockAcctType type;
-} BlockAcctCookie;
-
-void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
- int64_t bytes, enum BlockAcctType type);
-void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie);
-
-typedef enum { /* event ids passed to bdrv_debug_event(), which BLKDBG_EVENT() expands to */
- BLKDBG_L1_UPDATE,
-
- BLKDBG_L1_GROW_ALLOC_TABLE,
- BLKDBG_L1_GROW_WRITE_TABLE,
- BLKDBG_L1_GROW_ACTIVATE_TABLE,
-
- BLKDBG_L2_LOAD,
- BLKDBG_L2_UPDATE,
- BLKDBG_L2_UPDATE_COMPRESSED,
- BLKDBG_L2_ALLOC_COW_READ,
- BLKDBG_L2_ALLOC_WRITE,
-
- BLKDBG_READ_AIO,
- BLKDBG_READ_BACKING_AIO,
- BLKDBG_READ_COMPRESSED,
-
- BLKDBG_WRITE_AIO,
- BLKDBG_WRITE_COMPRESSED,
-
- BLKDBG_VMSTATE_LOAD,
- BLKDBG_VMSTATE_SAVE,
-
- BLKDBG_COW_READ,
- BLKDBG_COW_WRITE,
-
- BLKDBG_REFTABLE_LOAD,
- BLKDBG_REFTABLE_GROW,
-
- BLKDBG_REFBLOCK_LOAD,
- BLKDBG_REFBLOCK_UPDATE,
- BLKDBG_REFBLOCK_UPDATE_PART,
- BLKDBG_REFBLOCK_ALLOC,
- BLKDBG_REFBLOCK_ALLOC_HOOKUP,
- BLKDBG_REFBLOCK_ALLOC_WRITE,
- BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
- BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
- BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
-
- BLKDBG_CLUSTER_ALLOC,
- BLKDBG_CLUSTER_ALLOC_BYTES,
- BLKDBG_CLUSTER_FREE,
-
- BLKDBG_FLUSH_TO_OS,
- BLKDBG_FLUSH_TO_DISK,
-
- BLKDBG_EVENT_MAX, /* last enumerator: equals the number of events listed above */
-} BlkDebugEvent;
-
-#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
-void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
-
-int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
- const char *tag);
-int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
-bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-
-#endif
diff --git a/contrib/qemu/include/block/block_int.h b/contrib/qemu/include/block/block_int.h
deleted file mode 100644
index c6ac871e210..00000000000
--- a/contrib/qemu/include/block/block_int.h
+++ /dev/null
@@ -1,421 +0,0 @@
-/*
- * QEMU System Emulator block driver
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef BLOCK_INT_H
-#define BLOCK_INT_H
-
-#include "block/block.h"
-#include "qemu/option.h"
-#include "qemu/queue.h"
-#include "block/coroutine.h"
-#include "qemu/timer.h"
-#include "qapi-types.h"
-#include "qapi/qmp/qerror.h"
-#include "monitor/monitor.h"
-#include "qemu/hbitmap.h"
-#include "block/snapshot.h"
-
-#define BLOCK_FLAG_ENCRYPT 1
-#define BLOCK_FLAG_COMPAT6 4
-#define BLOCK_FLAG_LAZY_REFCOUNTS 8
-
-#define BLOCK_IO_LIMIT_READ 0
-#define BLOCK_IO_LIMIT_WRITE 1
-#define BLOCK_IO_LIMIT_TOTAL 2
-
-#define BLOCK_IO_SLICE_TIME 100000000
-#define NANOSECONDS_PER_SECOND 1000000000.0
-
-#define BLOCK_OPT_SIZE "size"
-#define BLOCK_OPT_ENCRYPT "encryption"
-#define BLOCK_OPT_COMPAT6 "compat6"
-#define BLOCK_OPT_BACKING_FILE "backing_file"
-#define BLOCK_OPT_BACKING_FMT "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE "table_size"
-#define BLOCK_OPT_PREALLOC "preallocation"
-#define BLOCK_OPT_SUBFMT "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL "compat"
-#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
-
-typedef struct BdrvTrackedRequest { /* element type of BlockDriverState.tracked_requests */
- BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
- bool is_write;
- QLIST_ENTRY(BdrvTrackedRequest) list;
- Coroutine *co; /* owner, used for deadlock detection */
- CoQueue wait_queue; /* coroutines blocked on this request */
-} BdrvTrackedRequest;
-
-
-typedef struct BlockIOLimit { /* type of BlockDriverState.io_limits; argument of bdrv_set_io_limits() */
- int64_t bps[3]; /* presumably indexed by BLOCK_IO_LIMIT_READ/WRITE/TOTAL -- TODO confirm */
- int64_t iops[3]; /* presumably indexed the same way -- TODO confirm */
-} BlockIOLimit;
-
-typedef struct BlockIOBaseValue { /* type of BlockDriverState.slice_submitted */
- uint64_t bytes[2]; /* presumably [BLOCK_IO_LIMIT_READ] and [BLOCK_IO_LIMIT_WRITE] -- TODO confirm */
- uint64_t ios[2]; /* presumably indexed the same way -- TODO confirm */
-} BlockIOBaseValue;
-
-struct BlockDriver { /* table of driver callbacks; an instance is what bdrv_register() takes */
- const char *format_name;
- int instance_size;
- int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
- int (*bdrv_probe_device)(const char *filename);
-
- /* Any driver implementing this callback is expected to be able to handle
- * NULL file names in its .bdrv_open() implementation */
- void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
-
- /* For handling image reopen for split or non-split files */
- int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
- void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
-
- int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags);
- int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags);
- int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
- int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
- void (*bdrv_close)(BlockDriverState *bs);
- void (*bdrv_rebind)(BlockDriverState *bs);
- int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
- int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
- int (*bdrv_make_empty)(BlockDriverState *bs);
- /* aio */
- BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-
- int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
- int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
- /*
- * Efficiently zero a region of the disk image. Typically an image format
- * would use a compact metadata representation to implement this. This
- * function pointer may be NULL and .bdrv_co_writev() will be called
- * instead.
- */
- int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
- int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
- int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum);
-
- /*
- * Invalidate any cached meta-data.
- */
- void (*bdrv_invalidate_cache)(BlockDriverState *bs);
-
- /*
- * Flushes all data that was already written to the OS all the way down to
- * the disk (for example raw-posix calls fsync()).
- */
- int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
-
- /*
- * Flushes all internal caches to the OS. The data may still sit in a
- * writeback cache of the host OS, but it will survive a crash of the qemu
- * process.
- */
- int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
-
- const char *protocol_name;
- int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
- int64_t (*bdrv_getlength)(BlockDriverState *bs);
- int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
- int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
-
- int (*bdrv_snapshot_create)(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
- int (*bdrv_snapshot_goto)(BlockDriverState *bs,
- const char *snapshot_id);
- int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id);
- int (*bdrv_snapshot_list)(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info);
- int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
- const char *snapshot_name);
- int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
-
- int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t pos);
- int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
-
- int (*bdrv_change_backing_file)(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt);
-
- /* removable device specific */
- int (*bdrv_is_inserted)(BlockDriverState *bs);
- int (*bdrv_media_changed)(BlockDriverState *bs);
- void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
- void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
-
- /* to control generic scsi devices */
- int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
- BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockDriverCompletionFunc *cb, void *opaque);
-
- /* List of options for creating images, terminated by name == NULL */
- QEMUOptionParameter *create_options;
-
-
- /*
- * Returns 0 for completed check, -errno for internal errors.
- * The check results are stored in result.
- */
- int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
- BdrvCheckMode fix);
-
- void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
-
- /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */
- int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
- const char *tag);
- int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
- bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
-
- /*
- * Returns 1 if newly created images are guaranteed to contain only
- * zeros, 0 otherwise.
- */
- int (*bdrv_has_zero_init)(BlockDriverState *bs);
-
- QLIST_ENTRY(BlockDriver) list;
-};
-
-/*
- * Note: the function bdrv_append() copies and swaps contents of
- * BlockDriverStates, so if you add new fields to this struct, please
- * inspect bdrv_append() to determine if the new fields need to be
- * copied as well.
- */
-struct BlockDriverState {
- int64_t total_sectors; /* if we are reading a disk image, give its
- size in sectors */
- int read_only; /* if true, the media is read only */
- int open_flags; /* flags used to open the file, re-used for re-open */
- int encrypted; /* if true, the media is encrypted */
- int valid_key; /* if true, a valid encryption key has been set */
- int sg; /* if true, the device is a /dev/sg* */
- int copy_on_read; /* if true, copy read backing sectors into image
- note this is a reference count */
-
- BlockDriver *drv; /* NULL means no media */
- void *opaque;
-
- void *dev; /* attached device model, if any */
- /* TODO change to DeviceState when all users are qdevified */
- const BlockDevOps *dev_ops;
- void *dev_opaque;
-
- char filename[1024];
- char backing_file[1024]; /* if non zero, the image is a diff of
- this file image */
- char backing_format[16]; /* if non-zero and backing_file exists */
- int is_temporary;
-
- BlockDriverState *backing_hd;
- BlockDriverState *file;
-
- NotifierList close_notifiers;
-
- /* Callback before write request is processed */
- NotifierWithReturnList before_write_notifiers;
-
- /* number of in-flight copy-on-read requests */
- unsigned int copy_on_read_in_flight;
-
- /* the time for latest disk I/O */
- int64_t slice_start;
- int64_t slice_end;
- BlockIOLimit io_limits;
- BlockIOBaseValue slice_submitted;
- CoQueue throttled_reqs;
- QEMUTimer *block_timer;
- bool io_limits_enabled;
-
- /* I/O stats (display with "info blockstats"). */
- uint64_t nr_bytes[BDRV_MAX_IOTYPE];
- uint64_t nr_ops[BDRV_MAX_IOTYPE];
- uint64_t total_time_ns[BDRV_MAX_IOTYPE];
- uint64_t wr_highest_sector;
-
- /* Whether the disk can expand beyond total_sectors */
- int growable;
-
- /* the memory alignment required for the buffers handled by this driver */
- int buffer_alignment;
-
- /* do we need to tell the guest if we have a volatile write cache? */
- int enable_write_cache;
-
- /* NOTE: the following fields are only hints for real hardware
- drivers. They are not used by the block driver */
- BlockdevOnError on_read_error, on_write_error;
- bool iostatus_enabled;
- BlockDeviceIoStatus iostatus;
- char device_name[32];
- HBitmap *dirty_bitmap;
- int in_use; /* users other than guest access, eg. block migration */
- QTAILQ_ENTRY(BlockDriverState) list;
-
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
-
- /* long-running background operation */
- BlockJob *job;
-
- QDict *options;
-};
-
-int get_tmp_filename(char *filename, int size);
-
-void bdrv_set_io_limits(BlockDriverState *bs,
- BlockIOLimit *io_limits);
-
-/**
- * bdrv_add_before_write_notifier:
- *
- * Register a callback that is invoked before write requests are processed but
- * after any throttling or waiting for overlapping requests.
- */
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
- NotifierWithReturn *notifier);
-
-/**
- * bdrv_get_aio_context:
- *
- * Returns: the currently bound #AioContext
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
-
-#ifdef _WIN32
-int is_windows_drive(const char *filename);
-#endif
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
- enum MonitorEvent ev,
- BlockErrorAction action, bool is_read);
-
-/**
- * stream_start:
- * @bs: Block device to operate on.
- * @base: Block device that will become the new base, or %NULL to
- * flatten the whole backing file chain onto @bs.
- * @base_id: The file name that will be written to @bs as the new
- * backing file if the job completes. Ignored if @base is %NULL.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Start a streaming operation on @bs. Clusters that are unallocated
- * in @bs, but allocated in any image between @base and @bs (both
- * exclusive) will be written to @bs. At the end of a successful
- * streaming job, the backing file of @bs will be changed to
- * @base_id in the written image and to @base in the live BlockDriverState.
- */
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
- const char *base_id, int64_t speed, BlockdevOnError on_error,
- BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/**
- * commit_start:
- * @bs: Top Block device
- * @base: Block device that will be written into, and become the new top
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- * NOTE(review): @top is undocumented here; confirm how it differs from @bs.
- */
-void commit_start(BlockDriverState *bs, BlockDriverState *base,
- BlockDriverState *top, int64_t speed,
- BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/*
- * mirror_start:
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @granularity: The chosen granularity for the dirty bitmap.
- * @buf_size: The amount of data that can be in flight at one time.
- * @mode: Whether to collapse all images in the chain to the target.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Start a mirroring operation on @bs. Clusters that are allocated
- * in @bs will be written to @target until the job is cancelled or
- * manually completed. At the end of a successful mirroring job,
- * @bs will be switched to read from @target.
- */
-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, int64_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/*
- * backup_start:
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- * Start a backup operation on @bs. Clusters in @bs are written to @target
- * until the job is cancelled or manually completed.
- */
-void backup_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- BlockDriverCompletionFunc *cb, void *opaque,
- Error **errp);
-
-#endif /* BLOCK_INT_H */
diff --git a/contrib/qemu/include/block/blockjob.h b/contrib/qemu/include/block/blockjob.h
deleted file mode 100644
index c290d07bba0..00000000000
--- a/contrib/qemu/include/block/blockjob.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Declarations for long-running block device operations
- *
- * Copyright (c) 2011 IBM Corp.
- * Copyright (c) 2012 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef BLOCKJOB_H
-#define BLOCKJOB_H 1
-
-#include "block/block.h"
-
-/**
- * BlockJobType:
- *
- * A class type for block job objects.
- */
-typedef struct BlockJobType {
- /** Derived BlockJob struct size */
- size_t instance_size;
-
- /** String describing the operation, part of query-block-jobs QMP API */
- const char *job_type;
-
- /** Optional callback for job types that support setting a speed limit */
- void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
-
- /** Optional callback for job types that need to forward I/O status reset */
- void (*iostatus_reset)(BlockJob *job);
-
- /**
- * Optional callback for job types whose completion must be triggered
- * manually.
- */
- void (*complete)(BlockJob *job, Error **errp);
-} BlockJobType;
-
-/**
- * BlockJob:
- *
- * Long-running operation on a BlockDriverState.
- */
-struct BlockJob {
- /** The job type, including the job vtable. */
- const BlockJobType *job_type;
-
- /** The block device on which the job is operating. */
- BlockDriverState *bs;
-
- /**
- * The coroutine that executes the job. If not NULL, it is
- * reentered when busy is false and the job is cancelled.
- */
- Coroutine *co;
-
- /**
- * Set to true if the job should cancel itself. The flag must
- * always be tested just before toggling the busy flag from false
- * to true. After a job has been cancelled, it should only yield
- * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
- */
- bool cancelled;
-
- /**
- * Set to true if the job is either paused, or will pause itself
- * as soon as possible (if busy == true).
- */
- bool paused;
-
- /**
- * Set to false by the job while it is in a quiescent state, where
- * no I/O is pending and the job has yielded on any condition
- * that is not detected by #qemu_aio_wait, such as a timer.
- */
- bool busy;
-
- /** Status that is published by the query-block-jobs QMP API */
- BlockDeviceIoStatus iostatus;
-
- /** Offset that is published by the query-block-jobs QMP API */
- int64_t offset;
-
- /** Length that is published by the query-block-jobs QMP API */
- int64_t len;
-
- /** Speed that was set with @block_job_set_speed. */
- int64_t speed;
-
- /** The completion function that will be called when the job completes. */
- BlockDriverCompletionFunc *cb;
-
- /** The opaque value that is passed to the completion function. */
- void *opaque;
-};
-
-/**
- * block_job_create:
- * @job_type: The class object for the newly-created job.
- * @bs: The block
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Create a new long-running block device job and return it. The job
- * will call @cb asynchronously when the job completes. Note that
- * @bs may have been closed at the time the @cb it is called. If
- * this is the case, the job may be reported as either cancelled or
- * completed.
- *
- * This function is not part of the public job interface; it should be
- * called from a wrapper that is specific to the job type.
- */
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
- int64_t speed, BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @clock: The clock to sleep on.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds. Canceling the job will interrupt the wait immediately.
- */
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
-
-/**
- * block_job_completed:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_completed(BlockJob *job, int ret);
-
-/**
- * block_job_set_speed:
- * @job: The job to set the speed for.
- * @speed: The new value
- * @errp: Error object.
- *
- * Set a rate-limiting parameter for the job; the actual meaning may
- * vary depending on the job type.
- */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job);
-
-/**
- * block_job_complete:
- * @job: The job to be completed.
- * @errp: Error object.
- *
- * Asynchronously complete the specified job.
- */
-void block_job_complete(BlockJob *job, Error **errp);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_query:
- * @job: The job to get information about.
- *
- * Return information about a job.
- */
-BlockJobInfo *block_job_query(BlockJob *job);
-
-/**
- * block_job_pause:
- * @job: The job to be paused.
- *
- * Asynchronously pause the specified job.
- */
-void block_job_pause(BlockJob *job);
-
-/**
- * block_job_resume:
- * @job: The job to be resumed.
- *
- * Resume the specified job.
- */
-void block_job_resume(BlockJob *job);
-
-/**
- * qobject_from_block_job:
- * @job: The job whose information is requested.
- *
- * Return a QDict corresponding to @job's query-block-jobs entry.
- */
-QObject *qobject_from_block_job(BlockJob *job);
-
-/**
- * block_job_ready:
- * @job: The job which is now ready to complete.
- *
- * Send a BLOCK_JOB_READY event for the specified job.
- */
-void block_job_ready(BlockJob *job);
-
-/**
- * block_job_is_paused:
- * @job: The job being queried.
- *
- * Returns whether the job is currently paused, or will pause
- * as soon as it reaches a sleeping point.
- */
-bool block_job_is_paused(BlockJob *job);
-
-/**
- * block_job_cancel_sync:
- * @job: The job to be canceled.
- *
- * Synchronously cancel the job. The completion callback is called
- * before the function returns. The job may actually complete
- * instead of canceling itself; the circumstances under which this
- * happens depend on the kind of job that is active.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
-
-/**
- * block_job_iostatus_reset:
- * @job: The job whose I/O status should be reset.
- *
- * Reset I/O status on @job and on BlockDriverState objects it uses,
- * other than job->bs.
- */
-void block_job_iostatus_reset(BlockJob *job);
-
-/**
- * block_job_error_action:
- * @job: The job to signal an error for.
- * @bs: The block device on which to set an I/O error.
- * @on_err: The error action setting.
- * @is_read: Whether the operation was a read.
- * @error: The error that was reported.
- *
- * Report an I/O error for a block job and possibly stop the VM. Return the
- * action that was selected based on @on_err and @error.
- */
-BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
- BlockdevOnError on_err,
- int is_read, int error);
-#endif
diff --git a/contrib/qemu/include/block/coroutine.h b/contrib/qemu/include/block/coroutine.h
deleted file mode 100644
index 377805a3b08..00000000000
--- a/contrib/qemu/include/block/coroutine.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * QEMU coroutine implementation
- *
- * Copyright IBM, Corp. 2011
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Kevin Wolf <kwolf@redhat.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef QEMU_COROUTINE_H
-#define QEMU_COROUTINE_H
-
-#include <stdbool.h>
-#include "qemu/queue.h"
-#include "qemu/timer.h"
-
-/**
- * Coroutines are a mechanism for stack switching and can be used for
- * cooperative userspace threading. These functions provide a simple but
- * useful flavor of coroutines that is suitable for writing sequential code,
- * rather than callbacks, for operations that need to give up control while
- * waiting for events to complete.
- *
- * These functions are re-entrant and may be used outside the global mutex.
- */
-
-/**
- * Mark a function that executes in coroutine context
- *
- * Functions that execute in coroutine context cannot be called directly from
- * normal functions. In the future it would be nice to enable compiler or
- * static checker support for catching such errors. This annotation might make
- * it possible and in the meantime it serves as documentation.
- *
- * For example:
- *
- * static void coroutine_fn foo(void) {
- * ....
- * }
- */
-#define coroutine_fn
-
-typedef struct Coroutine Coroutine;
-
-/**
- * Coroutine entry point
- *
- * When the coroutine is entered for the first time, opaque is passed in as an
- * argument.
- *
- * When this function returns, the coroutine is destroyed automatically and
- * execution continues in the caller who last entered the coroutine.
- */
-typedef void coroutine_fn CoroutineEntry(void *opaque);
-
-/**
- * Create a new coroutine
- *
- * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
- */
-Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
-
-/**
- * Transfer control to a coroutine
- *
- * The opaque argument is passed as the argument to the entry point when
- * entering the coroutine for the first time. It is subsequently ignored.
- */
-void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
-
-/**
- * Transfer control back to a coroutine's caller
- *
- * This function does not return until the coroutine is re-entered using
- * qemu_coroutine_enter().
- */
-void coroutine_fn qemu_coroutine_yield(void);
-
-/**
- * Get the currently executing coroutine
- */
-Coroutine *coroutine_fn qemu_coroutine_self(void);
-
-/**
- * Return whether or not currently inside a coroutine
- *
- * This can be used to write functions that work both when in coroutine context
- * and when not in coroutine context. Note that such functions cannot use the
- * coroutine_fn annotation since they work outside coroutine context.
- */
-bool qemu_in_coroutine(void);
-
-
-
-/**
- * CoQueues are a mechanism to queue coroutines in order to continue executing
- * them later. They provide the fundamental primitives on which coroutine locks
- * are built.
- */
-typedef struct CoQueue {
- QTAILQ_HEAD(, Coroutine) entries;
- AioContext *ctx;
-} CoQueue;
-
-/**
- * Initialise a CoQueue. This must be called before any other operation is used
- * on the CoQueue.
- */
-void qemu_co_queue_init(CoQueue *queue);
-
-/**
- * Adds the current coroutine to the CoQueue and transfers control to the
- * caller of the coroutine.
- */
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
-
-/**
- * Adds the current coroutine to the head of the CoQueue and transfers control to the
- * caller of the coroutine.
- */
-void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue);
-
-/**
- * Restarts the next coroutine in the CoQueue and removes it from the queue.
- *
- * Returns true if a coroutine was restarted, false if the queue is empty.
- */
-bool qemu_co_queue_next(CoQueue *queue);
-
-/**
- * Restarts all coroutines in the CoQueue and leaves the queue empty.
- */
-void qemu_co_queue_restart_all(CoQueue *queue);
-
-/**
- * Checks if the CoQueue is empty.
- */
-bool qemu_co_queue_empty(CoQueue *queue);
-
-
-/**
- * Provides a mutex that can be used to synchronise coroutines
- */
-typedef struct CoMutex {
- bool locked;
- CoQueue queue;
-} CoMutex;
-
-/**
- * Initialises a CoMutex. This must be called before any other operation is used
- * on the CoMutex.
- */
-void qemu_co_mutex_init(CoMutex *mutex);
-
-/**
- * Locks the mutex. If the lock cannot be taken immediately, control is
- * transferred to the caller of the current coroutine.
- */
-void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
-
-/**
- * Unlocks the mutex and schedules the next coroutine that was waiting for this
- * lock to be run.
- */
-void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
-
-typedef struct CoRwlock {
- bool writer;
- int reader;
- CoQueue queue;
-} CoRwlock;
-
-/**
- * Initialises a CoRwlock. This must be called before any other operation
- * is used on the CoRwlock
- */
-void qemu_co_rwlock_init(CoRwlock *lock);
-
-/**
- * Read locks the CoRwlock. If the lock cannot be taken immediately because
- * of a parallel writer, control is transferred to the caller of the current
- * coroutine.
- */
-void qemu_co_rwlock_rdlock(CoRwlock *lock);
-
-/**
- * Write Locks the mutex. If the lock cannot be taken immediately because
- * of a parallel reader, control is transferred to the caller of the current
- * coroutine.
- */
-void qemu_co_rwlock_wrlock(CoRwlock *lock);
-
-/**
- * Unlocks the read/write lock and schedules the next coroutine that was
- * waiting for this lock to be run.
- */
-void qemu_co_rwlock_unlock(CoRwlock *lock);
-
-/**
- * Yield the coroutine for a given duration
- *
- * Note this function uses timers and hence only works when a main loop is in
- * use. See main-loop.h and do not use from qemu-tool programs.
- */
-void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns);
-
-/**
- * Yield until a file descriptor becomes readable
- *
- * Note that this function clobbers the handlers for the file descriptor.
- */
-void coroutine_fn yield_until_fd_readable(int fd);
-#endif /* QEMU_COROUTINE_H */
diff --git a/contrib/qemu/include/block/coroutine_int.h b/contrib/qemu/include/block/coroutine_int.h
deleted file mode 100644
index f133d65af86..00000000000
--- a/contrib/qemu/include/block/coroutine_int.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Coroutine internals
- *
- * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef QEMU_COROUTINE_INT_H
-#define QEMU_COROUTINE_INT_H
-
-#include "qemu/queue.h"
-#include "block/coroutine.h"
-
-typedef enum {
- COROUTINE_YIELD = 1,
- COROUTINE_TERMINATE = 2,
-} CoroutineAction;
-
-struct Coroutine {
- CoroutineEntry *entry;
- void *entry_arg;
- Coroutine *caller;
- QSLIST_ENTRY(Coroutine) pool_next;
-
- /* Coroutines that should be woken up when we yield or terminate */
- QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
- QTAILQ_ENTRY(Coroutine) co_queue_next;
-};
-
-Coroutine *qemu_coroutine_new(void);
-void qemu_coroutine_delete(Coroutine *co);
-CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
- CoroutineAction action);
-void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
-
-#endif
diff --git a/contrib/qemu/include/block/snapshot.h b/contrib/qemu/include/block/snapshot.h
deleted file mode 100644
index eaf61f0326e..00000000000
--- a/contrib/qemu/include/block/snapshot.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Block layer snapshot related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef SNAPSHOT_H
-#define SNAPSHOT_H
-
-#include "qemu-common.h"
-
-typedef struct QEMUSnapshotInfo {
- char id_str[128]; /* unique snapshot id */
- /* the following fields are informative. They are not needed for
- the consistency of the snapshot */
- char name[256]; /* user chosen name */
- uint64_t vm_state_size; /* VM state info size */
- uint32_t date_sec; /* UTC date of the snapshot */
- uint32_t date_nsec;
- uint64_t vm_clock_nsec; /* VM clock relative to boot */
-} QEMUSnapshotInfo;
-
-int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
- const char *name);
-int bdrv_can_snapshot(BlockDriverState *bs);
-int bdrv_snapshot_create(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
-int bdrv_snapshot_goto(BlockDriverState *bs,
- const char *snapshot_id);
-int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
-int bdrv_snapshot_list(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info);
-int bdrv_snapshot_load_tmp(BlockDriverState *bs,
- const char *snapshot_name);
-#endif
diff --git a/contrib/qemu/include/config.h b/contrib/qemu/include/config.h
deleted file mode 100644
index e20f78696a1..00000000000
--- a/contrib/qemu/include/config.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include "config-host.h"
-#include "config-target.h"
diff --git a/contrib/qemu/include/exec/cpu-common.h b/contrib/qemu/include/exec/cpu-common.h
deleted file mode 100644
index e4996e19c32..00000000000
--- a/contrib/qemu/include/exec/cpu-common.h
+++ /dev/null
@@ -1,124 +0,0 @@
-#ifndef CPU_COMMON_H
-#define CPU_COMMON_H 1
-
-/* CPU interfaces that are target independent. */
-
-#ifndef CONFIG_USER_ONLY
-#include "exec/hwaddr.h"
-#endif
-
-#ifndef NEED_CPU_H
-#include "exec/poison.h"
-#endif
-
-#include "qemu/bswap.h"
-#include "qemu/queue.h"
-
-/**
- * CPUListState:
- * @cpu_fprintf: Print function.
- * @file: File to print to using @cpu_fprint.
- *
- * State commonly used for iterating over CPU models.
- */
-typedef struct CPUListState {
- fprintf_function cpu_fprintf;
- FILE *file;
-} CPUListState;
-
-#if !defined(CONFIG_USER_ONLY)
-
-enum device_endian {
- DEVICE_NATIVE_ENDIAN,
- DEVICE_BIG_ENDIAN,
- DEVICE_LITTLE_ENDIAN,
-};
-
-/* address in the RAM (different from a physical address) */
-#if defined(CONFIG_XEN_BACKEND)
-typedef uint64_t ram_addr_t;
-# define RAM_ADDR_MAX UINT64_MAX
-# define RAM_ADDR_FMT "%" PRIx64
-#else
-typedef uintptr_t ram_addr_t;
-# define RAM_ADDR_MAX UINTPTR_MAX
-# define RAM_ADDR_FMT "%" PRIxPTR
-#endif
-
-/* memory API */
-
-typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
-typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
-
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
-/* This should not be used by devices. */
-MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
-void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
-
-void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
- int len, int is_write);
-static inline void cpu_physical_memory_read(hwaddr addr,
- void *buf, int len)
-{
- cpu_physical_memory_rw(addr, buf, len, 0);
-}
-static inline void cpu_physical_memory_write(hwaddr addr,
- const void *buf, int len)
-{
- cpu_physical_memory_rw(addr, (void *)buf, len, 1);
-}
-void *cpu_physical_memory_map(hwaddr addr,
- hwaddr *plen,
- int is_write);
-void cpu_physical_memory_unmap(void *buffer, hwaddr len,
- int is_write, hwaddr access_len);
-void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
-
-bool cpu_physical_memory_is_io(hwaddr phys_addr);
-
-/* Coalesced MMIO regions are areas where write operations can be reordered.
- * This usually implies that write operations are side-effect free. This allows
- * batching which can make a major impact on performance when using
- * virtualization.
- */
-void qemu_flush_coalesced_mmio_buffer(void);
-
-uint32_t ldub_phys(hwaddr addr);
-uint32_t lduw_le_phys(hwaddr addr);
-uint32_t lduw_be_phys(hwaddr addr);
-uint32_t ldl_le_phys(hwaddr addr);
-uint32_t ldl_be_phys(hwaddr addr);
-uint64_t ldq_le_phys(hwaddr addr);
-uint64_t ldq_be_phys(hwaddr addr);
-void stb_phys(hwaddr addr, uint32_t val);
-void stw_le_phys(hwaddr addr, uint32_t val);
-void stw_be_phys(hwaddr addr, uint32_t val);
-void stl_le_phys(hwaddr addr, uint32_t val);
-void stl_be_phys(hwaddr addr, uint32_t val);
-void stq_le_phys(hwaddr addr, uint64_t val);
-void stq_be_phys(hwaddr addr, uint64_t val);
-
-#ifdef NEED_CPU_H
-uint32_t lduw_phys(hwaddr addr);
-uint32_t ldl_phys(hwaddr addr);
-uint64_t ldq_phys(hwaddr addr);
-void stl_phys_notdirty(hwaddr addr, uint32_t val);
-void stw_phys(hwaddr addr, uint32_t val);
-void stl_phys(hwaddr addr, uint32_t val);
-void stq_phys(hwaddr addr, uint64_t val);
-#endif
-
-void cpu_physical_memory_write_rom(hwaddr addr,
- const uint8_t *buf, int len);
-
-extern struct MemoryRegion io_mem_rom;
-extern struct MemoryRegion io_mem_notdirty;
-
-typedef void (RAMBlockIterFunc)(void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque);
-
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
-
-#endif
-
-#endif /* !CPU_COMMON_H */
diff --git a/contrib/qemu/include/exec/hwaddr.h b/contrib/qemu/include/exec/hwaddr.h
deleted file mode 100644
index c9eb78fba18..00000000000
--- a/contrib/qemu/include/exec/hwaddr.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Define hwaddr if it exists. */
-
-#ifndef HWADDR_H
-#define HWADDR_H
-
-#define HWADDR_BITS 64
-/* hwaddr is the type of a physical address (its size can
- be different from 'target_ulong'). */
-
-typedef uint64_t hwaddr;
-#define HWADDR_MAX UINT64_MAX
-#define TARGET_FMT_plx "%016" PRIx64
-#define HWADDR_PRId PRId64
-#define HWADDR_PRIi PRIi64
-#define HWADDR_PRIo PRIo64
-#define HWADDR_PRIu PRIu64
-#define HWADDR_PRIx PRIx64
-#define HWADDR_PRIX PRIX64
-
-#endif
diff --git a/contrib/qemu/include/exec/poison.h b/contrib/qemu/include/exec/poison.h
deleted file mode 100644
index 2341a750413..00000000000
--- a/contrib/qemu/include/exec/poison.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* Poison identifiers that should not be used when building
- target independent device code. */
-
-#ifndef HW_POISON_H
-#define HW_POISON_H
-#ifdef __GNUC__
-
-#pragma GCC poison TARGET_I386
-#pragma GCC poison TARGET_X86_64
-#pragma GCC poison TARGET_ALPHA
-#pragma GCC poison TARGET_ARM
-#pragma GCC poison TARGET_CRIS
-#pragma GCC poison TARGET_LM32
-#pragma GCC poison TARGET_M68K
-#pragma GCC poison TARGET_MIPS
-#pragma GCC poison TARGET_MIPS64
-#pragma GCC poison TARGET_OPENRISC
-#pragma GCC poison TARGET_PPC
-#pragma GCC poison TARGET_PPCEMB
-#pragma GCC poison TARGET_PPC64
-#pragma GCC poison TARGET_ABI32
-#pragma GCC poison TARGET_SH4
-#pragma GCC poison TARGET_SPARC
-#pragma GCC poison TARGET_SPARC64
-
-#pragma GCC poison TARGET_WORDS_BIGENDIAN
-#pragma GCC poison BSWAP_NEEDED
-
-#pragma GCC poison TARGET_LONG_BITS
-#pragma GCC poison TARGET_FMT_lx
-#pragma GCC poison TARGET_FMT_ld
-
-#pragma GCC poison TARGET_PAGE_SIZE
-#pragma GCC poison TARGET_PAGE_MASK
-#pragma GCC poison TARGET_PAGE_BITS
-#pragma GCC poison TARGET_PAGE_ALIGN
-
-#pragma GCC poison CPUArchState
-#pragma GCC poison env
-
-#pragma GCC poison lduw_phys
-#pragma GCC poison ldl_phys
-#pragma GCC poison ldq_phys
-#pragma GCC poison stl_phys_notdirty
-#pragma GCC poison stw_phys
-#pragma GCC poison stl_phys
-#pragma GCC poison stq_phys
-
-#pragma GCC poison CPU_INTERRUPT_HARD
-#pragma GCC poison CPU_INTERRUPT_EXITTB
-#pragma GCC poison CPU_INTERRUPT_HALT
-#pragma GCC poison CPU_INTERRUPT_DEBUG
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_0
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
-
-#endif
-#endif
diff --git a/contrib/qemu/include/fpu/softfloat.h b/contrib/qemu/include/fpu/softfloat.h
deleted file mode 100644
index f3927e2419f..00000000000
--- a/contrib/qemu/include/fpu/softfloat.h
+++ /dev/null
@@ -1,641 +0,0 @@
-/*
- * QEMU float support
- *
- * Derived from SoftFloat.
- */
-
-/*============================================================================
-
-This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
-Package, Release 2b.
-
-Written by John R. Hauser. This work was made possible in part by the
-International Computer Science Institute, located at Suite 600, 1947 Center
-Street, Berkeley, California 94704. Funding was partially provided by the
-National Science Foundation under grant MIP-9311980. The original version
-of this code was written as part of a project to build a fixed-point vector
-processor in collaboration with the University of California at Berkeley,
-overseen by Profs. Nelson Morgan and John Wawrzynek. More information
-is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
-arithmetic/SoftFloat.html'.
-
-THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
-been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
-RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
-AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
-COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
-EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
-INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
-OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
-
-Derivative works are acceptable, even for commercial purposes, so long as
-(1) the source code for the derivative work includes prominent notice that
-the work is derivative, and (2) the source code includes prominent notice with
-these four paragraphs for those parts of this code that are retained.
-
-=============================================================================*/
-
-#ifndef SOFTFLOAT_H
-#define SOFTFLOAT_H
-
-#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
-#include <sunmath.h>
-#endif
-
-#include <inttypes.h>
-#include "config-host.h"
-#include "qemu/osdep.h"
-
-/*----------------------------------------------------------------------------
-| Each of the following `typedef's defines the most convenient type that holds
-| integers of at least as many bits as specified. For example, `uint8' should
-| be the most convenient type that can hold unsigned integers of as many as
-| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
-| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
-| to the same as `int'.
-*----------------------------------------------------------------------------*/
-typedef uint8_t flag;
-typedef uint8_t uint8;
-typedef int8_t int8;
-typedef unsigned int uint32;
-typedef signed int int32;
-typedef uint64_t uint64;
-typedef int64_t int64;
-
-#define LIT64( a ) a##LL
-#define INLINE static inline
-
-#define STATUS_PARAM , float_status *status
-#define STATUS(field) status->field
-#define STATUS_VAR , status
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point ordering relations
-*----------------------------------------------------------------------------*/
-enum {
- float_relation_less = -1,
- float_relation_equal = 0,
- float_relation_greater = 1,
- float_relation_unordered = 2
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point types.
-*----------------------------------------------------------------------------*/
-/* Use structures for soft-float types. This prevents accidentally mixing
- them with native int/float types. A sufficiently clever compiler and
- sane ABI should be able to see though these structs. However
- x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
-//#define USE_SOFTFLOAT_STRUCT_TYPES
-#ifdef USE_SOFTFLOAT_STRUCT_TYPES
-typedef struct {
- uint16_t v;
-} float16;
-#define float16_val(x) (((float16)(x)).v)
-#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
-#define const_float16(x) { x }
-typedef struct {
- uint32_t v;
-} float32;
-/* The cast ensures an error if the wrong type is passed. */
-#define float32_val(x) (((float32)(x)).v)
-#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
-#define const_float32(x) { x }
-typedef struct {
- uint64_t v;
-} float64;
-#define float64_val(x) (((float64)(x)).v)
-#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
-#define const_float64(x) { x }
-#else
-typedef uint16_t float16;
-typedef uint32_t float32;
-typedef uint64_t float64;
-#define float16_val(x) (x)
-#define float32_val(x) (x)
-#define float64_val(x) (x)
-#define make_float16(x) (x)
-#define make_float32(x) (x)
-#define make_float64(x) (x)
-#define const_float16(x) (x)
-#define const_float32(x) (x)
-#define const_float64(x) (x)
-#endif
-typedef struct {
- uint64_t low;
- uint16_t high;
-} floatx80;
-#define make_floatx80(exp, mant) ((floatx80) { mant, exp })
-#define make_floatx80_init(exp, mant) { .low = mant, .high = exp }
-typedef struct {
-#ifdef HOST_WORDS_BIGENDIAN
- uint64_t high, low;
-#else
- uint64_t low, high;
-#endif
-} float128;
-#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
-#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point underflow tininess-detection mode.
-*----------------------------------------------------------------------------*/
-enum {
- float_tininess_after_rounding = 0,
- float_tininess_before_rounding = 1
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point rounding mode.
-*----------------------------------------------------------------------------*/
-enum {
- float_round_nearest_even = 0,
- float_round_down = 1,
- float_round_up = 2,
- float_round_to_zero = 3
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point exception flags.
-*----------------------------------------------------------------------------*/
-enum {
- float_flag_invalid = 1,
- float_flag_divbyzero = 4,
- float_flag_overflow = 8,
- float_flag_underflow = 16,
- float_flag_inexact = 32,
- float_flag_input_denormal = 64,
- float_flag_output_denormal = 128
-};
-
-typedef struct float_status {
- signed char float_detect_tininess;
- signed char float_rounding_mode;
- signed char float_exception_flags;
- signed char floatx80_rounding_precision;
- /* should denormalised results go to zero and set the inexact flag? */
- flag flush_to_zero;
- /* should denormalised inputs go to zero and set the input_denormal flag? */
- flag flush_inputs_to_zero;
- flag default_nan_mode;
-} float_status;
-
-void set_float_rounding_mode(int val STATUS_PARAM);
-void set_float_exception_flags(int val STATUS_PARAM);
-INLINE void set_float_detect_tininess(int val STATUS_PARAM)
-{
- STATUS(float_detect_tininess) = val;
-}
-INLINE void set_flush_to_zero(flag val STATUS_PARAM)
-{
- STATUS(flush_to_zero) = val;
-}
-INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM)
-{
- STATUS(flush_inputs_to_zero) = val;
-}
-INLINE void set_default_nan_mode(flag val STATUS_PARAM)
-{
- STATUS(default_nan_mode) = val;
-}
-INLINE int get_float_exception_flags(float_status *status)
-{
- return STATUS(float_exception_flags);
-}
-void set_floatx80_rounding_precision(int val STATUS_PARAM);
-
-/*----------------------------------------------------------------------------
-| Routine to raise any or all of the software IEC/IEEE floating-point
-| exception flags.
-*----------------------------------------------------------------------------*/
-void float_raise( int8 flags STATUS_PARAM);
-
-/*----------------------------------------------------------------------------
-| Options to indicate which negations to perform in float*_muladd()
-| Using these differs from negating an input or output before calling
-| the muladd function in that this means that a NaN doesn't have its
-| sign bit inverted before it is propagated.
-*----------------------------------------------------------------------------*/
-enum {
- float_muladd_negate_c = 1,
- float_muladd_negate_product = 2,
- float_muladd_negate_result = 4,
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE integer-to-floating-point conversion routines.
-*----------------------------------------------------------------------------*/
-float32 int32_to_float32( int32 STATUS_PARAM );
-float64 int32_to_float64( int32 STATUS_PARAM );
-float32 uint32_to_float32( uint32 STATUS_PARAM );
-float64 uint32_to_float64( uint32 STATUS_PARAM );
-floatx80 int32_to_floatx80( int32 STATUS_PARAM );
-float128 int32_to_float128( int32 STATUS_PARAM );
-float32 int64_to_float32( int64 STATUS_PARAM );
-float32 uint64_to_float32( uint64 STATUS_PARAM );
-float64 int64_to_float64( int64 STATUS_PARAM );
-float64 uint64_to_float64( uint64 STATUS_PARAM );
-floatx80 int64_to_floatx80( int64 STATUS_PARAM );
-float128 int64_to_float128( int64 STATUS_PARAM );
-float128 uint64_to_float128( uint64 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software half-precision conversion routines.
-*----------------------------------------------------------------------------*/
-float16 float32_to_float16( float32, flag STATUS_PARAM );
-float32 float16_to_float32( float16, flag STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software half-precision operations.
-*----------------------------------------------------------------------------*/
-int float16_is_quiet_nan( float16 );
-int float16_is_signaling_nan( float16 );
-float16 float16_maybe_silence_nan( float16 );
-
-INLINE int float16_is_any_nan(float16 a)
-{
- return ((float16_val(a) & ~0x8000) > 0x7c00);
-}
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated half-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float16 float16_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE single-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
-uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
-int32 float32_to_int32( float32 STATUS_PARAM );
-int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
-uint32 float32_to_uint32( float32 STATUS_PARAM );
-uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
-int64 float32_to_int64( float32 STATUS_PARAM );
-int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
-float64 float32_to_float64( float32 STATUS_PARAM );
-floatx80 float32_to_floatx80( float32 STATUS_PARAM );
-float128 float32_to_float128( float32 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE single-precision operations.
-*----------------------------------------------------------------------------*/
-float32 float32_round_to_int( float32 STATUS_PARAM );
-float32 float32_add( float32, float32 STATUS_PARAM );
-float32 float32_sub( float32, float32 STATUS_PARAM );
-float32 float32_mul( float32, float32 STATUS_PARAM );
-float32 float32_div( float32, float32 STATUS_PARAM );
-float32 float32_rem( float32, float32 STATUS_PARAM );
-float32 float32_muladd(float32, float32, float32, int STATUS_PARAM);
-float32 float32_sqrt( float32 STATUS_PARAM );
-float32 float32_exp2( float32 STATUS_PARAM );
-float32 float32_log2( float32 STATUS_PARAM );
-int float32_eq( float32, float32 STATUS_PARAM );
-int float32_le( float32, float32 STATUS_PARAM );
-int float32_lt( float32, float32 STATUS_PARAM );
-int float32_unordered( float32, float32 STATUS_PARAM );
-int float32_eq_quiet( float32, float32 STATUS_PARAM );
-int float32_le_quiet( float32, float32 STATUS_PARAM );
-int float32_lt_quiet( float32, float32 STATUS_PARAM );
-int float32_unordered_quiet( float32, float32 STATUS_PARAM );
-int float32_compare( float32, float32 STATUS_PARAM );
-int float32_compare_quiet( float32, float32 STATUS_PARAM );
-float32 float32_min(float32, float32 STATUS_PARAM);
-float32 float32_max(float32, float32 STATUS_PARAM);
-int float32_is_quiet_nan( float32 );
-int float32_is_signaling_nan( float32 );
-float32 float32_maybe_silence_nan( float32 );
-float32 float32_scalbn( float32, int STATUS_PARAM );
-
-INLINE float32 float32_abs(float32 a)
-{
- /* Note that abs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float32(float32_val(a) & 0x7fffffff);
-}
-
-INLINE float32 float32_chs(float32 a)
-{
- /* Note that chs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float32(float32_val(a) ^ 0x80000000);
-}
-
-INLINE int float32_is_infinity(float32 a)
-{
- return (float32_val(a) & 0x7fffffff) == 0x7f800000;
-}
-
-INLINE int float32_is_neg(float32 a)
-{
- return float32_val(a) >> 31;
-}
-
-INLINE int float32_is_zero(float32 a)
-{
- return (float32_val(a) & 0x7fffffff) == 0;
-}
-
-INLINE int float32_is_any_nan(float32 a)
-{
- return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL);
-}
-
-INLINE int float32_is_zero_or_denormal(float32 a)
-{
- return (float32_val(a) & 0x7f800000) == 0;
-}
-
-INLINE float32 float32_set_sign(float32 a, int sign)
-{
- return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31));
-}
-
-#define float32_zero make_float32(0)
-#define float32_one make_float32(0x3f800000)
-#define float32_ln2 make_float32(0x3f317218)
-#define float32_pi make_float32(0x40490fdb)
-#define float32_half make_float32(0x3f000000)
-#define float32_infinity make_float32(0x7f800000)
-
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated single-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float32 float32_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE double-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
-uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
-int32 float64_to_int32( float64 STATUS_PARAM );
-int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM );
-uint32 float64_to_uint32( float64 STATUS_PARAM );
-uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
-int64 float64_to_int64( float64 STATUS_PARAM );
-int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM );
-uint64 float64_to_uint64 (float64 a STATUS_PARAM);
-uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
-float32 float64_to_float32( float64 STATUS_PARAM );
-floatx80 float64_to_floatx80( float64 STATUS_PARAM );
-float128 float64_to_float128( float64 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE double-precision operations.
-*----------------------------------------------------------------------------*/
-float64 float64_round_to_int( float64 STATUS_PARAM );
-float64 float64_trunc_to_int( float64 STATUS_PARAM );
-float64 float64_add( float64, float64 STATUS_PARAM );
-float64 float64_sub( float64, float64 STATUS_PARAM );
-float64 float64_mul( float64, float64 STATUS_PARAM );
-float64 float64_div( float64, float64 STATUS_PARAM );
-float64 float64_rem( float64, float64 STATUS_PARAM );
-float64 float64_muladd(float64, float64, float64, int STATUS_PARAM);
-float64 float64_sqrt( float64 STATUS_PARAM );
-float64 float64_log2( float64 STATUS_PARAM );
-int float64_eq( float64, float64 STATUS_PARAM );
-int float64_le( float64, float64 STATUS_PARAM );
-int float64_lt( float64, float64 STATUS_PARAM );
-int float64_unordered( float64, float64 STATUS_PARAM );
-int float64_eq_quiet( float64, float64 STATUS_PARAM );
-int float64_le_quiet( float64, float64 STATUS_PARAM );
-int float64_lt_quiet( float64, float64 STATUS_PARAM );
-int float64_unordered_quiet( float64, float64 STATUS_PARAM );
-int float64_compare( float64, float64 STATUS_PARAM );
-int float64_compare_quiet( float64, float64 STATUS_PARAM );
-float64 float64_min(float64, float64 STATUS_PARAM);
-float64 float64_max(float64, float64 STATUS_PARAM);
-int float64_is_quiet_nan( float64 a );
-int float64_is_signaling_nan( float64 );
-float64 float64_maybe_silence_nan( float64 );
-float64 float64_scalbn( float64, int STATUS_PARAM );
-
-INLINE float64 float64_abs(float64 a)
-{
- /* Note that abs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float64(float64_val(a) & 0x7fffffffffffffffLL);
-}
-
-INLINE float64 float64_chs(float64 a)
-{
- /* Note that chs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float64(float64_val(a) ^ 0x8000000000000000LL);
-}
-
-INLINE int float64_is_infinity(float64 a)
-{
- return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL;
-}
-
-INLINE int float64_is_neg(float64 a)
-{
- return float64_val(a) >> 63;
-}
-
-INLINE int float64_is_zero(float64 a)
-{
- return (float64_val(a) & 0x7fffffffffffffffLL) == 0;
-}
-
-INLINE int float64_is_any_nan(float64 a)
-{
- return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL);
-}
-
-INLINE int float64_is_zero_or_denormal(float64 a)
-{
- return (float64_val(a) & 0x7ff0000000000000LL) == 0;
-}
-
-INLINE float64 float64_set_sign(float64 a, int sign)
-{
- return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
- | ((int64_t)sign << 63));
-}
-
-#define float64_zero make_float64(0)
-#define float64_one make_float64(0x3ff0000000000000LL)
-#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
-#define float64_pi make_float64(0x400921fb54442d18LL)
-#define float64_half make_float64(0x3fe0000000000000LL)
-#define float64_infinity make_float64(0x7ff0000000000000LL)
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated double-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float64 float64_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE extended double-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int32 floatx80_to_int32( floatx80 STATUS_PARAM );
-int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
-int64 floatx80_to_int64( floatx80 STATUS_PARAM );
-int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
-float32 floatx80_to_float32( floatx80 STATUS_PARAM );
-float64 floatx80_to_float64( floatx80 STATUS_PARAM );
-float128 floatx80_to_float128( floatx80 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE extended double-precision operations.
-*----------------------------------------------------------------------------*/
-floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
-floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
-int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
-int floatx80_le( floatx80, floatx80 STATUS_PARAM );
-int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
-int floatx80_unordered( floatx80, floatx80 STATUS_PARAM );
-int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
-int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_is_quiet_nan( floatx80 );
-int floatx80_is_signaling_nan( floatx80 );
-floatx80 floatx80_maybe_silence_nan( floatx80 );
-floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
-
-INLINE floatx80 floatx80_abs(floatx80 a)
-{
- a.high &= 0x7fff;
- return a;
-}
-
-INLINE floatx80 floatx80_chs(floatx80 a)
-{
- a.high ^= 0x8000;
- return a;
-}
-
-INLINE int floatx80_is_infinity(floatx80 a)
-{
- return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000LL;
-}
-
-INLINE int floatx80_is_neg(floatx80 a)
-{
- return a.high >> 15;
-}
-
-INLINE int floatx80_is_zero(floatx80 a)
-{
- return (a.high & 0x7fff) == 0 && a.low == 0;
-}
-
-INLINE int floatx80_is_zero_or_denormal(floatx80 a)
-{
- return (a.high & 0x7fff) == 0;
-}
-
-INLINE int floatx80_is_any_nan(floatx80 a)
-{
- return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
-}
-
-#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
-#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
-#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
-#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
-#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
-#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated extended double-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const floatx80 floatx80_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE quadruple-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int32 float128_to_int32( float128 STATUS_PARAM );
-int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM );
-int64 float128_to_int64( float128 STATUS_PARAM );
-int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM );
-float32 float128_to_float32( float128 STATUS_PARAM );
-float64 float128_to_float64( float128 STATUS_PARAM );
-floatx80 float128_to_floatx80( float128 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE quadruple-precision operations.
-*----------------------------------------------------------------------------*/
-float128 float128_round_to_int( float128 STATUS_PARAM );
-float128 float128_add( float128, float128 STATUS_PARAM );
-float128 float128_sub( float128, float128 STATUS_PARAM );
-float128 float128_mul( float128, float128 STATUS_PARAM );
-float128 float128_div( float128, float128 STATUS_PARAM );
-float128 float128_rem( float128, float128 STATUS_PARAM );
-float128 float128_sqrt( float128 STATUS_PARAM );
-int float128_eq( float128, float128 STATUS_PARAM );
-int float128_le( float128, float128 STATUS_PARAM );
-int float128_lt( float128, float128 STATUS_PARAM );
-int float128_unordered( float128, float128 STATUS_PARAM );
-int float128_eq_quiet( float128, float128 STATUS_PARAM );
-int float128_le_quiet( float128, float128 STATUS_PARAM );
-int float128_lt_quiet( float128, float128 STATUS_PARAM );
-int float128_unordered_quiet( float128, float128 STATUS_PARAM );
-int float128_compare( float128, float128 STATUS_PARAM );
-int float128_compare_quiet( float128, float128 STATUS_PARAM );
-int float128_is_quiet_nan( float128 );
-int float128_is_signaling_nan( float128 );
-float128 float128_maybe_silence_nan( float128 );
-float128 float128_scalbn( float128, int STATUS_PARAM );
-
-INLINE float128 float128_abs(float128 a)
-{
- a.high &= 0x7fffffffffffffffLL;
- return a;
-}
-
-INLINE float128 float128_chs(float128 a)
-{
- a.high ^= 0x8000000000000000LL;
- return a;
-}
-
-INLINE int float128_is_infinity(float128 a)
-{
- return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0;
-}
-
-INLINE int float128_is_neg(float128 a)
-{
- return a.high >> 63;
-}
-
-INLINE int float128_is_zero(float128 a)
-{
- return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0;
-}
-
-INLINE int float128_is_zero_or_denormal(float128 a)
-{
- return (a.high & 0x7fff000000000000LL) == 0;
-}
-
-INLINE int float128_is_any_nan(float128 a)
-{
- return ((a.high >> 48) & 0x7fff) == 0x7fff &&
- ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0));
-}
-
-#define float128_zero make_float128(0, 0)
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated quadruple-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float128 float128_default_nan;
-
-#endif /* !SOFTFLOAT_H */
diff --git a/contrib/qemu/include/glib-compat.h b/contrib/qemu/include/glib-compat.h
deleted file mode 100644
index 8aa77afd626..00000000000
--- a/contrib/qemu/include/glib-compat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * GLIB Compatibility Functions
- *
- * Copyright IBM, Corp. 2013
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_GLIB_COMPAT_H
-#define QEMU_GLIB_COMPAT_H
-
-#include <glib.h>
-
-#if !GLIB_CHECK_VERSION(2, 14, 0)
-static inline guint g_timeout_add_seconds(guint interval, GSourceFunc function,
- gpointer data)
-{
- return g_timeout_add(interval * 1000, function, data);
-}
-#endif
-
-#endif
diff --git a/contrib/qemu/include/migration/migration.h b/contrib/qemu/include/migration/migration.h
deleted file mode 100644
index bc9fde0b2ab..00000000000
--- a/contrib/qemu/include/migration/migration.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * QEMU live migration
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_MIGRATION_H
-#define QEMU_MIGRATION_H
-
-#include "qapi/qmp/qdict.h"
-#include "qemu-common.h"
-#include "qemu/thread.h"
-#include "qemu/notify.h"
-#include "qapi/error.h"
-#include "migration/vmstate.h"
-#include "qapi-types.h"
-#include "exec/cpu-common.h"
-
-struct MigrationParams {
- bool blk;
- bool shared;
-};
-
-typedef struct MigrationState MigrationState;
-
-struct MigrationState
-{
- int64_t bandwidth_limit;
- size_t bytes_xfer;
- size_t xfer_limit;
- QemuThread thread;
- QEMUBH *cleanup_bh;
- QEMUFile *file;
-
- int state;
- MigrationParams params;
- double mbps;
- int64_t total_time;
- int64_t downtime;
- int64_t expected_downtime;
- int64_t dirty_pages_rate;
- int64_t dirty_bytes_rate;
- bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
- int64_t xbzrle_cache_size;
-};
-
-void process_incoming_migration(QEMUFile *f);
-
-void qemu_start_incoming_migration(const char *uri, Error **errp);
-
-uint64_t migrate_max_downtime(void);
-
-void do_info_migrate_print(Monitor *mon, const QObject *data);
-
-void do_info_migrate(Monitor *mon, QObject **ret_data);
-
-void exec_start_incoming_migration(const char *host_port, Error **errp);
-
-void exec_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
-
-void tcp_start_incoming_migration(const char *host_port, Error **errp);
-
-void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
-
-void unix_start_incoming_migration(const char *path, Error **errp);
-
-void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
-
-void fd_start_incoming_migration(const char *path, Error **errp);
-
-void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
-
-void migrate_fd_error(MigrationState *s);
-
-void migrate_fd_connect(MigrationState *s);
-
-int migrate_fd_close(MigrationState *s);
-
-void add_migration_state_change_notifier(Notifier *notify);
-void remove_migration_state_change_notifier(Notifier *notify);
-bool migration_is_active(MigrationState *);
-bool migration_has_finished(MigrationState *);
-bool migration_has_failed(MigrationState *);
-MigrationState *migrate_get_current(void);
-
-uint64_t ram_bytes_remaining(void);
-uint64_t ram_bytes_transferred(void);
-uint64_t ram_bytes_total(void);
-
-void acct_update_position(QEMUFile *f, size_t size, bool zero);
-
-extern SaveVMHandlers savevm_ram_handlers;
-
-uint64_t dup_mig_bytes_transferred(void);
-uint64_t dup_mig_pages_transferred(void);
-uint64_t skipped_mig_bytes_transferred(void);
-uint64_t skipped_mig_pages_transferred(void);
-uint64_t norm_mig_bytes_transferred(void);
-uint64_t norm_mig_pages_transferred(void);
-uint64_t xbzrle_mig_bytes_transferred(void);
-uint64_t xbzrle_mig_pages_transferred(void);
-uint64_t xbzrle_mig_pages_overflow(void);
-uint64_t xbzrle_mig_pages_cache_miss(void);
-
-/**
- * @migrate_add_blocker - prevent migration from proceeding
- *
- * @reason - an error to be returned whenever migration is attempted
- */
-void migrate_add_blocker(Error *reason);
-
-/**
- * @migrate_del_blocker - remove a blocking error from migration
- *
- * @reason - the error blocking migration
- */
-void migrate_del_blocker(Error *reason);
-
-bool migrate_rdma_pin_all(void);
-
-bool migrate_auto_converge(void);
-
-int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
- uint8_t *dst, int dlen);
-int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
-
-int migrate_use_xbzrle(void);
-int64_t migrate_xbzrle_cache_size(void);
-
-int64_t xbzrle_cache_resize(int64_t new_size);
-
-void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_load_hook(QEMUFile *f, uint64_t flags);
-
-/* Whenever this is found in the data stream, the flags
- * will be passed to ram_control_load_hook in the incoming-migration
- * side. This lets before_ram_iterate/after_ram_iterate add
- * transport-specific sections to the RAM migration data.
- */
-#define RAM_SAVE_FLAG_HOOK 0x80
-
-#define RAM_SAVE_CONTROL_NOT_SUPP -1000
-#define RAM_SAVE_CONTROL_DELAYED -2000
-
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
- ram_addr_t offset, size_t size,
- int *bytes_sent);
-
-#endif
diff --git a/contrib/qemu/include/migration/qemu-file.h b/contrib/qemu/include/migration/qemu-file.h
deleted file mode 100644
index 0f757fbeb63..00000000000
--- a/contrib/qemu/include/migration/qemu-file.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef QEMU_FILE_H
-#define QEMU_FILE_H 1
-#include "exec/cpu-common.h"
-
-/* This function writes a chunk of data to a file at the given position.
- * The pos argument can be ignored if the file is only being used for
- * streaming. The handler should try to write all of the data it can.
- */
-typedef int (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf,
- int64_t pos, int size);
-
-/* Read a chunk of data from a file at the given position. The pos argument
- * can be ignored if the file is only be used for streaming. The number of
- * bytes actually read should be returned.
- */
-typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
- int64_t pos, int size);
-
-/* Close a file
- *
- * Return negative error number on error, 0 or positive value on success.
- *
- * The meaning of return value on success depends on the specific back-end being
- * used.
- */
-typedef int (QEMUFileCloseFunc)(void *opaque);
-
-/* Called to return the OS file descriptor associated to the QEMUFile.
- */
-typedef int (QEMUFileGetFD)(void *opaque);
-
-/*
- * This function writes an iovec to file.
- */
-typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
- int iovcnt, int64_t pos);
-
-/*
- * This function provides hooks around different
- * stages of RAM migration.
- */
-typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
-
-/*
- * Constants used by ram_control_* hooks
- */
-#define RAM_CONTROL_SETUP 0
-#define RAM_CONTROL_ROUND 1
-#define RAM_CONTROL_HOOK 2
-#define RAM_CONTROL_FINISH 3
-
-/*
- * This function allows override of where the RAM page
- * is saved (such as RDMA, for example.)
- */
-typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
- ram_addr_t block_offset,
- ram_addr_t offset,
- size_t size,
- int *bytes_sent);
-
-typedef struct QEMUFileOps {
- QEMUFilePutBufferFunc *put_buffer;
- QEMUFileGetBufferFunc *get_buffer;
- QEMUFileCloseFunc *close;
- QEMUFileGetFD *get_fd;
- QEMUFileWritevBufferFunc *writev_buffer;
- QEMURamHookFunc *before_ram_iterate;
- QEMURamHookFunc *after_ram_iterate;
- QEMURamHookFunc *hook_ram_load;
- QEMURamSaveFunc *save_page;
-} QEMUFileOps;
-
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
-QEMUFile *qemu_fopen(const char *filename, const char *mode);
-QEMUFile *qemu_fdopen(int fd, const char *mode);
-QEMUFile *qemu_fopen_socket(int fd, const char *mode);
-QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
-int qemu_get_fd(QEMUFile *f);
-int qemu_fclose(QEMUFile *f);
-int64_t qemu_ftell(QEMUFile *f);
-void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
-void qemu_put_byte(QEMUFile *f, int v);
-/*
- * put_buffer without copying the buffer.
- * The buffer should be available till it is sent asynchronously.
- */
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
-bool qemu_file_mode_is_not_valid(const char *mode);
-
-static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
-{
- qemu_put_byte(f, (int)v);
-}
-
-#define qemu_put_sbyte qemu_put_byte
-
-void qemu_put_be16(QEMUFile *f, unsigned int v);
-void qemu_put_be32(QEMUFile *f, unsigned int v);
-void qemu_put_be64(QEMUFile *f, uint64_t v);
-int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
-int qemu_get_byte(QEMUFile *f);
-void qemu_update_position(QEMUFile *f, size_t size);
-
-static inline unsigned int qemu_get_ubyte(QEMUFile *f)
-{
- return (unsigned int)qemu_get_byte(f);
-}
-
-#define qemu_get_sbyte qemu_get_byte
-
-unsigned int qemu_get_be16(QEMUFile *f);
-unsigned int qemu_get_be32(QEMUFile *f);
-uint64_t qemu_get_be64(QEMUFile *f);
-
-int qemu_file_rate_limit(QEMUFile *f);
-void qemu_file_reset_rate_limit(QEMUFile *f);
-void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
-int64_t qemu_file_get_rate_limit(QEMUFile *f);
-int qemu_file_get_error(QEMUFile *f);
-void qemu_fflush(QEMUFile *f);
-
-static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
-{
- qemu_put_be64(f, *pv);
-}
-
-static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv)
-{
- qemu_put_be32(f, *pv);
-}
-
-static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv)
-{
- qemu_put_be16(f, *pv);
-}
-
-static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv)
-{
- qemu_put_byte(f, *pv);
-}
-
-static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv)
-{
- *pv = qemu_get_be64(f);
-}
-
-static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv)
-{
- *pv = qemu_get_be32(f);
-}
-
-static inline void q