summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--configure.ac33
-rw-r--r--contrib/qemu/block.c4604
-rw-r--r--contrib/qemu/block/qcow.c914
-rw-r--r--contrib/qemu/block/qcow2-cache.c323
-rw-r--r--contrib/qemu/block/qcow2-cluster.c1478
-rw-r--r--contrib/qemu/block/qcow2-refcount.c1374
-rw-r--r--contrib/qemu/block/qcow2-snapshot.c660
-rw-r--r--contrib/qemu/block/qcow2.c1825
-rw-r--r--contrib/qemu/block/qcow2.h437
-rw-r--r--contrib/qemu/block/qed-check.c248
-rw-r--r--contrib/qemu/block/qed-cluster.c165
-rw-r--r--contrib/qemu/block/qed-gencb.c32
-rw-r--r--contrib/qemu/block/qed-l2-cache.c187
-rw-r--r--contrib/qemu/block/qed-table.c296
-rw-r--r--contrib/qemu/block/qed.c1596
-rw-r--r--contrib/qemu/block/qed.h344
-rw-r--r--contrib/qemu/block/snapshot.c157
-rw-r--r--contrib/qemu/config-host.h72
-rw-r--r--contrib/qemu/coroutine-ucontext.c225
-rw-r--r--contrib/qemu/include/block/aio.h247
-rw-r--r--contrib/qemu/include/block/block.h443
-rw-r--r--contrib/qemu/include/block/block_int.h421
-rw-r--r--contrib/qemu/include/block/blockjob.h278
-rw-r--r--contrib/qemu/include/block/coroutine.h218
-rw-r--r--contrib/qemu/include/block/coroutine_int.h53
-rw-r--r--contrib/qemu/include/block/snapshot.h53
-rw-r--r--contrib/qemu/include/config.h2
-rw-r--r--contrib/qemu/include/exec/cpu-common.h124
-rw-r--r--contrib/qemu/include/exec/hwaddr.h20
-rw-r--r--contrib/qemu/include/exec/poison.h63
-rw-r--r--contrib/qemu/include/fpu/softfloat.h641
-rw-r--r--contrib/qemu/include/glib-compat.h27
-rw-r--r--contrib/qemu/include/migration/migration.h157
-rw-r--r--contrib/qemu/include/migration/qemu-file.h266
-rw-r--r--contrib/qemu/include/migration/vmstate.h740
-rw-r--r--contrib/qemu/include/monitor/monitor.h104
-rw-r--r--contrib/qemu/include/monitor/readline.h55
-rw-r--r--contrib/qemu/include/qapi/error.h85
-rw-r--r--contrib/qemu/include/qapi/qmp/json-lexer.h51
-rw-r--r--contrib/qemu/include/qapi/qmp/json-parser.h24
-rw-r--r--contrib/qemu/include/qapi/qmp/json-streamer.h40
-rw-r--r--contrib/qemu/include/qapi/qmp/qbool.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qdict.h69
-rw-r--r--contrib/qemu/include/qapi/qmp/qerror.h249
-rw-r--r--contrib/qemu/include/qapi/qmp/qfloat.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qint.h28
-rw-r--r--contrib/qemu/include/qapi/qmp/qjson.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qlist.h63
-rw-r--r--contrib/qemu/include/qapi/qmp/qobject.h112
-rw-r--r--contrib/qemu/include/qapi/qmp/qstring.h36
-rw-r--r--contrib/qemu/include/qapi/qmp/types.h25
-rw-r--r--contrib/qemu/include/qemu-common.h478
-rw-r--r--contrib/qemu/include/qemu/aes.h45
-rw-r--r--contrib/qemu/include/qemu/atomic.h202
-rw-r--r--contrib/qemu/include/qemu/bitmap.h222
-rw-r--r--contrib/qemu/include/qemu/bitops.h276
-rw-r--r--contrib/qemu/include/qemu/bswap.h487
-rw-r--r--contrib/qemu/include/qemu/compiler.h55
-rw-r--r--contrib/qemu/include/qemu/error-report.h46
-rw-r--r--contrib/qemu/include/qemu/event_notifier.h46
-rw-r--r--contrib/qemu/include/qemu/hbitmap.h209
-rw-r--r--contrib/qemu/include/qemu/host-utils.h322
-rw-r--r--contrib/qemu/include/qemu/iov.h115
-rw-r--r--contrib/qemu/include/qemu/main-loop.h311
-rw-r--r--contrib/qemu/include/qemu/module.h40
-rw-r--r--contrib/qemu/include/qemu/notify.h72
-rw-r--r--contrib/qemu/include/qemu/option.h157
-rw-r--r--contrib/qemu/include/qemu/option_int.h54
-rw-r--r--contrib/qemu/include/qemu/osdep.h218
-rw-r--r--contrib/qemu/include/qemu/queue.h414
-rw-r--r--contrib/qemu/include/qemu/sockets.h83
-rw-r--r--contrib/qemu/include/qemu/thread-posix.h28
-rw-r--r--contrib/qemu/include/qemu/thread.h56
-rw-r--r--contrib/qemu/include/qemu/timer.h305
-rw-r--r--contrib/qemu/include/qemu/typedefs.h69
-rw-r--r--contrib/qemu/include/sysemu/os-posix.h52
-rw-r--r--contrib/qemu/include/sysemu/sysemu.h200
-rw-r--r--contrib/qemu/include/trace.h6
-rw-r--r--contrib/qemu/nop-symbols.c12
-rw-r--r--contrib/qemu/qapi-types.h2746
-rw-r--r--contrib/qemu/qemu-coroutine-lock.c178
-rw-r--r--contrib/qemu/qemu-coroutine-sleep.c39
-rw-r--r--contrib/qemu/qemu-coroutine.c135
-rw-r--r--contrib/qemu/qmp-commands.h204
-rw-r--r--contrib/qemu/qobject/json-lexer.c373
-rw-r--r--contrib/qemu/qobject/json-parser.c724
-rw-r--r--contrib/qemu/qobject/json-streamer.c122
-rw-r--r--contrib/qemu/qobject/qbool.c68
-rw-r--r--contrib/qemu/qobject/qdict.c478
-rw-r--r--contrib/qemu/qobject/qerror.c156
-rw-r--r--contrib/qemu/qobject/qfloat.c68
-rw-r--r--contrib/qemu/qobject/qint.c67
-rw-r--r--contrib/qemu/qobject/qjson.c282
-rw-r--r--contrib/qemu/qobject/qlist.c170
-rw-r--r--contrib/qemu/qobject/qstring.c149
-rw-r--r--contrib/qemu/trace/generated-tracers.h3759
-rw-r--r--contrib/qemu/util/aes.c1314
-rw-r--r--contrib/qemu/util/bitmap.c256
-rw-r--r--contrib/qemu/util/bitops.c158
-rw-r--r--contrib/qemu/util/cutils.c532
-rw-r--r--contrib/qemu/util/error.c120
-rw-r--r--contrib/qemu/util/hbitmap.c404
-rw-r--r--contrib/qemu/util/hexdump.c37
-rw-r--r--contrib/qemu/util/iov.c426
-rw-r--r--contrib/qemu/util/module.c81
-rw-r--r--contrib/qemu/util/oslib-posix.c255
-rw-r--r--contrib/qemu/util/qemu-error.c225
-rw-r--r--contrib/qemu/util/qemu-option.c1126
-rw-r--r--contrib/qemu/util/qemu-thread-posix.c327
-rw-r--r--contrib/qemu/util/unicode.c100
-rwxr-xr-xtests/basic/file-snapshot.t62
-rw-r--r--xlators/features/Makefile.am4
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am156
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c386
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c43
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c55
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c111
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c45
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1134
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
124 files changed, 2 insertions, 42231 deletions
diff --git a/configure.ac b/configure.ac
index 29e36648aac..56ec512badf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -177,8 +177,6 @@ AC_CONFIG_FILES([Makefile
xlators/encryption/rot-13/src/Makefile
xlators/encryption/crypt/Makefile
xlators/encryption/crypt/src/Makefile
- xlators/features/qemu-block/Makefile
- xlators/features/qemu-block/src/Makefile
xlators/system/Makefile
xlators/system/posix-acl/Makefile
xlators/system/posix-acl/src/Makefile
@@ -591,33 +589,6 @@ fi
AC_SUBST(FUSERMOUNT_SUBDIR)
#end FUSERMOUNT section
-# QEMU_BLOCK section
-
-AC_ARG_ENABLE([qemu-block],
- AC_HELP_STRING([--enable-qemu-block],
- [Build QEMU Block formats translator]))
-
-if test "x$enable_qemu_block" != "xno"; then
- PKG_CHECK_MODULES([GLIB], [glib-2.0],
- [HAVE_GLIB_2="yes"],
- [HAVE_GLIB_2="no"])
-fi
-
-if test "x$enable_qemu_block" = "xyes" -a "x$HAVE_GLIB_2" = "xno"; then
- echo "QEMU Block formats translator requires libglib-2.0, but missing."
- exit 1
-fi
-
-BUILD_QEMU_BLOCK=no
-if test "x${enable_qemu_block}" != "xno" -a "x${HAVE_GLIB_2}" = "xyes"; then
- BUILD_QEMU_BLOCK=yes
- AC_DEFINE(HAVE_QEMU_BLOCK, 1, [define if libglib-2.0 library found and QEMU
- Block translator enabled])
-fi
-
-
-# end QEMU_BLOCK section
-
# EPOLL section
AC_ARG_ENABLE([epoll],
AC_HELP_STRING([--disable-epoll],
@@ -1104,9 +1075,6 @@ if test "x${GF_HOST_OS}" != "xGF_LINUX_HOST_OS" ; then
fi
AC_SUBST(UMOUNTD_SUBDIR)
-# enable/disable QEMU
-AM_CONDITIONAL([ENABLE_QEMU_BLOCK], [test x$BUILD_QEMU_BLOCK = xyes])
-
# enable debug section
AC_ARG_ENABLE([debug],
@@ -1369,7 +1337,6 @@ echo "Block Device xlator : $BUILD_BD_XLATOR"
echo "glupy : $BUILD_GLUPY"
echo "Use syslog : $USE_SYSLOG"
echo "XML output : $BUILD_XML_OUTPUT"
-echo "QEMU Block formats : $BUILD_QEMU_BLOCK"
echo "Encryption xlator : $BUILD_CRYPT_XLATOR"
echo "Unit Tests : $BUILD_UNITTEST"
echo "POSIX ACLs : $BUILD_POSIX_ACLS"
diff --git a/contrib/qemu/block.c b/contrib/qemu/block.c
deleted file mode 100644
index b56024113b8..00000000000
--- a/contrib/qemu/block.c
+++ /dev/null
@@ -1,4604 +0,0 @@
-/*
- * QEMU System Emulator block driver
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "config-host.h"
-#include "qemu-common.h"
-#include "trace.h"
-#include "monitor/monitor.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-#include "qemu/module.h"
-#include "qapi/qmp/qjson.h"
-#include "sysemu/sysemu.h"
-#include "qemu/notify.h"
-#include "block/coroutine.h"
-#include "qmp-commands.h"
-#include "qemu/timer.h"
-
-#ifdef CONFIG_BSD
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <sys/queue.h>
-#ifndef __DragonFly__
-#include <sys/disk.h>
-#endif
-#endif
-
-#ifdef _WIN32
-#include <windows.h>
-#endif
-
-#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-
-typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
-} BdrvRequestFlags;
-
-static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
-static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque,
- bool is_write);
-static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
-
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
- double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, int64_t *wait);
-
-static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
- QTAILQ_HEAD_INITIALIZER(bdrv_states);
-
-static QLIST_HEAD(, BlockDriver) bdrv_drivers =
- QLIST_HEAD_INITIALIZER(bdrv_drivers);
-
-/* If non-zero, use only whitelisted block drivers */
-static int use_bdrv_whitelist;
-
-#ifdef _WIN32
-static int is_windows_drive_prefix(const char *filename)
-{
- return (((filename[0] >= 'a' && filename[0] <= 'z') ||
- (filename[0] >= 'A' && filename[0] <= 'Z')) &&
- filename[1] == ':');
-}
-
-int is_windows_drive(const char *filename)
-{
- if (is_windows_drive_prefix(filename) &&
- filename[2] == '\0')
- return 1;
- if (strstart(filename, "\\\\.\\", NULL) ||
- strstart(filename, "//./", NULL))
- return 1;
- return 0;
-}
-#endif
-
-/* throttling disk I/O limits */
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
- bs->io_limits_enabled = false;
-
- while (qemu_co_queue_next(&bs->throttled_reqs));
-
- if (bs->block_timer) {
- qemu_del_timer(bs->block_timer);
- qemu_free_timer(bs->block_timer);
- bs->block_timer = NULL;
- }
-
- bs->slice_start = 0;
- bs->slice_end = 0;
-}
-
-static void bdrv_block_timer(void *opaque)
-{
- BlockDriverState *bs = opaque;
-
- qemu_co_queue_next(&bs->throttled_reqs);
-}
-
-void bdrv_io_limits_enable(BlockDriverState *bs)
-{
- qemu_co_queue_init(&bs->throttled_reqs);
- bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
- bs->io_limits_enabled = true;
-}
-
-bool bdrv_io_limits_enabled(BlockDriverState *bs)
-{
- BlockIOLimit *io_limits = &bs->io_limits;
- return io_limits->bps[BLOCK_IO_LIMIT_READ]
- || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
- || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
- || io_limits->iops[BLOCK_IO_LIMIT_READ]
- || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
- || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
-}
-
-static void bdrv_io_limits_intercept(BlockDriverState *bs,
- bool is_write, int nb_sectors)
-{
- int64_t wait_time = -1;
-
- if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
- qemu_co_queue_wait(&bs->throttled_reqs);
- }
-
- /* In fact, we hope to keep each request's timing, in FIFO mode. The next
- * throttled requests will not be dequeued until the current request is
- * allowed to be serviced. So if the current request still exceeds the
- * limits, it will be inserted to the head. All requests followed it will
- * be still in throttled_reqs queue.
- */
-
- while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
- qemu_mod_timer(bs->block_timer,
- wait_time + qemu_get_clock_ns(vm_clock));
- qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
- }
-
- qemu_co_queue_next(&bs->throttled_reqs);
-}
-
-/* check if the path starts with "<protocol>:" */
-static int path_has_protocol(const char *path)
-{
- const char *p;
-
-#ifdef _WIN32
- if (is_windows_drive(path) ||
- is_windows_drive_prefix(path)) {
- return 0;
- }
- p = path + strcspn(path, ":/\\");
-#else
- p = path + strcspn(path, ":/");
-#endif
-
- return *p == ':';
-}
-
-int path_is_absolute(const char *path)
-{
-#ifdef _WIN32
- /* specific case for names like: "\\.\d:" */
- if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
- return 1;
- }
- return (*path == '/' || *path == '\\');
-#else
- return (*path == '/');
-#endif
-}
-
-/* if filename is absolute, just copy it to dest. Otherwise, build a
- path to it by considering it is relative to base_path. URL are
- supported. */
-void path_combine(char *dest, int dest_size,
- const char *base_path,
- const char *filename)
-{
- const char *p, *p1;
- int len;
-
- if (dest_size <= 0)
- return;
- if (path_is_absolute(filename)) {
- pstrcpy(dest, dest_size, filename);
- } else {
- p = strchr(base_path, ':');
- if (p)
- p++;
- else
- p = base_path;
- p1 = strrchr(base_path, '/');
-#ifdef _WIN32
- {
- const char *p2;
- p2 = strrchr(base_path, '\\');
- if (!p1 || p2 > p1)
- p1 = p2;
- }
-#endif
- if (p1)
- p1++;
- else
- p1 = base_path;
- if (p1 > p)
- p = p1;
- len = p - base_path;
- if (len > dest_size - 1)
- len = dest_size - 1;
- memcpy(dest, base_path, len);
- dest[len] = '\0';
- pstrcat(dest, dest_size, filename);
- }
-}
-
-void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
-{
- if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
- pstrcpy(dest, sz, bs->backing_file);
- } else {
- path_combine(dest, sz, bs->filename, bs->backing_file);
- }
-}
-
-void bdrv_register(BlockDriver *bdrv)
-{
- /* Block drivers without coroutine functions need emulation */
- if (!bdrv->bdrv_co_readv) {
- bdrv->bdrv_co_readv = bdrv_co_readv_em;
- bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
- /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
- * the block driver lacks aio we need to emulate that too.
- */
- if (!bdrv->bdrv_aio_readv) {
- /* add AIO emulation layer */
- bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
- bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
- }
- }
-
- QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
-}
-
-/* create a new block device (by default it is empty) */
-BlockDriverState *bdrv_new(const char *device_name)
-{
- BlockDriverState *bs;
-
- bs = g_malloc0(sizeof(BlockDriverState));
- pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
- if (device_name[0] != '\0') {
- QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
- }
- bdrv_iostatus_disable(bs);
- notifier_list_init(&bs->close_notifiers);
- notifier_with_return_list_init(&bs->before_write_notifiers);
-
- return bs;
-}
-
-void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
-{
- notifier_list_add(&bs->close_notifiers, notify);
-}
-
-BlockDriver *bdrv_find_format(const char *format_name)
-{
- BlockDriver *drv1;
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (!strcmp(drv1->format_name, format_name)) {
- return drv1;
- }
- }
- return NULL;
-}
-
-static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
-{
- static const char *whitelist_rw[] = {
- CONFIG_BDRV_RW_WHITELIST
- };
- static const char *whitelist_ro[] = {
- CONFIG_BDRV_RO_WHITELIST
- };
- const char **p;
-
- if (!whitelist_rw[0] && !whitelist_ro[0]) {
- return 1; /* no whitelist, anything goes */
- }
-
- for (p = whitelist_rw; *p; p++) {
- if (!strcmp(drv->format_name, *p)) {
- return 1;
- }
- }
- if (read_only) {
- for (p = whitelist_ro; *p; p++) {
- if (!strcmp(drv->format_name, *p)) {
- return 1;
- }
- }
- }
- return 0;
-}
-
-BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
- bool read_only)
-{
- BlockDriver *drv = bdrv_find_format(format_name);
- return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
-}
-
-typedef struct CreateCo {
- BlockDriver *drv;
- char *filename;
- QEMUOptionParameter *options;
- int ret;
-} CreateCo;
-
-static void coroutine_fn bdrv_create_co_entry(void *opaque)
-{
- CreateCo *cco = opaque;
- assert(cco->drv);
-
- cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
-}
-
-int bdrv_create(BlockDriver *drv, const char* filename,
- QEMUOptionParameter *options)
-{
- int ret;
-
- Coroutine *co;
- CreateCo cco = {
- .drv = drv,
- .filename = g_strdup(filename),
- .options = options,
- .ret = NOT_DONE,
- };
-
- if (!drv->bdrv_create) {
- ret = -ENOTSUP;
- goto out;
- }
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_create_co_entry(&cco);
- } else {
- co = qemu_coroutine_create(bdrv_create_co_entry);
- qemu_coroutine_enter(co, &cco);
- while (cco.ret == NOT_DONE) {
- qemu_aio_wait();
- }
- }
-
- ret = cco.ret;
-
-out:
- g_free(cco.filename);
- return ret;
-}
-
-int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
-{
- BlockDriver *drv;
-
- drv = bdrv_find_protocol(filename, true);
- if (drv == NULL) {
- return -ENOENT;
- }
-
- return bdrv_create(drv, filename, options);
-}
-
-/*
- * Create a uniquely-named empty temporary file.
- * Return 0 upon success, otherwise a negative errno value.
- */
-int get_tmp_filename(char *filename, int size)
-{
-#ifdef _WIN32
- char temp_dir[MAX_PATH];
- /* GetTempFileName requires that its output buffer (4th param)
- have length MAX_PATH or greater. */
- assert(size >= MAX_PATH);
- return (GetTempPath(MAX_PATH, temp_dir)
- && GetTempFileName(temp_dir, "qem", 0, filename)
- ? 0 : -GetLastError());
-#else
- int fd;
- const char *tmpdir;
- tmpdir = getenv("TMPDIR");
- if (!tmpdir)
- tmpdir = "/tmp";
- if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
- return -EOVERFLOW;
- }
- fd = mkstemp(filename);
- if (fd < 0) {
- return -errno;
- }
- if (close(fd) != 0) {
- unlink(filename);
- return -errno;
- }
- return 0;
-#endif
-}
-
-/*
- * Detect host devices. By convention, /dev/cdrom[N] is always
- * recognized as a host CDROM.
- */
-static BlockDriver *find_hdev_driver(const char *filename)
-{
- int score_max = 0, score;
- BlockDriver *drv = NULL, *d;
-
- QLIST_FOREACH(d, &bdrv_drivers, list) {
- if (d->bdrv_probe_device) {
- score = d->bdrv_probe_device(filename);
- if (score > score_max) {
- score_max = score;
- drv = d;
- }
- }
- }
-
- return drv;
-}
-
-BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix)
-{
- BlockDriver *drv1;
- char protocol[128];
- int len;
- const char *p;
-
- /* TODO Drivers without bdrv_file_open must be specified explicitly */
-
- /*
- * XXX(hch): we really should not let host device detection
- * override an explicit protocol specification, but moving this
- * later breaks access to device names with colons in them.
- * Thanks to the brain-dead persistent naming schemes on udev-
- * based Linux systems those actually are quite common.
- */
- drv1 = find_hdev_driver(filename);
- if (drv1) {
- return drv1;
- }
-
- if (!path_has_protocol(filename) || !allow_protocol_prefix) {
- return bdrv_find_format("file");
- }
-
- p = strchr(filename, ':');
- assert(p != NULL);
- len = p - filename;
- if (len > sizeof(protocol) - 1)
- len = sizeof(protocol) - 1;
- memcpy(protocol, filename, len);
- protocol[len] = '\0';
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (drv1->protocol_name &&
- !strcmp(drv1->protocol_name, protocol)) {
- return drv1;
- }
- }
- return NULL;
-}
-
-static int find_image_format(BlockDriverState *bs, const char *filename,
- BlockDriver **pdrv)
-{
- int score, score_max;
- BlockDriver *drv1, *drv;
- uint8_t buf[2048];
- int ret = 0;
-
- /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
- if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
- drv = bdrv_find_format("raw");
- if (!drv) {
- ret = -ENOENT;
- }
- *pdrv = drv;
- return ret;
- }
-
- ret = bdrv_pread(bs, 0, buf, sizeof(buf));
- if (ret < 0) {
- *pdrv = NULL;
- return ret;
- }
-
- score_max = 0;
- drv = NULL;
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (drv1->bdrv_probe) {
- score = drv1->bdrv_probe(buf, ret, filename);
- if (score > score_max) {
- score_max = score;
- drv = drv1;
- }
- }
- }
- if (!drv) {
- ret = -ENOENT;
- }
- *pdrv = drv;
- return ret;
-}
-
-/**
- * Set the current 'total_sectors' value
- */
-static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
-{
- BlockDriver *drv = bs->drv;
-
- /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
- if (bs->sg)
- return 0;
-
- /* query actual device if possible, otherwise just trust the hint */
- if (drv->bdrv_getlength) {
- int64_t length = drv->bdrv_getlength(bs);
- if (length < 0) {
- return length;
- }
- hint = length >> BDRV_SECTOR_BITS;
- }
-
- bs->total_sectors = hint;
- return 0;
-}
-
-/**
- * Set open flags for a given discard mode
- *
- * Return 0 on success, -1 if the discard mode was invalid.
- */
-int bdrv_parse_discard_flags(const char *mode, int *flags)
-{
- *flags &= ~BDRV_O_UNMAP;
-
- if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
- /* do nothing */
- } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
- *flags |= BDRV_O_UNMAP;
- } else {
- return -1;
- }
-
- return 0;
-}
-
-/**
- * Set open flags for a given cache mode
- *
- * Return 0 on success, -1 if the cache mode was invalid.
- */
-int bdrv_parse_cache_flags(const char *mode, int *flags)
-{
- *flags &= ~BDRV_O_CACHE_MASK;
-
- if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
- *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
- } else if (!strcmp(mode, "directsync")) {
- *flags |= BDRV_O_NOCACHE;
- } else if (!strcmp(mode, "writeback")) {
- *flags |= BDRV_O_CACHE_WB;
- } else if (!strcmp(mode, "unsafe")) {
- *flags |= BDRV_O_CACHE_WB;
- *flags |= BDRV_O_NO_FLUSH;
- } else if (!strcmp(mode, "writethrough")) {
- /* this is the default */
- } else {
- return -1;
- }
-
- return 0;
-}
-
-/**
- * The copy-on-read flag is actually a reference count so multiple users may
- * use the feature without worrying about clobbering its previous state.
- * Copy-on-read stays enabled until all users have called to disable it.
- */
-void bdrv_enable_copy_on_read(BlockDriverState *bs)
-{
- bs->copy_on_read++;
-}
-
-void bdrv_disable_copy_on_read(BlockDriverState *bs)
-{
- assert(bs->copy_on_read > 0);
- bs->copy_on_read--;
-}
-
-static int bdrv_open_flags(BlockDriverState *bs, int flags)
-{
- int open_flags = flags | BDRV_O_CACHE_WB;
-
- /*
- * Clear flags that are internal to the block layer before opening the
- * image.
- */
- open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
- /*
- * Snapshots should be writable.
- */
- if (bs->is_temporary) {
- open_flags |= BDRV_O_RDWR;
- }
-
- return open_flags;
-}
-
-/*
- * Common part for opening disk images and files
- *
- * Removes all processed options from *options.
- */
-static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
- QDict *options, int flags, BlockDriver *drv)
-{
- int ret, open_flags;
- const char *filename;
-
- assert(drv != NULL);
- assert(bs->file == NULL);
- assert(options != NULL && bs->options != options);
-
- if (file != NULL) {
- filename = file->filename;
- } else {
- filename = qdict_get_try_str(options, "filename");
- }
-
- trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
-
- /* bdrv_open() with directly using a protocol as drv. This layer is already
- * opened, so assign it to bs (while file becomes a closed BlockDriverState)
- * and return immediately. */
- if (file != NULL && drv->bdrv_file_open) {
- bdrv_swap(file, bs);
- return 0;
- }
-
- bs->open_flags = flags;
- bs->buffer_alignment = 512;
- open_flags = bdrv_open_flags(bs, flags);
- bs->read_only = !(open_flags & BDRV_O_RDWR);
-
- if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
- return -ENOTSUP;
- }
-
- assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
- if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
- bdrv_enable_copy_on_read(bs);
- }
-
- if (filename != NULL) {
- pstrcpy(bs->filename, sizeof(bs->filename), filename);
- } else {
- bs->filename[0] = '\0';
- }
-
- bs->drv = drv;
- bs->opaque = g_malloc0(drv->instance_size);
-
- bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
-
- /* Open the image, either directly or using a protocol */
- if (drv->bdrv_file_open) {
- assert(file == NULL);
- assert(drv->bdrv_parse_filename || filename != NULL);
- ret = drv->bdrv_file_open(bs, options, open_flags);
- } else {
- if (file == NULL) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
- "block driver for the protocol level",
- drv->format_name);
- ret = -EINVAL;
- goto free_and_fail;
- }
- assert(file != NULL);
- bs->file = file;
- ret = drv->bdrv_open(bs, options, open_flags);
- }
-
- if (ret < 0) {
- goto free_and_fail;
- }
-
- ret = refresh_total_sectors(bs, bs->total_sectors);
- if (ret < 0) {
- goto free_and_fail;
- }
-
-#ifndef _WIN32
- if (bs->is_temporary) {
- assert(filename != NULL);
- unlink(filename);
- }
-#endif
- return 0;
-
-free_and_fail:
- bs->file = NULL;
- g_free(bs->opaque);
- bs->opaque = NULL;
- bs->drv = NULL;
- return ret;
-}
-
-/*
- * Opens a file using a protocol (file, host_device, nbd, ...)
- *
- * options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict belongs to the block layer
- * after the call (even on failure), so if the caller intends to reuse the
- * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
- */
-int bdrv_file_open(BlockDriverState **pbs, const char *filename,
- QDict *options, int flags)
-{
- BlockDriverState *bs;
- BlockDriver *drv;
- const char *drvname;
- bool allow_protocol_prefix = false;
- int ret;
-
- /* NULL means an empty set of options */
- if (options == NULL) {
- options = qdict_new();
- }
-
- bs = bdrv_new("");
- bs->options = options;
- options = qdict_clone_shallow(options);
-
- /* Fetch the file name from the options QDict if necessary */
- if (!filename) {
- filename = qdict_get_try_str(options, "filename");
- } else if (filename && !qdict_haskey(options, "filename")) {
- qdict_put(options, "filename", qstring_from_str(filename));
- allow_protocol_prefix = true;
- } else {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
- "'filename' options at the same time");
- ret = -EINVAL;
- goto fail;
- }
-
- /* Find the right block driver */
- drvname = qdict_get_try_str(options, "driver");
- if (drvname) {
- drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
- qdict_del(options, "driver");
- } else if (filename) {
- drv = bdrv_find_protocol(filename, allow_protocol_prefix);
- if (!drv) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
- }
- } else {
- qerror_report(ERROR_CLASS_GENERIC_ERROR,
- "Must specify either driver or file");
- drv = NULL;
- }
-
- if (!drv) {
- ret = -ENOENT;
- goto fail;
- }
-
- /* Parse the filename and open it */
- if (drv->bdrv_parse_filename && filename) {
- Error *local_err = NULL;
- drv->bdrv_parse_filename(filename, options, &local_err);
- if (error_is_set(&local_err)) {
- qerror_report_err(local_err);
- error_free(local_err);
- ret = -EINVAL;
- goto fail;
- }
- qdict_del(options, "filename");
- } else if (!drv->bdrv_parse_filename && !filename) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR,
- "The '%s' block driver requires a file name",
- drv->format_name);
- ret = -EINVAL;
- goto fail;
- }
-
- ret = bdrv_open_common(bs, NULL, options, flags, drv);
- if (ret < 0) {
- goto fail;
- }
-
- /* Check if any unknown options were used */
- if (qdict_size(options) != 0) {
- const QDictEntry *entry = qdict_first(options);
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
- "support the option '%s'",
- drv->format_name, entry->key);
- ret = -EINVAL;
- goto fail;
- }
- QDECREF(options);
-
- bs->growable = 1;
- *pbs = bs;
- return 0;
-
-fail:
- QDECREF(options);
- if (!bs->drv) {
- QDECREF(bs->options);
- }
- bdrv_delete(bs);
- return ret;
-}
-
-/*
- * Opens the backing file for a BlockDriverState if not yet open
- *
- * options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict is transferred to this
- * function (even on failure), so if the caller intends to reuse the dictionary,
- * it needs to use QINCREF() before calling bdrv_file_open.
- */
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
-{
- char backing_filename[PATH_MAX];
- int back_flags, ret;
- BlockDriver *back_drv = NULL;
-
- if (bs->backing_hd != NULL) {
- QDECREF(options);
- return 0;
- }
-
- /* NULL means an empty set of options */
- if (options == NULL) {
- options = qdict_new();
- }
-
- bs->open_flags &= ~BDRV_O_NO_BACKING;
- if (qdict_haskey(options, "file.filename")) {
- backing_filename[0] = '\0';
- } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
- QDECREF(options);
- return 0;
- }
-
- bs->backing_hd = bdrv_new("");
- bdrv_get_full_backing_filename(bs, backing_filename,
- sizeof(backing_filename));
-
- if (bs->backing_format[0] != '\0') {
- back_drv = bdrv_find_format(bs->backing_format);
- }
-
- /* backing files always opened read-only */
- back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
-
- ret = bdrv_open(bs->backing_hd,
- *backing_filename ? backing_filename : NULL, options,
- back_flags, back_drv);
- if (ret < 0) {
- bdrv_delete(bs->backing_hd);
- bs->backing_hd = NULL;
- bs->open_flags |= BDRV_O_NO_BACKING;
- return ret;
- }
- return 0;
-}
-
-/*
- * Creates a new QDict in *dst and moves into it every entry of src whose key
- * matches the prefix 'start' according to strstart(). The key used in *dst is
- * the remainder reported by strstart(); the entry is then deleted from src.
- */
-static void extract_subqdict(QDict *src, QDict **dst, const char *start)
-{
-    const QDictEntry *cur, *following;
-    const char *remainder;
-
-    *dst = qdict_new();
-
-    /* Look up the successor first: the current entry may be deleted below */
-    for (cur = qdict_first(src); cur != NULL; cur = following) {
-        following = qdict_next(src, cur);
-        if (!strstart(cur->key, start, &remainder)) {
-            continue;
-        }
-        qobject_incref(cur->value);
-        qdict_put_obj(*dst, remainder, cur->value);
-        qdict_del(src, cur->key);
-    }
-}
-
-/*
- * Opens a disk image (raw, qcow2, vmdk, ...)
- *
- * options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict belongs to the block layer
- * after the call (even on failure), so if the caller intends to reuse the
- * dictionary, it needs to use QINCREF() before calling bdrv_open.
- *
- * The dictionary is stored in bs->options and a shallow clone of it is
- * consumed while opening: "file." entries go to bdrv_file_open(), "backing."
- * entries go to bdrv_open_backing_file(), and anything left over at the end
- * is reported as an unsupported option.
- *
- * Returns 0 on success and a negative value on failure.
- */
-int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
- int flags, BlockDriver *drv)
-{
- int ret;
- /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
- char tmp_filename[PATH_MAX + 1];
- BlockDriverState *file = NULL;
- QDict *file_options = NULL;
-
- /* NULL means an empty set of options */
- if (options == NULL) {
- options = qdict_new();
- }
-
- bs->options = options;
- options = qdict_clone_shallow(options);
-
- /* For snapshot=on, create a temporary qcow2 overlay */
- if (flags & BDRV_O_SNAPSHOT) {
- BlockDriverState *bs1;
- int64_t total_size;
- BlockDriver *bdrv_qcow2;
- QEMUOptionParameter *create_options;
- char backing_filename[PATH_MAX];
-
- if (qdict_size(options) != 0) {
- error_report("Can't use snapshot=on with driver-specific options");
- ret = -EINVAL;
- goto fail;
- }
- assert(filename != NULL);
-
- /* if snapshot, we create a temporary backing file and open it
- instead of opening 'filename' directly */
-
- /* open the image once, only to find out its size for the overlay */
- bs1 = bdrv_new("");
- ret = bdrv_open(bs1, filename, NULL, 0, drv);
- if (ret < 0) {
- bdrv_delete(bs1);
- goto fail;
- }
- total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
-
- bdrv_delete(bs1);
-
- ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
- if (ret < 0) {
- goto fail;
- }
-
- /* Real path is meaningless for protocols */
- if (path_has_protocol(filename)) {
- snprintf(backing_filename, sizeof(backing_filename),
- "%s", filename);
- } else if (!realpath(filename, backing_filename)) {
- ret = -errno;
- goto fail;
- }
-
- bdrv_qcow2 = bdrv_find_format("qcow2");
- create_options = parse_option_parameters("", bdrv_qcow2->create_options,
- NULL);
-
- set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
- set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
- backing_filename);
- if (drv) {
- set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
- drv->format_name);
- }
-
- ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
- free_option_parameters(create_options);
- if (ret < 0) {
- goto fail;
- }
-
- filename = tmp_filename;
- drv = bdrv_qcow2;
- bs->is_temporary = 1;
- }
-
- /* Open image file without format layer */
- if (flags & BDRV_O_RDWR) {
- flags |= BDRV_O_ALLOW_RDWR;
- }
-
- extract_subqdict(options, &file_options, "file.");
-
- ret = bdrv_file_open(&file, filename, file_options,
- bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
- if (ret < 0) {
- goto fail;
- }
-
- /* Find the right image format driver */
- if (!drv) {
- ret = find_image_format(file, filename, &drv);
- }
-
- if (!drv) {
- goto unlink_and_fail;
- }
-
- /* Open the image */
- ret = bdrv_open_common(bs, file, options, flags, drv);
- if (ret < 0) {
- goto unlink_and_fail;
- }
-
- if (bs->file != file) {
- bdrv_delete(file);
- file = NULL;
- }
-
- /* If there is a backing file, use it */
- if ((flags & BDRV_O_NO_BACKING) == 0) {
- QDict *backing_options;
-
- extract_subqdict(options, &backing_options, "backing.");
- ret = bdrv_open_backing_file(bs, backing_options);
- if (ret < 0) {
- goto close_and_fail;
- }
- }
-
- /* Check if any unknown options were used */
- if (qdict_size(options) != 0) {
- const QDictEntry *entry = qdict_first(options);
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
- "device '%s' doesn't support the option '%s'",
- drv->format_name, bs->device_name, entry->key);
-
- ret = -EINVAL;
- goto close_and_fail;
- }
- QDECREF(options);
-
- if (!bdrv_key_required(bs)) {
- bdrv_dev_change_media_cb(bs, true);
- }
-
- /* throttling disk I/O limits */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_enable(bs);
- }
-
- return 0;
-
-unlink_and_fail:
- if (file != NULL) {
- bdrv_delete(file);
- }
- if (bs->is_temporary) {
- unlink(filename);
- }
-fail:
- QDECREF(bs->options);
- QDECREF(options);
- bs->options = NULL;
- return ret;
-
-close_and_fail:
- bdrv_close(bs);
- QDECREF(options);
- return ret;
-}
-
-typedef struct BlockReopenQueueEntry {
- bool prepared; /* set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded for this entry */
- BDRVReopenState state; /* bs and flags are filled in by bdrv_reopen_queue() */
- QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; /* linkage within the BlockReopenQueue */
-} BlockReopenQueueEntry;
-
-/*
- * Adds a BlockDriverState to a simple queue for an atomic, transactional
- * reopen of multiple devices.
- *
- * bs_queue can either be an existing BlockReopenQueue that has had
- * QSIMPLEQ_INIT already performed, or alternatively may be NULL, in which
- * case a new BlockReopenQueue will be created and initialized. This newly
- * created BlockReopenQueue should be passed back in for subsequent calls
- * that are intended to be of the same atomic 'set'.
- *
- * bs is the BlockDriverState to add to the reopen queue. If bs->file is set,
- * it is queued (recursively, with the same flags) before bs itself.
- *
- * flags contains the open flags for the associated bs
- *
- * returns a pointer to bs_queue, which is either the newly allocated
- * bs_queue, or the existing bs_queue being used.
- *
- */
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
-                                    BlockDriverState *bs, int flags)
-{
-    BlockReopenQueueEntry *new_entry;
-
-    assert(bs != NULL);
-
-    if (!bs_queue) {
-        bs_queue = g_new0(BlockReopenQueue, 1);
-        QSIMPLEQ_INIT(bs_queue);
-    }
-
-    if (bs->file) {
-        bdrv_reopen_queue(bs_queue, bs->file, flags);
-    }
-
-    new_entry = g_new0(BlockReopenQueueEntry, 1);
-    new_entry->state.bs = bs;
-    new_entry->state.flags = flags;
-    QSIMPLEQ_INSERT_TAIL(bs_queue, new_entry, entry);
-
-    return bs_queue;
-}
-
-/*
- * Reopen multiple BlockDriverStates atomically & transactionally.
- *
- * The queue passed in (bs_queue) must have been built up previously
- * via bdrv_reopen_queue().
- *
- * Reopens all BDS specified in the queue, with the appropriate
- * flags. All devices are prepared for reopen, and failure of any
- * device will cause all device changes to be abandoned, and intermediate
- * data cleaned up.
- *
- * If all devices prepare successfully, then the changes are committed
- * to all devices.
- *
- * The queue and all of its entries are freed before returning, on success
- * as well as on failure. Returns 0 on success and -1 on failure.
- *
- */
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
-{
-    BlockReopenQueueEntry *item, *item_next;
-    Error *local_err = NULL;
-    bool failed = true;
-
-    assert(bs_queue != NULL);
-
-    bdrv_drain_all();
-
-    /* Phase 1: stage the change on every queued BDS */
-    QSIMPLEQ_FOREACH(item, bs_queue, entry) {
-        if (bdrv_reopen_prepare(&item->state, bs_queue, &local_err) != 0) {
-            error_propagate(errp, local_err);
-            goto cleanup;
-        }
-        item->prepared = true;
-    }
-
-    /* Phase 2: everything prepared fine, so apply the changes */
-    QSIMPLEQ_FOREACH(item, bs_queue, entry) {
-        bdrv_reopen_commit(&item->state);
-    }
-    failed = false;
-
-cleanup:
-    QSIMPLEQ_FOREACH_SAFE(item, bs_queue, entry, item_next) {
-        /* Only entries that completed their prepare step are rolled back */
-        if (failed && item->prepared) {
-            bdrv_reopen_abort(&item->state);
-        }
-        g_free(item);
-    }
-    g_free(bs_queue);
-    return failed ? -1 : 0;
-}
-
-
-/*
- * Reopen a single BlockDriverState with the specified flags, by building a
- * one-off reopen queue for it and running bdrv_reopen_multiple() on that.
- */
-int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
-{
-    Error *local_err = NULL;
-    BlockReopenQueue *single = bdrv_reopen_queue(NULL, bs, bdrv_flags);
-    int ret = bdrv_reopen_multiple(single, &local_err);
-
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-    return ret;
-}
-
-
-/*
- * Prepares a BlockDriverState for reopen. All changes are staged in the
- * 'opaque' field of the BDRVReopenState, which is used and allocated by
- * the block driver layer .bdrv_reopen_prepare()
- *
- * reopen_state holds the BlockDriverState to reopen and the new open flags
- * queue is the reopen queue
- *
- * Returns 0 on success, non-zero on error. On error errp will be set
- * as well.
- *
- * This function does not call bdrv_reopen_abort() itself. It is the
- * responsibility of the caller to then call the abort() or commit() for
- * any other BDS that have been left in a prepare() state
- *
- */
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
-                        Error **errp)
-{
-    Error *local_err = NULL;
-    BlockDriver *drv;
-    int ret;
-
-    assert(reopen_state != NULL);
-    assert(reopen_state->bs->drv != NULL);
-    drv = reopen_state->bs->drv;
-
-    /* if we are to stay read-only, do not allow permission change
-     * to r/w */
-    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
-        (reopen_state->flags & BDRV_O_RDWR)) {
-        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
-                  reopen_state->bs->device_name);
-        return -1;
-    }
-
-    ret = bdrv_flush(reopen_state->bs);
-    if (ret) {
-        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
-                  strerror(-ret));
-        return ret;
-    }
-
-    if (!drv->bdrv_reopen_prepare) {
-        /* It is currently mandatory to have a bdrv_reopen_prepare()
-         * handler for each supported drv. */
-        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-                  drv->format_name, reopen_state->bs->device_name,
-                  "reopening of file");
-        return -1;
-    }
-
-    ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
-    if (ret) {
-        /* Prefer the driver's own error; fall back to a generic one */
-        if (local_err != NULL) {
-            error_propagate(errp, local_err);
-        } else {
-            error_setg(errp, "failed while preparing to reopen image '%s'",
-                       reopen_state->bs->filename);
-        }
-        return ret;
-    }
-
-    return 0;
-}
-
-/*
- * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
- * makes them final: the driver's bdrv_reopen_commit() handler (if any) runs
- * first, then the new flags are recorded in the BlockDriverState.
- */
-void bdrv_reopen_commit(BDRVReopenState *reopen_state)
-{
-    BlockDriverState *bs;
-    BlockDriver *drv;
-
-    assert(reopen_state != NULL);
-    drv = reopen_state->bs->drv;
-    assert(drv != NULL);
-
-    /* Driver-level actions, if the driver has any */
-    if (drv->bdrv_reopen_commit) {
-        drv->bdrv_reopen_commit(reopen_state);
-    }
-
-    /* BDS-level state derived from the new flags */
-    bs = reopen_state->bs;
-    bs->open_flags = reopen_state->flags;
-    bs->enable_write_cache = (reopen_state->flags & BDRV_O_CACHE_WB) != 0;
-    bs->read_only = (reopen_state->flags & BDRV_O_RDWR) == 0;
-}
-
-/*
- * Abort the reopen by handing reopen_state to the driver's
- * bdrv_reopen_abort() handler, if it has one, so that it can delete and
- * free the staged changes.
- */
-void bdrv_reopen_abort(BDRVReopenState *reopen_state)
-{
-    BlockDriver *drv;
-
-    assert(reopen_state != NULL);
-    drv = reopen_state->bs->drv;
-    assert(drv != NULL);
-
-    if (!drv->bdrv_reopen_abort) {
-        return;
-    }
-    drv->bdrv_reopen_abort(reopen_state);
-}
-
-
-/*
- * Closes bs. Any block job is cancelled synchronously, pending I/O is
- * drained and flushed, and the close notifiers are run. If a driver is
- * attached, the backing file and the underlying file are deleted, the
- * driver's close handler is called and the per-image fields are reset.
- * Finally the device is told that the medium is gone and I/O throttling is
- * switched off.
- */
-void bdrv_close(BlockDriverState *bs)
-{
-    if (bs->job) {
-        block_job_cancel_sync(bs->job);
-    }
-    bdrv_drain_all(); /* complete I/O */
-    bdrv_flush(bs);
-    bdrv_drain_all(); /* in case flush left pending I/O */
-    notifier_list_notify(&bs->close_notifiers, bs);
-
-    if (bs->drv != NULL) {
-        if (bs->backing_hd != NULL) {
-            bdrv_delete(bs->backing_hd);
-            bs->backing_hd = NULL;
-        }
-        bs->drv->bdrv_close(bs);
-        g_free(bs->opaque);
-#ifdef _WIN32
-        if (bs->is_temporary) {
-            unlink(bs->filename);
-        }
-#endif
-        /* Reset everything that described the image just closed */
-        bs->opaque = NULL;
-        bs->drv = NULL;
-        bs->copy_on_read = 0;
-        bs->backing_file[0] = '\0';
-        bs->backing_format[0] = '\0';
-        bs->total_sectors = 0;
-        bs->encrypted = 0;
-        bs->valid_key = 0;
-        bs->sg = 0;
-        bs->growable = 0;
-        QDECREF(bs->options);
-        bs->options = NULL;
-
-        if (bs->file != NULL) {
-            bdrv_delete(bs->file);
-            bs->file = NULL;
-        }
-    }
-
-    bdrv_dev_change_media_cb(bs, false);
-
-    /* throttling disk I/O limits */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_disable(bs);
-    }
-}
-
-/* Calls bdrv_close() on every BlockDriverState in the bdrv_states list. */
-void bdrv_close_all(void)
-{
-    BlockDriverState *cur;
-
-    QTAILQ_FOREACH(cur, &bdrv_states, list) {
-        bdrv_close(cur);
-    }
-}
-
-/*
- * Wait for pending requests to complete across all BlockDriverStates
- *
- * This function does not flush data to disk, use bdrv_flush_all() for that
- * after calling this function.
- *
- * Note that completion of an asynchronous I/O operation can trigger any
- * number of other I/O operations on other devices---for example a coroutine
- * can be arbitrarily complex and a constant flow of I/O can come until the
- * coroutine is complete. Because of this, it is not possible to have a
- * function to drain a single device's I/O queue.
- */
-void bdrv_drain_all(void)
-{
-    BlockDriverState *cur;
-    bool pending;
-
-    do {
-        pending = qemu_aio_wait();
-
-        /* FIXME: We do not have timer support here, so this is effectively
-         * a busy wait.
-         */
-        QTAILQ_FOREACH(cur, &bdrv_states, list) {
-            if (qemu_co_queue_empty(&cur->throttled_reqs)) {
-                continue;
-            }
-            /* Kick throttled requests so that they can make progress */
-            qemu_co_queue_restart_all(&cur->throttled_reqs);
-            pending = true;
-        }
-    } while (pending);
-
-    /* If requests are still pending there is a bug somewhere */
-    QTAILQ_FOREACH(cur, &bdrv_states, list) {
-        assert(QLIST_EMPTY(&cur->tracked_requests));
-        assert(qemu_co_queue_empty(&cur->throttled_reqs));
-    }
-}
-
-/*
- * Make a BlockDriverState anonymous: remove it from the bdrv_states list if
- * it has a device name, then empty the device name so that a second call
- * does not try to remove it again.
- */
-void bdrv_make_anon(BlockDriverState *bs)
-{
-    bool is_named = bs->device_name[0] != '\0';
-
-    if (is_named) {
-        QTAILQ_REMOVE(&bdrv_states, bs, list);
-    }
-    bs->device_name[0] = '\0';
-}
-
-/* Invokes the driver's bdrv_rebind() handler, if bs has a driver with one. */
-static void bdrv_rebind(BlockDriverState *bs)
-{
-    if (!bs->drv || !bs->drv->bdrv_rebind) {
-        return;
-    }
-    bs->drv->bdrv_rebind(bs);
-}
-
-/*
- * Copies from bs_src to bs_dest the fields that need to stay attached to
- * the device rather than to the image contents. Used by bdrv_swap() to put
- * these fields back after the two states have been exchanged wholesale.
- */
-static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
-                                     BlockDriverState *bs_src)
-{
-    /* open flags and cache mode */
-    bs_dest->open_flags         = bs_src->open_flags;
-    bs_dest->enable_write_cache = bs_src->enable_write_cache;
-
-    /* attached device and its callbacks */
-    bs_dest->dev                = bs_src->dev;
-    bs_dest->dev_ops            = bs_src->dev_ops;
-    bs_dest->dev_opaque         = bs_src->dev_opaque;
-    bs_dest->buffer_alignment   = bs_src->buffer_alignment;
-    bs_dest->copy_on_read       = bs_src->copy_on_read;
-
-    /* I/O throttling state */
-    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
-    bs_dest->io_limits          = bs_src->io_limits;
-    bs_dest->slice_start        = bs_src->slice_start;
-    bs_dest->slice_end          = bs_src->slice_end;
-    bs_dest->slice_submitted    = bs_src->slice_submitted;
-    bs_dest->throttled_reqs     = bs_src->throttled_reqs;
-    bs_dest->block_timer        = bs_src->block_timer;
-
-    /* read/write error policy */
-    bs_dest->on_read_error      = bs_src->on_read_error;
-    bs_dest->on_write_error     = bs_src->on_write_error;
-
-    /* I/O status */
-    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
-    bs_dest->iostatus           = bs_src->iostatus;
-
-    /* dirty bitmap */
-    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
-
-    /* block job */
-    bs_dest->in_use             = bs_src->in_use;
-    bs_dest->job                = bs_src->job;
-
-    /* keep the same entry in bdrv_states */
-    pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
-            bs_src->device_name);
-    bs_dest->list               = bs_src->list;
-}
-
-/*
- * Swap bs contents for two image chains while they are live,
- * while keeping required fields on the BlockDriverState that is
- * actually attached to a device.
- *
- * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_old. Both bs_new and bs_old are modified.
- *
- * bs_new is required to be anonymous.
- *
- * This function does not create any image files.
- */
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
-{
-    BlockDriverState saved;
-
-    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
-    assert(bs_new->device_name[0] == '\0');
-    assert(bs_new->dirty_bitmap == NULL);
-    assert(bs_new->job == NULL);
-    assert(bs_new->dev == NULL);
-    assert(bs_new->in_use == 0);
-    assert(!bs_new->io_limits_enabled);
-    assert(bs_new->block_timer == NULL);
-
-    /* Exchange the two structures wholesale... */
-    saved = *bs_new;
-    *bs_new = *bs_old;
-    *bs_old = saved;
-
-    /* ...then rotate the device-bound fields back to where they were */
-    bdrv_move_feature_fields(&saved, bs_old);
-    bdrv_move_feature_fields(bs_old, bs_new);
-    bdrv_move_feature_fields(bs_new, &saved);
-
-    /* bs_new shouldn't be in bdrv_states even after the swap! */
-    assert(bs_new->device_name[0] == '\0');
-
-    /* Check a few fields that should remain attached to the device */
-    assert(bs_new->dev == NULL);
-    assert(bs_new->job == NULL);
-    assert(bs_new->in_use == 0);
-    assert(!bs_new->io_limits_enabled);
-    assert(bs_new->block_timer == NULL);
-
-    bdrv_rebind(bs_new);
-    bdrv_rebind(bs_old);
-}
-
-/*
- * Add new bs contents at the top of an image chain while the chain is
- * live, while keeping required fields on the top layer.
- *
- * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_top. Both bs_new and bs_top are modified.
- *
- * bs_new is required to be anonymous.
- *
- * This function does not create any image files.
- */
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
-{
-    const char *backing_fmt;
-
-    bdrv_swap(bs_new, bs_top);
-
-    /* After the swap bs_top holds the new contents, and bs_new (holding
-     * the former top contents) becomes its backing file. */
-    bs_top->backing_hd = bs_new;
-    bs_top->open_flags &= ~BDRV_O_NO_BACKING;
-    pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
-            bs_new->filename);
-
-    backing_fmt = bs_new->drv ? bs_new->drv->format_name : "";
-    pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
-            backing_fmt);
-}
-
-/*
- * Closes and frees bs. The state must not have a device attached, a job
- * running, or be marked in use.
- */
-void bdrv_delete(BlockDriverState *bs)
-{
-    assert(bs->dev == NULL);
-    assert(bs->job == NULL);
-    assert(bs->in_use == 0);
-
-    /* remove from list, if necessary */
-    bdrv_make_anon(bs);
-    bdrv_close(bs);
-    g_free(bs);
-}
-
-/*
- * Attaches dev to bs and resets the I/O status.
- * Returns 0 on success, or -EBUSY if a device is already attached.
- */
-int bdrv_attach_dev(BlockDriverState *bs, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-    if (bs->dev != NULL) {
-        return -EBUSY;
-    }
-
-    bs->dev = dev;
-    bdrv_iostatus_reset(bs);
-    return 0;
-}
-
-/* TODO qdevified devices don't use this, remove when devices are qdevified */
-/* Like bdrv_attach_dev(), but aborts the program if attaching fails. */
-void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
-{
-    int ret = bdrv_attach_dev(bs, dev);
-
-    if (ret < 0) {
-        abort();
-    }
-}
-
-/*
- * Detaches dev, which must be the device currently attached to bs, and
- * clears the device callbacks. The buffer alignment is set back to 512.
- */
-void bdrv_detach_dev(BlockDriverState *bs, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-    assert(bs->dev == dev);
-
-    bs->dev_ops = NULL;
-    bs->dev_opaque = NULL;
-    bs->dev = NULL;
-    bs->buffer_alignment = 512;
-}
-
-/* TODO change to return DeviceState * when all users are qdevified */
-/* Returns the device attached to bs, or NULL if there is none. */
-void *bdrv_get_attached_dev(BlockDriverState *bs)
-{
-    void *attached = bs->dev;
-
-    return attached;
-}
-
-/*
- * Installs the device callback table for bs, together with the opaque
- * pointer that is passed to those callbacks.
- */
-void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
-                      void *opaque)
-{
-    bs->dev_opaque = opaque;
-    bs->dev_ops = ops;
-}
-
-/*
- * Emits the monitor event ev for bdrv, carrying the device name, the action
- * taken ("report", "ignore" or "stop") and whether the failed operation was
- * a read or a write. Aborts on an unknown action.
- */
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               enum MonitorEvent ev,
-                               BlockErrorAction action, bool is_read)
-{
-    const char *operation_str = is_read ? "read" : "write";
-    const char *action_str;
-    QObject *payload;
-
-    switch (action) {
-    case BDRV_ACTION_REPORT:
-        action_str = "report";
-        break;
-    case BDRV_ACTION_IGNORE:
-        action_str = "ignore";
-        break;
-    case BDRV_ACTION_STOP:
-        action_str = "stop";
-        break;
-    default:
-        abort();
-    }
-
-    payload = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
-                                 bdrv->device_name,
-                                 action_str,
-                                 operation_str);
-    monitor_protocol_event(ev, payload);
-
-    qobject_decref(payload);
-}
-
-/*
- * Emits a QEVENT_DEVICE_TRAY_MOVED monitor event for bs; 'ejected' is
- * reported as the 'tray-open' member.
- */
-static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
-{
-    QObject *payload = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
-                                          bdrv_get_device_name(bs), ejected);
-
-    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, payload);
-    qobject_decref(payload);
-}
-
-/*
- * Tells the attached device, through its change_media_cb callback, that the
- * medium was loaded (load == true) or removed. A tray-open event is emitted
- * if the tray was closed before the callback ran, and a tray-close event is
- * emitted when a medium was loaded. Does nothing without such a callback.
- */
-static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
-{
-    bool tray_was_closed;
-
-    if (!bs->dev_ops || !bs->dev_ops->change_media_cb) {
-        return;
-    }
-
-    /* Sample the tray state before the callback has a chance to change it */
-    tray_was_closed = !bdrv_dev_is_tray_open(bs);
-    bs->dev_ops->change_media_cb(bs->dev_opaque, load);
-
-    if (tray_was_closed) {
-        /* tray open */
-        bdrv_emit_qmp_eject_event(bs, true);
-    }
-    if (load) {
-        /* tray close */
-        bdrv_emit_qmp_eject_event(bs, false);
-    }
-}
-
-/*
- * Returns true if no device is attached to bs, or if the attached device
- * provides a change_media_cb callback.
- */
-bool bdrv_dev_has_removable_media(BlockDriverState *bs)
-{
-    if (!bs->dev) {
-        return true;
-    }
-    return bs->dev_ops && bs->dev_ops->change_media_cb;
-}
-
-/*
- * Forwards an eject request to the attached device's eject_request_cb
- * callback, if it provides one.
- */
-void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
-{
-    if (!bs->dev_ops || !bs->dev_ops->eject_request_cb) {
-        return;
-    }
-    bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
-}
-
-/*
- * Returns what the attached device's is_tray_open callback reports, or
- * false if there is no such callback.
- */
-bool bdrv_dev_is_tray_open(BlockDriverState *bs)
-{
-    if (!bs->dev_ops || !bs->dev_ops->is_tray_open) {
-        return false;
-    }
-    return bs->dev_ops->is_tray_open(bs->dev_opaque);
-}
-
-/* Invokes the attached device's resize_cb callback, if it provides one. */
-static void bdrv_dev_resize_cb(BlockDriverState *bs)
-{
-    if (!bs->dev_ops || !bs->dev_ops->resize_cb) {
-        return;
-    }
-    bs->dev_ops->resize_cb(bs->dev_opaque);
-}
-
-/*
- * Returns what the attached device's is_medium_locked callback reports, or
- * false if there is no such callback.
- */
-bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
-{
-    if (!bs->dev_ops || !bs->dev_ops->is_medium_locked) {
-        return false;
-    }
-    return bs->dev_ops->is_medium_locked(bs->dev_opaque);
-}
-
-/*
- * Run consistency checks on an image
- *
- * Returns 0 if the check could be completed (it doesn't mean that the image is
- * free of errors) or -errno when an internal error occurred. The results of the
- * check are stored in res.
- *
- * Returns -ENOMEDIUM if bs has no driver (same convention as bdrv_commit())
- * and -ENOTSUP if the driver does not implement bdrv_check. In both cases
- * res is left untouched.
- */
-int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
-{
-    /* bs->drv is NULL while no image is open; do not dereference it */
-    if (bs->drv == NULL) {
-        return -ENOMEDIUM;
-    }
-    if (bs->drv->bdrv_check == NULL) {
-        return -ENOTSUP;
-    }
-
-    memset(res, 0, sizeof(*res));
-    return bs->drv->bdrv_check(bs, res, fix);
-}
-
-#define COMMIT_BUF_SECTORS 2048
-
-/*
- * Commit COW file into the raw image: every allocated range of bs is copied
- * into bs->backing_hd, then bs is emptied if its driver supports that.
- *
- * A read-only backing file is reopened read-write for the duration of the
- * commit and reopened read-only afterwards.
- *
- * Returns 0 on success, or a negative value:
- *  -ENOMEDIUM  bs has no driver
- *  -ENOTSUP    bs has no backing file
- *  -EBUSY      bs or its backing file is in use
- *  -EACCES     the backing file could not be reopened read-write
- *  -EIO        reading from bs or writing to the backing file failed
- * or the error reported by bdrv_getlength(), bdrv_is_allocated() or the
- * driver's bdrv_make_empty().
- */
-int bdrv_commit(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    int64_t sector, total_sectors, length;
-    int n, ro, open_flags;
-    int ret = 0;
-    uint8_t *buf = NULL;
-
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-
-    if (!bs->backing_hd) {
-        return -ENOTSUP;
-    }
-
-    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
-        return -EBUSY;
-    }
-
-    ro = bs->backing_hd->read_only;
-    open_flags = bs->backing_hd->open_flags;
-
-    if (ro) {
-        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
-            return -EACCES;
-        }
-    }
-
-    /* A negative length is an error code, not a size to iterate over */
-    length = bdrv_getlength(bs);
-    if (length < 0) {
-        ret = length;
-        goto ro_cleanup;
-    }
-    total_sectors = length >> BDRV_SECTOR_BITS;
-    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
-
-    for (sector = 0; sector < total_sectors; sector += n) {
-        int allocated = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
-
-        /* Do not treat an error as "allocated": n may not have been set */
-        if (allocated < 0) {
-            ret = allocated;
-            goto ro_cleanup;
-        }
-        if (!allocated) {
-            continue;
-        }
-
-        if (bdrv_read(bs, sector, buf, n) != 0) {
-            ret = -EIO;
-            goto ro_cleanup;
-        }
-
-        if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
-            ret = -EIO;
-            goto ro_cleanup;
-        }
-    }
-
-    if (drv->bdrv_make_empty) {
-        ret = drv->bdrv_make_empty(bs);
-        bdrv_flush(bs);
-    }
-
-    /*
-     * Make sure all data we wrote to the backing device is actually
-     * stable on disk.
-     */
-    if (bs->backing_hd) {
-        bdrv_flush(bs->backing_hd);
-    }
-
-ro_cleanup:
-    g_free(buf);
-
-    if (ro) {
-        /* ignoring error return here */
-        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
-    }
-
-    return ret;
-}
-
-/*
- * Runs bdrv_commit() on every BlockDriverState in bdrv_states that has both
- * a driver and a backing file. Stops at the first failure and returns its
- * negative error code; returns 0 if every commit succeeded.
- */
-int bdrv_commit_all(void)
-{
-    BlockDriverState *cur;
-
-    QTAILQ_FOREACH(cur, &bdrv_states, list) {
-        int ret;
-
-        if (!cur->drv || !cur->backing_hd) {
-            continue;
-        }
-        ret = bdrv_commit(cur);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-    return 0;
-}
-
-/**
- * Remove an active request from the tracked requests list and restart
- * everything queued on the request's wait queue.
- *
- * This function should be called when a tracked request is completing.
- */
-static void tracked_request_end(BdrvTrackedRequest *req)
-{
-    QLIST_REMOVE(req, list);
-
-    qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-/**
- * Add an active request to the tracked requests list
- *
- * req is (re)initialized from the arguments and records the calling
- * coroutine as its owner.
- */
-static void tracked_request_begin(BdrvTrackedRequest *req,
-                                  BlockDriverState *bs,
-                                  int64_t sector_num,
-                                  int nb_sectors, bool is_write)
-{
-    BdrvTrackedRequest fresh = {
-        .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .is_write = is_write,
-        .co = qemu_coroutine_self(),
-    };
-
-    *req = fresh;
-    qemu_co_queue_init(&req->wait_queue);
-    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-}
-
-/**
- * Round a region to cluster boundaries
- *
- * The start is aligned down and the length is aligned up to the cluster
- * size reported by bdrv_get_info(). If that call fails or reports a cluster
- * size of 0, the region is returned unchanged.
- */
-void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t sector_num, int nb_sectors,
-                            int64_t *cluster_sector_num,
-                            int *cluster_nb_sectors)
-{
-    BlockDriverInfo bdi;
-    int64_t sectors_per_cluster;
-
-    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
-        *cluster_sector_num = sector_num;
-        *cluster_nb_sectors = nb_sectors;
-        return;
-    }
-
-    sectors_per_cluster = bdi.cluster_size / BDRV_SECTOR_SIZE;
-    *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, sectors_per_cluster);
-    *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
-                                        nb_sectors, sectors_per_cluster);
-}
-
-/*
- * Returns true if the sector range [sector_num, sector_num + nb_sectors)
- * intersects the range covered by req.
- */
-static bool tracked_request_overlaps(BdrvTrackedRequest *req,
-                                     int64_t sector_num, int nb_sectors)
-{
-    /* The ranges overlap unless one of them ends before the other begins */
-    return sector_num < req->sector_num + req->nb_sectors &&
-           req->sector_num < sector_num + nb_sectors;
-}
-
-/*
- * Waits until no tracked request of bs overlaps the given sector range,
- * after rounding that range to cluster boundaries.
- */
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors)
-{
-    BdrvTrackedRequest *other;
-    int64_t cluster_sector_num;
-    int cluster_nb_sectors;
-    bool waited;
-
-    /* If we touch the same cluster it counts as an overlap.  This guarantees
-     * that allocating writes will be serialized and not race with each other
-     * for the same cluster.  For example, in copy-on-read it ensures that the
-     * CoR read and write operations are atomic and guest writes cannot
-     * interleave between them.
-     */
-    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
-                           &cluster_sector_num, &cluster_nb_sectors);
-
-    /* Rescan the list from the start after every wait */
-    do {
-        waited = false;
-        QLIST_FOREACH(other, &bs->tracked_requests, list) {
-            if (!tracked_request_overlaps(other, cluster_sector_num,
-                                          cluster_nb_sectors)) {
-                continue;
-            }
-
-            /* Hitting this means there was a reentrant request, for
-             * example, a block driver issuing nested requests.  This must
-             * never happen since it means deadlock.
-             */
-            assert(qemu_coroutine_self() != other->co);
-
-            qemu_co_queue_wait(&other->wait_queue);
-            waited = true;
-            break;
-        }
-    } while (waited);
-}
-
-/*
- * Asks the format driver to record a new backing file and, on success,
- * mirrors the new name and format in bs (NULL is stored as "").
- *
- * Return values:
- * 0        - success
- * -EINVAL  - backing format specified, but no file
- * -ENOSPC  - can't update the backing file because no space is left in the
- *            image file header
- * -ENOTSUP - format driver doesn't support changing the backing file
- */
-int bdrv_change_backing_file(BlockDriverState *bs,
-    const char *backing_file, const char *backing_fmt)
-{
-    BlockDriver *drv = bs->drv;
-    int ret;
-
-    /* Backing file format doesn't make sense without a backing file */
-    if (backing_fmt && !backing_file) {
-        return -EINVAL;
-    }
-
-    if (drv->bdrv_change_backing_file == NULL) {
-        return -ENOTSUP;
-    }
-
-    ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
-    if (ret != 0) {
-        return ret;
-    }
-
-    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-            backing_file ? backing_file : "");
-    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
-            backing_fmt ? backing_fmt : "");
-    return 0;
-}
-
-/*
- * Finds the image layer in the chain that has 'bs' as its backing file.
- *
- * active is the current topmost image.
- *
- * Returns NULL if bs is not found in active's image chain,
- * or if active == bs.
- */
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
-                                    BlockDriverState *bs)
-{
-    BlockDriverState *layer;
-
-    assert(active != NULL);
-    assert(bs != NULL);
-
-    /* the topmost image has, by definition, no overlay */
-    if (active == bs) {
-        return NULL;
-    }
-
-    /* Walk down the backing chain until a layer sits directly on bs */
-    for (layer = active; layer->backing_hd; layer = layer->backing_hd) {
-        if (layer->backing_hd == bs) {
-            return layer;
-        }
-    }
-
-    return NULL;
-}
-
-typedef struct BlkIntermediateStates {
- BlockDriverState *bs; /* image queued for deletion by bdrv_drop_intermediate() */
- QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; /* linkage within the deletion queue */
-} BlkIntermediateStates;
-
-
-/*
- * Drops images above 'base' up to and including 'top', and sets the image
- * above 'top' to have base as its backing file.
- *
- * Requires that the overlay to 'top' is opened r/w, so that the backing file
- * information in that overlay can be properly updated.
- *
- * E.g., this will convert the following chain:
- * bottom <- base <- intermediate <- top <- active
- *
- * to
- *
- * bottom <- base <- active
- *
- * It is allowed for bottom==base, in which case it converts:
- *
- * base <- intermediate <- top <- active
- *
- * to
- *
- * base <- active
- *
- * Error conditions:
- *  if active == top, that is considered an error
- *
- * Returns 0 on success, -EIO if the chain does not have the expected shape
- * or a driver is missing, or the error from bdrv_change_backing_file().
- */
-int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
-                           BlockDriverState *base)
-{
-    BlockDriverState *cur;
-    BlockDriverState *base_bs = NULL;
-    BlockDriverState *overlay = NULL;
-    BlkIntermediateStates *node, *node_next;
-    int ret = -EIO;
-
-    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
-    QSIMPLEQ_INIT(&states_to_delete);
-
-    if (!top->drv || !base->drv) {
-        goto exit;
-    }
-
-    overlay = bdrv_find_overlay(active, top);
-    if (overlay == NULL) {
-        /* we could not find the image above 'top', this is an error */
-        goto exit;
-    }
-
-    /* special case of the overlay's backing_hd already pointing to base -
-     * nothing to do, no intermediate images */
-    if (overlay->backing_hd == base) {
-        ret = 0;
-        goto exit;
-    }
-
-    /* now we will go down through the list, and add each BDS we find
-     * into our deletion queue, until we hit the 'base'
-     */
-    for (cur = top; cur != NULL; cur = cur->backing_hd) {
-        node = g_malloc0(sizeof(BlkIntermediateStates));
-        node->bs = cur;
-        QSIMPLEQ_INSERT_TAIL(&states_to_delete, node, entry);
-
-        if (cur->backing_hd == base) {
-            base_bs = cur->backing_hd;
-            break;
-        }
-    }
-    if (base_bs == NULL) {
-        /* something went wrong, we did not end at the base. safely
-         * unravel everything, and exit with error */
-        goto exit;
-    }
-
-    /* success - we can delete the intermediate states, and link top->base */
-    ret = bdrv_change_backing_file(overlay, base_bs->filename,
-                                   base_bs->drv ? base_bs->drv->format_name : "");
-    if (ret) {
-        goto exit;
-    }
-    overlay->backing_hd = base_bs;
-
-    QSIMPLEQ_FOREACH_SAFE(node, &states_to_delete, entry, node_next) {
-        /* so that bdrv_close() does not recursively close the chain */
-        node->bs->backing_hd = NULL;
-        bdrv_delete(node->bs);
-    }
-    ret = 0;
-
-exit:
-    /* The queue nodes are freed on every path */
-    QSIMPLEQ_FOREACH_SAFE(node, &states_to_delete, entry, node_next) {
-        g_free(node);
-    }
-    return ret;
-}
-
-
-/*
- * Validates a byte-granularity request against bs.
- *
- * Returns -ENOMEDIUM if no medium is inserted, 0 if the image is growable
- * or the request lies within the image, and -EIO if the offset is negative
- * or the request does not fit inside the current length.
- */
-static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
-                                   size_t size)
-{
-    int64_t len;
-
-    if (!bdrv_is_inserted(bs)) {
-        return -ENOMEDIUM;
-    }
-
-    /* Growable images accept requests beyond their current end */
-    if (bs->growable) {
-        return 0;
-    }
-
-    len = bdrv_getlength(bs);
-
-    if (offset < 0 || offset > len || len - offset < size) {
-        return -EIO;
-    }
-
-    return 0;
-}
-
-/*
- * Sector-granularity wrapper around bdrv_check_byte_request(): converts
- * the sector number and count to bytes and returns that function's verdict.
- */
-static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
-                              int nb_sectors)
-{
-    int64_t byte_offset = sector_num * BDRV_SECTOR_SIZE;
-    size_t byte_count = nb_sectors * BDRV_SECTOR_SIZE;
-
-    return bdrv_check_byte_request(bs, byte_offset, byte_count);
-}
-
-typedef struct RwCo {
- BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
- QEMUIOVector *qiov;
- bool is_write; /* selects bdrv_co_do_writev() over bdrv_co_do_readv() in bdrv_rw_co_entry() */
- int ret; /* starts as NOT_DONE in bdrv_rwv_co(); overwritten with the request's result */
-} RwCo;
-
-/*
- * Coroutine entry point for synchronous I/O: performs the read or write
- * described by the RwCo passed as opaque and stores the result in its
- * 'ret' field.
- */
-static void coroutine_fn bdrv_rw_co_entry(void *opaque)
-{
-    RwCo *rwco = opaque;
-
-    if (rwco->is_write) {
-        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
-                                      rwco->nb_sectors, rwco->qiov, 0);
-    } else {
-        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
-                                     rwco->nb_sectors, rwco->qiov, 0);
-    }
-}
-
-/*
- * Process a vectored synchronous request using coroutines
- *
- * qiov->size must be a multiple of BDRV_SECTOR_SIZE. Outside of coroutine
- * context a coroutine is created for the request and qemu_aio_wait() is
- * called until it has finished. Returns the result of the request.
- */
-static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
-                       QEMUIOVector *qiov, bool is_write)
-{
-    RwCo rwco = {
-        .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
-        .qiov = qiov,
-        .is_write = is_write,
-        .ret = NOT_DONE,
-    };
-    assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
-
-    /**
-     * In sync call context, when the vcpu is blocked, this throttling timer
-     * will not fire; so the I/O throttling function has to be disabled here
-     * if it has been enabled.
-     */
-    if (bs->io_limits_enabled) {
-        fprintf(stderr, "Disabling I/O throttling on '%s' due "
-                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
-        bdrv_io_limits_disable(bs);
-    }
-
-    if (!qemu_in_coroutine()) {
-        Coroutine *co = qemu_coroutine_create(bdrv_rw_co_entry);
-
-        qemu_coroutine_enter(co, &rwco);
-        /* Pump the AIO loop until the coroutine has stored a result */
-        while (rwco.ret == NOT_DONE) {
-            qemu_aio_wait();
-        }
-    } else {
-        /* Fast-path if already in coroutine context */
-        bdrv_rw_co_entry(&rwco);
-    }
-    return rwco.ret;
-}
-
-/*
- * Process a synchronous request using coroutines
- */
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors, bool is_write)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *)buf,
- .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
- };
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
-}
-
- /*
-  * Synchronous sector read into buf.
-  * Returns < 0 on error; see bdrv_write() for the return codes.
-  */
- int bdrv_read(BlockDriverState *bs, int64_t sector_num,
-               uint8_t *buf, int nb_sectors)
- {
-     const bool is_write = false;
-
-     return bdrv_rw_co(bs, sector_num, buf, nb_sectors, is_write);
- }
-
- /*
-  * Just like bdrv_read(), but with I/O throttling temporarily disabled.
-  *
-  * The caller's sector_num and nb_sectors are forwarded to bdrv_read(); the
-  * previous io_limits_enabled value is restored before returning.
-  * Returns the result of bdrv_read().
-  */
- int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
-                           uint8_t *buf, int nb_sectors)
- {
-     bool enabled;
-     int ret;
-
-     enabled = bs->io_limits_enabled;
-     bs->io_limits_enabled = false;
-     ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-     bs->io_limits_enabled = enabled;
-     return ret;
- }
-
- /*
-  * Synchronous sector write from buf.
-  *
-  * Return < 0 if error. Important errors are:
-  *   -EIO         generic I/O error (may happen for all errors)
-  *   -ENOMEDIUM   No media inserted.
-  *   -EINVAL      Invalid sector number or nb_sectors
-  *   -EACCES      Trying to write a read-only device
-  */
- int bdrv_write(BlockDriverState *bs, int64_t sector_num,
-                const uint8_t *buf, int nb_sectors)
- {
-     /* bdrv_rw_co() takes a non-const buffer for both directions */
-     uint8_t *data = (uint8_t *)buf;
-
-     return bdrv_rw_co(bs, sector_num, data, nb_sectors, true);
- }
-
- /* Synchronous vectored write; the length is taken from qiov. */
- int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
- {
-     const bool is_write = true;
-
-     return bdrv_rwv_co(bs, sector_num, qiov, is_write);
- }
-
- /*
-  * Read count1 bytes starting at byte 'offset' into buf.
-  *
-  * Unaligned head and tail bytes go through a one-sector bounce buffer;
-  * whole sectors in between are read straight into the destination.
-  * Returns count1 on success or the negative error from bdrv_read().
-  */
- int bdrv_pread(BlockDriverState *bs, int64_t offset,
-                void *buf, int count1)
- {
-     uint8_t tmp_buf[BDRV_SECTOR_SIZE];
-     /* Byte cursor: arithmetic on 'void *' is not valid ISO C */
-     uint8_t *dst = buf;
-     int len, nb_sectors, count;
-     int64_t sector_num;
-     int ret;
-
-     count = count1;
-     /* first read to align to sector start */
-     len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
-     if (len > count) {
-         len = count;
-     }
-     sector_num = offset >> BDRV_SECTOR_BITS;
-     if (len > 0) {
-         ret = bdrv_read(bs, sector_num, tmp_buf, 1);
-         if (ret < 0) {
-             return ret;
-         }
-         memcpy(dst, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
-         count -= len;
-         if (count == 0) {
-             return count1;
-         }
-         sector_num++;
-         dst += len;
-     }
-
-     /* read the sectors "in place" */
-     nb_sectors = count >> BDRV_SECTOR_BITS;
-     if (nb_sectors > 0) {
-         ret = bdrv_read(bs, sector_num, dst, nb_sectors);
-         if (ret < 0) {
-             return ret;
-         }
-         sector_num += nb_sectors;
-         len = nb_sectors << BDRV_SECTOR_BITS;
-         dst += len;
-         count -= len;
-     }
-
-     /* add data from the last sector */
-     if (count > 0) {
-         ret = bdrv_read(bs, sector_num, tmp_buf, 1);
-         if (ret < 0) {
-             return ret;
-         }
-         memcpy(dst, tmp_buf, count);
-     }
-     return count1;
- }
-
- /*
-  * Write the contents of qiov starting at byte 'offset'.
-  *
-  * Partial sectors at the head and tail are handled by read-modify-write
-  * through a one-sector buffer; the aligned middle is written directly.
-  * Returns qiov->size on success or the negative error of the failing
-  * bdrv_read()/bdrv_write()/bdrv_writev() call.
-  */
- int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
- {
-     uint8_t sector_buf[BDRV_SECTOR_SIZE];
-     int head_len, full_sectors, remaining;
-     int64_t sector;
-     int ret;
-
-     remaining = qiov->size;
-
-     /* Unaligned head: bytes up to the next sector boundary */
-     head_len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
-     if (head_len > remaining) {
-         head_len = remaining;
-     }
-     sector = offset >> BDRV_SECTOR_BITS;
-     if (head_len > 0) {
-         ret = bdrv_read(bs, sector, sector_buf, 1);
-         if (ret < 0) {
-             return ret;
-         }
-         qemu_iovec_to_buf(qiov, 0,
-                           sector_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
-                           head_len);
-         ret = bdrv_write(bs, sector, sector_buf, 1);
-         if (ret < 0) {
-             return ret;
-         }
-         remaining -= head_len;
-         if (remaining == 0) {
-             return qiov->size;
-         }
-         sector++;
-     }
-
-     /* Aligned middle: whole sectors, taken from qiov after the head bytes */
-     full_sectors = remaining >> BDRV_SECTOR_BITS;
-     if (full_sectors > 0) {
-         QEMUIOVector middle;
-
-         qemu_iovec_init(&middle, qiov->niov);
-         qemu_iovec_concat(&middle, qiov, head_len,
-                           full_sectors << BDRV_SECTOR_BITS);
-         ret = bdrv_writev(bs, sector, &middle);
-         qemu_iovec_destroy(&middle);
-         if (ret < 0) {
-             return ret;
-         }
-
-         sector += full_sectors;
-         remaining -= full_sectors << BDRV_SECTOR_BITS;
-     }
-
-     /* Unaligned tail: merge the last bytes into the final sector */
-     if (remaining > 0) {
-         ret = bdrv_read(bs, sector, sector_buf, 1);
-         if (ret < 0) {
-             return ret;
-         }
-         qemu_iovec_to_buf(qiov, qiov->size - remaining, sector_buf, remaining);
-         ret = bdrv_write(bs, sector, sector_buf, 1);
-         if (ret < 0) {
-             return ret;
-         }
-     }
-     return qiov->size;
- }
-
- /*
-  * Byte-granularity write of count1 bytes from a flat buffer, implemented
-  * by wrapping buf in a single-element vector for bdrv_pwritev().
-  */
- int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
-                 const void *buf, int count1)
- {
-     struct iovec single = {
-         .iov_base = (void *) buf,
-         .iov_len = count1,
-     };
-     QEMUIOVector wrapper;
-
-     qemu_iovec_init_external(&wrapper, &single, 1);
-
-     return bdrv_pwritev(bs, offset, &wrapper);
- }
-
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
- const void *buf, int count)
-{
- int ret;
-
- ret = bdrv_pwrite(bs, offset, buf, count);
- if (ret < 0) {
- return ret;
- }
-
- /* No flush needed for cache modes that already do it */
- if (bs->enable_write_cache) {
- bdrv_flush(bs);
- }
-
- return 0;
-}
-
- /*
-  * Serve a read by reading the enclosing cluster range into a bounce buffer,
-  * writing that range back through the driver, and then copying the
-  * requested part into qiov.
-  *
-  * Returns the driver's result; on failure qiov is left untouched.
-  */
- static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
-         int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
- {
-     BlockDriver *drv = bs->drv;
-     QEMUIOVector bounce_qiov;
-     struct iovec bounce_iov;
-     /* I/O goes through this private buffer so that a caller scribbling over
-      * its read buffer mid-operation cannot change what reaches the image
-      * file (important for zero-copy guest I/O). */
-     void *bounce;
-     int64_t first_cluster_sector;
-     int cluster_sectors;
-     size_t lead_bytes;
-     int ret;
-
-     /* Widen to whole clusters so allocating them needs no extra backing
-      * file I/O. */
-     bdrv_round_to_clusters(bs, sector_num, nb_sectors,
-                            &first_cluster_sector, &cluster_sectors);
-
-     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
-                                    first_cluster_sector, cluster_sectors);
-
-     bounce_iov.iov_len = cluster_sectors * BDRV_SECTOR_SIZE;
-     bounce_iov.iov_base = bounce = qemu_blockalign(bs, bounce_iov.iov_len);
-     qemu_iovec_init_external(&bounce_qiov, &bounce_iov, 1);
-
-     ret = drv->bdrv_co_readv(bs, first_cluster_sector, cluster_sectors,
-                              &bounce_qiov);
-
-     if (ret >= 0) {
-         if (drv->bdrv_co_write_zeroes &&
-             buffer_is_zero(bounce, bounce_iov.iov_len)) {
-             ret = bdrv_co_do_write_zeroes(bs, first_cluster_sector,
-                                           cluster_sectors);
-         } else {
-             /* The on-disk data does not change, so no flush is needed even
-              * in cache=writethrough mode. */
-             ret = drv->bdrv_co_writev(bs, first_cluster_sector,
-                                       cluster_sectors, &bounce_qiov);
-         }
-     }
-
-     /* Write errors are reported in all cases: for a deliberate copy-on-read
-      * they must not be ignored. */
-     if (ret >= 0) {
-         lead_bytes = (sector_num - first_cluster_sector) * BDRV_SECTOR_SIZE;
-         qemu_iovec_from_buf(qiov, 0, (uint8_t *)bounce + lead_bytes,
-                             nb_sectors * BDRV_SECTOR_SIZE);
-     }
-
-     qemu_vfree(bounce);
-     return ret;
- }
-
-/*
- * Handle a read request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- BlockDriver *drv = bs->drv;
- BdrvTrackedRequest req;
- int ret;
-
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- }
-
- /* throttling disk read I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, false, nb_sectors);
- }
-
- if (bs->copy_on_read) {
- flags |= BDRV_REQ_COPY_ON_READ;
- }
- if (flags & BDRV_REQ_COPY_ON_READ) {
- bs->copy_on_read_in_flight++;
- }
-
- if (bs->copy_on_read_in_flight) {
- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
- }
-
- tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
-
- if (flags & BDRV_REQ_COPY_ON_READ) {
- int pnum;
-
- ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
- if (ret < 0) {
- goto out;
- }
-
- if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
- goto out;
- }
- }
-
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
-
-out:
- tracked_request_end(&req);
-
- if (flags & BDRV_REQ_COPY_ON_READ) {
- bs->copy_on_read_in_flight--;
- }
-
- return ret;
-}
-
- /* Coroutine read entry point: trace the request, then read with no flags. */
- int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
-     int nb_sectors, QEMUIOVector *qiov)
- {
-     const BdrvRequestFlags no_flags = 0;
-
-     trace_bdrv_co_readv(bs, sector_num, nb_sectors);
-
-     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, no_flags);
- }
-
- /* Coroutine read that requests copy-on-read via BDRV_REQ_COPY_ON_READ. */
- int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-     int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
- {
-     const BdrvRequestFlags cor_flags = BDRV_REQ_COPY_ON_READ;
-
-     trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
-     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, cor_flags);
- }
-
- /*
-  * Zero a sector range: use the driver's write-zeroes hook when it exists
-  * and does not answer -ENOTSUP, otherwise write a zero-filled buffer.
-  *
-  * Returns the result of whichever driver call completed the request.
-  */
- static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-     int64_t sector_num, int nb_sectors)
- {
-     BlockDriver *drv = bs->drv;
-     QEMUIOVector zero_qiov;
-     struct iovec zero_iov;
-     int ret;
-
-     /* TODO Emulate only part of misaligned requests instead of letting block
-      * drivers return -ENOTSUP and emulate everything */
-
-     /* Efficient path first */
-     if (drv->bdrv_co_write_zeroes) {
-         ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
-         if (ret != -ENOTSUP) {
-             return ret;
-         }
-     }
-
-     /* Emulation: write an explicitly zeroed bounce buffer */
-     zero_iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
-     zero_iov.iov_base = qemu_blockalign(bs, zero_iov.iov_len);
-     memset(zero_iov.iov_base, 0, zero_iov.iov_len);
-     qemu_iovec_init_external(&zero_qiov, &zero_iov, 1);
-
-     ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &zero_qiov);
-
-     qemu_vfree(zero_iov.iov_base);
-     return ret;
- }
-
-/*
- * Handle a write request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- BlockDriver *drv = bs->drv;
- BdrvTrackedRequest req;
- int ret;
-
- if (!bs->drv) {
- return -ENOMEDIUM;
- }
- if (bs->read_only) {
- return -EACCES;
- }
- if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- }
-
- /* throttling disk write I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, true, nb_sectors);
- }
-
- if (bs->copy_on_read_in_flight) {
- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
- }
-
- tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
-
- ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
-
- if (ret < 0) {
- /* Do nothing, write notifier decided to fail this request */
- } else if (flags & BDRV_REQ_ZERO_WRITE) {
- ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
- } else {
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
- }
-
- if (ret == 0 && !bs->enable_write_cache) {
- ret = bdrv_co_flush(bs);
- }
-
- if (bs->dirty_bitmap) {
- bdrv_set_dirty(bs, sector_num, nb_sectors);
- }
-
- if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
- bs->wr_highest_sector = sector_num + nb_sectors - 1;
- }
-
- tracked_request_end(&req);
-
- return ret;
-}
-
- /* Coroutine write entry point: trace the request, then write with no flags. */
- int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
-     int nb_sectors, QEMUIOVector *qiov)
- {
-     const BdrvRequestFlags no_flags = 0;
-
-     trace_bdrv_co_writev(bs, sector_num, nb_sectors);
-
-     return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, no_flags);
- }
-
- /*
-  * Coroutine zero-write entry point: no data vector is passed, the request
-  * is marked with BDRV_REQ_ZERO_WRITE instead.
-  */
- int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors)
- {
-     QEMUIOVector *no_data = NULL;
-
-     trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
-
-     return bdrv_co_do_writev(bs, sector_num, nb_sectors, no_data,
-                              BDRV_REQ_ZERO_WRITE);
- }
-
-/**
- * Truncate file to 'offset' bytes (needed only for file protocols)
- */
-int bdrv_truncate(BlockDriverState *bs, int64_t offset)
-{
- BlockDriver *drv = bs->drv;
- int ret;
- if (!drv)
- return -ENOMEDIUM;
- if (!drv->bdrv_truncate)
- return -ENOTSUP;
- if (bs->read_only)
- return -EACCES;
- if (bdrv_in_use(bs))
- return -EBUSY;
- ret = drv->bdrv_truncate(bs, offset);
- if (ret == 0) {
- ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
- bdrv_dev_resize_cb(bs);
- }
- return ret;
-}
-
-/**
- * Length of a allocated file in bytes. Sparse files are counted by actual
- * allocated space. Return < 0 if error or unknown.
- */
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_get_allocated_file_size) {
- return drv->bdrv_get_allocated_file_size(bs);
- }
- if (bs->file) {
- return bdrv_get_allocated_file_size(bs->file);
- }
- return -ENOTSUP;
-}
-
-/**
- * Length of a file in bytes. Return < 0 if error or unknown.
- */
-int64_t bdrv_getlength(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
-
- if (bs->growable || bdrv_dev_has_removable_media(bs)) {
- if (drv->bdrv_getlength) {
- return drv->bdrv_getlength(bs);
- }
- }
- return bs->total_sectors * BDRV_SECTOR_SIZE;
-}
-
- /*
-  * Store the device size in sectors in *nb_sectors_ptr; 0 is stored if no
-  * device is present or the length query fails.
-  */
- void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
- {
-     int64_t length = bdrv_getlength(bs);
-
-     if (length < 0) {
-         *nb_sectors_ptr = 0;
-         return;
-     }
-
-     *nb_sectors_ptr = length >> BDRV_SECTOR_BITS;
- }
-
- /*
-  * Install new I/O throttling limits (copied from *io_limits) and recompute
-  * whether throttling is enabled for the device.
-  */
- void bdrv_set_io_limits(BlockDriverState *bs,
-                         BlockIOLimit *io_limits)
- {
-     bool now_enabled;
-
-     bs->io_limits = *io_limits;
-     now_enabled = bdrv_io_limits_enabled(bs);
-     bs->io_limits_enabled = now_enabled;
- }
-
- /* Record the error policies to apply to failed reads and failed writes. */
- void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-                        BlockdevOnError on_write_error)
- {
-     bs->on_write_error = on_write_error;
-     bs->on_read_error = on_read_error;
- }
-
- /* Return the configured error policy for reads (is_read) or for writes. */
- BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
- {
-     if (is_read) {
-         return bs->on_read_error;
-     }
-     return bs->on_write_error;
- }
-
- /*
-  * Map the configured error policy for the given direction to the action to
-  * take for 'error'. With the ENOSPC policy only ENOSPC stops the VM; any
-  * other error is reported. An unknown policy value aborts.
-  */
- BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
- {
-     BlockdevOnError policy = is_read ? bs->on_read_error : bs->on_write_error;
-
-     if (policy == BLOCKDEV_ON_ERROR_ENOSPC) {
-         if (error == ENOSPC) {
-             return BDRV_ACTION_STOP;
-         }
-         return BDRV_ACTION_REPORT;
-     }
-     if (policy == BLOCKDEV_ON_ERROR_STOP) {
-         return BDRV_ACTION_STOP;
-     }
-     if (policy == BLOCKDEV_ON_ERROR_REPORT) {
-         return BDRV_ACTION_REPORT;
-     }
-     if (policy == BLOCKDEV_ON_ERROR_IGNORE) {
-         return BDRV_ACTION_IGNORE;
-     }
-
-     abort();
- }
-
- /*
-  * Carry out an error action: emit the QMP I/O error event and, for a STOP
-  * action, stop the VM and record the error in the device's iostatus.
-  *
-  * Device models call this because the block layer, although it knows about
-  * the error, cannot tell whether the operation came from the device or from
-  * the block layer itself (from a job, for example).
-  *
-  * 'error' must be non-negative.
-  */
- void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-                        bool is_read, int error)
- {
-     assert(error >= 0);
-
-     bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
-
-     if (action != BDRV_ACTION_STOP) {
-         return;
-     }
-
-     vm_stop(RUN_STATE_IO_ERROR);
-     bdrv_iostatus_set_err(bs, error);
- }
-
- /* Return the device's read_only field. */
- int bdrv_is_read_only(BlockDriverState *bs)
- {
-     int read_only = bs->read_only;
-
-     return read_only;
- }
-
- /* Return the device's sg field. */
- int bdrv_is_sg(BlockDriverState *bs)
- {
-     int sg = bs->sg;
-
-     return sg;
- }
-
- /* Return the device's current enable_write_cache setting. */
- int bdrv_enable_write_cache(BlockDriverState *bs)
- {
-     int wce = bs->enable_write_cache;
-
-     return wce;
- }
-
- /*
-  * Turn the write cache on or off and mirror the setting into
-  * BDRV_O_CACHE_WB of open_flags so that a reopen() preserves it.
-  */
- void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
- {
-     bs->enable_write_cache = wce;
-
-     if (!wce) {
-         bs->open_flags &= ~BDRV_O_CACHE_WB;
-         return;
-     }
-     bs->open_flags |= BDRV_O_CACHE_WB;
- }
-
- /*
-  * Return 1 if the backing image is encrypted, otherwise the image's own
-  * 'encrypted' field.
-  */
- int bdrv_is_encrypted(BlockDriverState *bs)
- {
-     BlockDriverState *backing = bs->backing_hd;
-
-     if (backing && backing->encrypted) {
-         return 1;
-     }
-     return bs->encrypted;
- }
-
- /*
-  * Return non-zero if the image, or its backing image, is encrypted and has
-  * no valid key yet.
-  */
- int bdrv_key_required(BlockDriverState *bs)
- {
-     BlockDriverState *backing = bs->backing_hd;
-     int backing_needs_key;
-
-     backing_needs_key = backing && backing->encrypted && !backing->valid_key;
-     if (backing_needs_key) {
-         return 1;
-     }
-
-     return (bs->encrypted && !bs->valid_key);
- }
-
- /*
-  * Set the encryption key, first on an encrypted backing image and then on
-  * the image itself.
-  *
-  * Returns 0 when only the backing image is encrypted and accepted the key,
-  * -EINVAL if the image is not encrypted, -ENOMEDIUM if there is no driver
-  * or no set_key hook, otherwise the driver's result. The first successful
-  * key fires the media change callback that was skipped at open time.
-  */
- int bdrv_set_key(BlockDriverState *bs, const char *key)
- {
-     BlockDriverState *backing = bs->backing_hd;
-     int ret;
-
-     if (backing && backing->encrypted) {
-         ret = bdrv_set_key(backing, key);
-         if (ret < 0) {
-             return ret;
-         }
-         if (!bs->encrypted) {
-             return 0;
-         }
-     }
-
-     if (!bs->encrypted) {
-         return -EINVAL;
-     }
-     if (!bs->drv || !bs->drv->bdrv_set_key) {
-         return -ENOMEDIUM;
-     }
-
-     ret = bs->drv->bdrv_set_key(bs, key);
-     if (ret < 0) {
-         bs->valid_key = 0;
-         return ret;
-     }
-
-     if (!bs->valid_key) {
-         bs->valid_key = 1;
-         /* call the change callback now, we skipped it on open */
-         bdrv_dev_change_media_cb(bs, true);
-     }
-     return ret;
- }
-
- /* Return the driver's format name, or NULL if the device has no driver. */
- const char *bdrv_get_format_name(BlockDriverState *bs)
- {
-     if (!bs->drv) {
-         return NULL;
-     }
-     return bs->drv->format_name;
- }
-
- /* Call it(opaque, name) with the format name of every driver in bdrv_drivers. */
- void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
-                          void *opaque)
- {
-     BlockDriver *driver;
-
-     QLIST_FOREACH(driver, &bdrv_drivers, list) {
-         const char *name = driver->format_name;
-
-         it(opaque, name);
-     }
- }
-
- /*
-  * Look up a device in bdrv_states by exact device name.
-  * Returns the first match, or NULL if there is none.
-  */
- BlockDriverState *bdrv_find(const char *name)
- {
-     BlockDriverState *candidate;
-
-     QTAILQ_FOREACH(candidate, &bdrv_states, list) {
-         if (strcmp(name, candidate->device_name) == 0) {
-             return candidate;
-         }
-     }
-
-     return NULL;
- }
-
- /*
-  * Iterator over bdrv_states: NULL yields the first device, any other
-  * argument yields its successor in the list.
-  */
- BlockDriverState *bdrv_next(BlockDriverState *bs)
- {
-     return bs ? QTAILQ_NEXT(bs, list) : QTAILQ_FIRST(&bdrv_states);
- }
-
- /* Call it(opaque, bs) for every device in bdrv_states, in list order. */
- void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
- {
-     BlockDriverState *cur;
-
-     QTAILQ_FOREACH(cur, &bdrv_states, list) {
-         it(opaque, cur);
-     }
- }
-
- /* Return the device name stored in the device state. */
- const char *bdrv_get_device_name(BlockDriverState *bs)
- {
-     const char *name = bs->device_name;
-
-     return name;
- }
-
- /* Return the device's open_flags. */
- int bdrv_get_flags(BlockDriverState *bs)
- {
-     int flags = bs->open_flags;
-
-     return flags;
- }
-
- /*
-  * Flush every device in bdrv_states. All devices are flushed even after a
-  * failure; the return value is the first negative result, or 0.
-  */
- int bdrv_flush_all(void)
- {
-     BlockDriverState *bs;
-     int first_error = 0;
-
-     QTAILQ_FOREACH(bs, &bdrv_states, list) {
-         int ret = bdrv_flush(bs);
-
-         if (first_error == 0 && ret < 0) {
-             first_error = ret;
-         }
-     }
-
-     return first_error;
- }
-
- /* bdrv_has_zero_init-style callback that always returns 1; bs is unused. */
- int bdrv_has_zero_init_1(BlockDriverState *bs)
- {
-     const int always_zero_init = 1;
-
-     return always_zero_init;
- }
-
- /*
-  * Ask the driver whether the image has zero init. The device must have a
-  * driver; drivers without the hook get the safe default of 0.
-  */
- int bdrv_has_zero_init(BlockDriverState *bs)
- {
-     assert(bs->drv);
-
-     if (!bs->drv->bdrv_has_zero_init) {
-         /* safe default */
-         return 0;
-     }
-
-     return bs->drv->bdrv_has_zero_init(bs);
- }
-
- typedef struct BdrvCoIsAllocatedData { /* Arguments and result for the synchronous is_allocated wrappers */
- BlockDriverState *bs; /* image to query (the top image for the "above" variant) */
- BlockDriverState *base; /* end of the chain walk; only read by the "above" variant */
- int64_t sector_num; /* first sector to query */
- int nb_sectors; /* maximum number of sectors to examine */
- int *pnum; /* out: number of sectors in the same allocation state */
- int ret; /* return value of the coroutine function */
- bool done; /* set to true by the coroutine once ret is valid */
- } BdrvCoIsAllocatedData;
-
-/*
- * Returns true iff the specified sector is present in the disk image. Drivers
- * not implementing the functionality are assumed to not support backing files,
- * hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- int64_t n;
-
- if (sector_num >= bs->total_sectors) {
- *pnum = 0;
- return 0;
- }
-
- n = bs->total_sectors - sector_num;
- if (n < nb_sectors) {
- nb_sectors = n;
- }
-
- if (!bs->drv->bdrv_co_is_allocated) {
- *pnum = nb_sectors;
- return 1;
- }
-
- return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
-}
-
- /*
-  * Coroutine entry point for bdrv_is_allocated(): run the query described by
-  * the BdrvCoIsAllocatedData argument, store the result and mark it done.
-  */
- static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
- {
-     BdrvCoIsAllocatedData *args = opaque;
-
-     args->ret = bdrv_co_is_allocated(args->bs, args->sector_num,
-                                      args->nb_sectors, args->pnum);
-     args->done = true;
- }
-
-/*
- * Synchronous wrapper around bdrv_co_is_allocated().
- *
- * See bdrv_co_is_allocated() for details.
- */
-int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum)
-{
- Coroutine *co;
- BdrvCoIsAllocatedData data = {
- .bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .pnum = pnum,
- .done = false,
- };
-
- co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
- qemu_coroutine_enter(co, &data);
- while (!data.done) {
- qemu_aio_wait();
- }
- return data.ret;
-}
-
-/*
- * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
- *
- * Return true if the given sector is allocated in any image between
- * BASE and TOP (inclusive). BASE can be NULL to check if the given
- * sector is allocated in any image of the chain. Return false otherwise.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- */
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- BlockDriverState *intermediate;
- int ret, n = nb_sectors;
-
- intermediate = top;
- while (intermediate && intermediate != base) {
- int pnum_inter;
- ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
- &pnum_inter);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- *pnum = pnum_inter;
- return 1;
- }
-
- /*
- * [sector_num, nb_sectors] is unallocated on top but intermediate
- * might have
- *
- * [sector_num+x, nr_sectors] allocated.
- */
- if (n > pnum_inter &&
- (intermediate == top ||
- sector_num + pnum_inter < intermediate->total_sectors)) {
- n = pnum_inter;
- }
-
- intermediate = intermediate->backing_hd;
- }
-
- *pnum = n;
- return 0;
-}
-
- /*
-  * Coroutine entry point for bdrv_is_allocated_above(): run the chain query
-  * described by the BdrvCoIsAllocatedData argument and mark it done.
-  */
- static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
- {
-     BdrvCoIsAllocatedData *args = opaque;
-
-     args->ret = bdrv_co_is_allocated_above(args->bs, args->base,
-                                            args->sector_num,
-                                            args->nb_sectors, args->pnum);
-     args->done = true;
- }
-
-/*
- * Synchronous wrapper around bdrv_co_is_allocated_above().
- *
- * See bdrv_co_is_allocated_above() for details.
- */
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- Coroutine *co;
- BdrvCoIsAllocatedData data = {
- .bs = top,
- .base = base,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .pnum = pnum,
- .done = false,
- };
-
- co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
- qemu_coroutine_enter(co, &data);
- while (!data.done) {
- qemu_aio_wait();
- }
- return data.ret;
-}
-
- /*
-  * Return the name of the encrypted file: the backing file name if the
-  * backing image is encrypted, else the image's own filename if it is
-  * encrypted, else NULL.
-  */
- const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
- {
-     if (bs->backing_hd && bs->backing_hd->encrypted) {
-         return bs->backing_file;
-     }
-     if (bs->encrypted) {
-         return bs->filename;
-     }
-     return NULL;
- }
-
- /* Copy the backing file name into 'filename' (buffer of filename_size bytes). */
- void bdrv_get_backing_filename(BlockDriverState *bs,
-                                char *filename, int filename_size)
- {
-     const char *backing_name = bs->backing_file;
-
-     pstrcpy(filename, filename_size, backing_name);
- }
-
- /*
-  * Write sectors through the driver's compressed-write hook.
-  *
-  * Returns -ENOMEDIUM without a driver, -ENOTSUP if the driver has no such
-  * hook, -EIO if the request fails the range check, otherwise the driver's
-  * result. The device must not have a dirty bitmap.
-  */
- int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                           const uint8_t *buf, int nb_sectors)
- {
-     BlockDriver *drv = bs->drv;
-
-     if (!drv) {
-         return -ENOMEDIUM;
-     }
-     if (!drv->bdrv_write_compressed) {
-         return -ENOTSUP;
-     }
-     if (bdrv_check_request(bs, sector_num, nb_sectors)) {
-         return -EIO;
-     }
-
-     assert(!bs->dirty_bitmap);
-
-     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
- }
-
- /*
-  * Fill *bdi with driver information. The structure is zeroed before the
-  * driver hook runs. Returns -ENOMEDIUM without a driver and -ENOTSUP if the
-  * driver has no get_info hook (in both cases *bdi is left untouched).
-  */
- int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
- {
-     BlockDriver *drv = bs->drv;
-
-     if (!drv) {
-         return -ENOMEDIUM;
-     }
-     if (!drv->bdrv_get_info) {
-         return -ENOTSUP;
-     }
-
-     memset(bdi, 0, sizeof(*bdi));
-
-     return drv->bdrv_get_info(bs, bdi);
- }
-
- /*
-  * Save 'size' bytes of VM state from a flat buffer at position 'pos' by
-  * wrapping buf in a single-element vector for bdrv_writev_vmstate().
-  */
- int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
-                       int64_t pos, int size)
- {
-     struct iovec single = {
-         .iov_base = (void *) buf,
-         .iov_len = size,
-     };
-     QEMUIOVector wrapper;
-
-     qemu_iovec_init_external(&wrapper, &single, 1);
-
-     return bdrv_writev_vmstate(bs, &wrapper, pos);
- }
-
- /*
-  * Save vectored VM state at position 'pos'. Uses the driver's save hook if
-  * present, otherwise forwards to bs->file; returns -ENOMEDIUM without a
-  * driver and -ENOTSUP when neither is available.
-  */
- int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
- {
-     BlockDriver *drv = bs->drv;
-
-     if (!drv) {
-         return -ENOMEDIUM;
-     }
-     if (drv->bdrv_save_vmstate) {
-         return drv->bdrv_save_vmstate(bs, qiov, pos);
-     }
-     if (bs->file) {
-         return bdrv_writev_vmstate(bs->file, qiov, pos);
-     }
-
-     return -ENOTSUP;
- }
-
- /*
-  * Load 'size' bytes of VM state from position 'pos' into buf. Uses the
-  * driver's load hook if present, otherwise forwards to bs->file; returns
-  * -ENOMEDIUM without a driver and -ENOTSUP when neither is available.
-  */
- int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                       int64_t pos, int size)
- {
-     BlockDriver *drv = bs->drv;
-
-     if (!drv) {
-         return -ENOMEDIUM;
-     }
-     if (drv->bdrv_load_vmstate) {
-         return drv->bdrv_load_vmstate(bs, buf, pos, size);
-     }
-
-     return bs->file ? bdrv_load_vmstate(bs->file, buf, pos, size)
-                     : -ENOTSUP;
- }
-
- /*
-  * Forward a debug event to the driver's debug_event hook. Does nothing if
-  * bs is NULL, has no driver, or the driver has no such hook.
-  */
- void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
- {
-     if (bs && bs->drv && bs->drv->bdrv_debug_event) {
-         bs->drv->bdrv_debug_event(bs, event);
-     }
- }
-
- /*
-  * Set a debug breakpoint on the first node, following bs->file links, whose
-  * driver implements bdrv_debug_breakpoint. Returns that hook's result, or
-  * -ENOTSUP if the walk ends without finding one.
-  */
- int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
-                           const char *tag)
- {
-     /* Skip nodes whose driver lacks the hook */
-     for (; bs && bs->drv && !bs->drv->bdrv_debug_breakpoint; bs = bs->file) {
-     }
-
-     /* The walk stopped on a NULL node or a node without a driver */
-     if (!bs || !bs->drv) {
-         return -ENOTSUP;
-     }
-
-     return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
- }
-
- /*
-  * Resume the request tagged 'tag' on the first node, following bs->file
-  * links, whose driver implements bdrv_debug_resume. Returns that hook's
-  * result, or -ENOTSUP if the walk ends without finding one.
-  */
- int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
- {
-     /* Skip nodes whose driver lacks the hook */
-     for (; bs && bs->drv && !bs->drv->bdrv_debug_resume; bs = bs->file) {
-     }
-
-     /* The walk stopped on a NULL node or a node without a driver */
-     if (!bs || !bs->drv) {
-         return -ENOTSUP;
-     }
-
-     return bs->drv->bdrv_debug_resume(bs, tag);
- }
-
- /*
-  * Ask the first node, following bs->file links, whose driver implements
-  * bdrv_debug_is_suspended whether 'tag' is suspended. Returns false if the
-  * walk ends without finding such a driver.
-  */
- bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
- {
-     /* Skip nodes whose driver lacks the hook */
-     for (; bs && bs->drv && !bs->drv->bdrv_debug_is_suspended;
-          bs = bs->file) {
-     }
-
-     /* The walk stopped on a NULL node or a node without a driver */
-     if (!bs || !bs->drv) {
-         return false;
-     }
-
-     return bs->drv->bdrv_debug_is_suspended(bs, tag);
- }
-
- /* Return 1 if BDRV_O_SNAPSHOT is set in open_flags, else 0. */
- int bdrv_is_snapshot(BlockDriverState *bs)
- {
-     return (bs->open_flags & BDRV_O_SNAPSHOT) ? 1 : 0;
- }
-
- /*
-  * Find the image in bs's backing chain whose backing file name matches
-  * 'backing_file'. Returns that backing image, or NULL if there is no match
-  * or if bs, its driver or backing_file is missing.
-  *
-  * backing_file can either be relative, or absolute, or a protocol. If it is
-  * relative, it must be relative to the chain. So, passing in bs->filename
-  * from a BDS as backing_file should not be done, as that may be relative to
-  * the CWD rather than the chain.
-  */
- BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
-                                           const char *backing_file)
- {
-     BlockDriverState *node;
-     BlockDriverState *found = NULL;
-     char *wanted_abs;
-     char *candidate_abs;
-     char *scratch;
-     int wanted_is_protocol;
-
-     if (!bs || !bs->drv || !backing_file) {
-         return NULL;
-     }
-
-     wanted_abs = g_malloc(PATH_MAX);
-     candidate_abs = g_malloc(PATH_MAX);
-     scratch = g_malloc(PATH_MAX);
-
-     wanted_is_protocol = path_has_protocol(backing_file);
-
-     for (node = bs; node->backing_hd; node = node->backing_hd) {
-         /* Protocol names on either side are compared verbatim */
-         if (wanted_is_protocol || path_has_protocol(node->backing_file)) {
-             if (strcmp(backing_file, node->backing_file) == 0) {
-                 found = node->backing_hd;
-                 break;
-             }
-             continue;
-         }
-
-         /* Resolve the requested name relative to this image's filename */
-         path_combine(scratch, PATH_MAX, node->filename, backing_file);
-         if (!realpath(scratch, wanted_abs)) {
-             continue;
-         }
-
-         /* Resolve this image's recorded backing file the same way */
-         path_combine(scratch, PATH_MAX, node->filename, node->backing_file);
-         if (!realpath(scratch, candidate_abs)) {
-             continue;
-         }
-
-         /* Both names are canonical absolute paths now */
-         if (strcmp(candidate_abs, wanted_abs) == 0) {
-             found = node->backing_hd;
-             break;
-         }
-     }
-
-     g_free(wanted_abs);
-     g_free(candidate_abs);
-     g_free(scratch);
-     return found;
- }
-
- /*
-  * Count the backing images below bs. The walk follows backing_hd links and
-  * stops at the first node without a driver or without a backing image.
-  */
- int bdrv_get_backing_file_depth(BlockDriverState *bs)
- {
-     int depth = 0;
-
-     while (bs->drv && bs->backing_hd) {
-         depth++;
-         bs = bs->backing_hd;
-     }
-
-     return depth;
- }
-
- /*
-  * Return the last image of the backing chain (the one with no backing_hd),
-  * or NULL if bs is NULL.
-  */
- BlockDriverState *bdrv_find_base(BlockDriverState *bs)
- {
-     BlockDriverState *node;
-
-     if (!bs) {
-         return NULL;
-     }
-
-     for (node = bs; node->backing_hd; node = node->backing_hd) {
-         /* keep descending */
-     }
-
-     return node;
- }
-
-/**************************************************************/
-/* async I/Os */
-
- /*
-  * Submit an asynchronous vectored read; cb(opaque, ...) is the completion
-  * callback handed on to bdrv_co_aio_rw_vector().
-  */
- BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
-                                  QEMUIOVector *qiov, int nb_sectors,
-                                  BlockDriverCompletionFunc *cb, void *opaque)
- {
-     const bool is_write = false;
-
-     trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
-
-     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
-                                  cb, opaque, is_write);
- }
-
- /*
-  * Submit an asynchronous vectored write; cb(opaque, ...) is the completion
-  * callback handed on to bdrv_co_aio_rw_vector().
-  */
- BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
-                                   QEMUIOVector *qiov, int nb_sectors,
-                                   BlockDriverCompletionFunc *cb, void *opaque)
- {
-     const bool is_write = true;
-
-     trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
-
-     return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
-                                  cb, opaque, is_write);
- }
-
-
- typedef struct MultiwriteCB { /* Shared completion state for one bdrv_aio_multiwrite() batch */
- int error; /* first negative result reported by a request, 0 if none */
- int num_requests; /* submitted requests that have not completed yet */
- int num_callbacks; /* number of entries in callbacks[] */
- struct {
- BlockDriverCompletionFunc *cb; /* caller's completion function */
- void *opaque; /* argument passed to cb */
- QEMUIOVector *free_qiov; /* merged vector to destroy and free after cb, or NULL */
- } callbacks[];
- } MultiwriteCB;
-
- /*
-  * Invoke every caller callback of the batch with the batch-wide error code,
-  * releasing the merged I/O vector attached to each entry (if any) after its
-  * callback has run.
-  */
- static void multiwrite_user_cb(MultiwriteCB *mcb)
- {
-     int idx;
-
-     for (idx = 0; idx < mcb->num_callbacks; idx++) {
-         QEMUIOVector *merged;
-
-         mcb->callbacks[idx].cb(mcb->callbacks[idx].opaque, mcb->error);
-
-         merged = mcb->callbacks[idx].free_qiov;
-         if (merged) {
-             qemu_iovec_destroy(merged);
-         }
-         g_free(merged);
-     }
- }
-
- /*
-  * Completion callback for one submitted request of a multiwrite batch.
-  * Remembers the first error and, when the last outstanding request has
-  * completed, runs the caller callbacks and frees the batch state.
-  */
- static void multiwrite_cb(void *opaque, int ret)
- {
-     MultiwriteCB *mcb = opaque;
-
-     trace_multiwrite_cb(mcb, ret);
-
-     if (!mcb->error && ret < 0) {
-         mcb->error = ret;
-     }
-
-     if (--mcb->num_requests != 0) {
-         return;
-     }
-
-     multiwrite_user_cb(mcb);
-     g_free(mcb);
- }
-
- /*
-  * qsort() comparator ordering BlockRequests by ascending start sector.
-  * Returns -1, 0 or 1.
-  */
- static int multiwrite_req_compare(const void *a, const void *b)
- {
-     const BlockRequest *lhs = a;
-     const BlockRequest *rhs = b;
-
-     /*
-      * Compare instead of subtracting: the difference of two sector numbers
-      * could overflow the int return value.
-      */
-     return (lhs->sector > rhs->sector) - (lhs->sector < rhs->sector);
- }
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs, MultiwriteCB *mcb)
-{
- int i, outidx;
-
- // Sort requests by start sector
- qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
- // Check if adjacent requests touch the same clusters. If so, combine them,
- // filling up gaps with zero sectors.
- outidx = 0;
- for (i = 1; i < num_reqs; i++) {
- int merge = 0;
- int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
- // Handle exactly sequential writes and overlapping writes.
- if (reqs[i].sector <= oldreq_last) {
- merge = 1;
- }
-
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
- merge = 0;
- }
-
- if (merge) {
- size_t size;
- QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
- qemu_iovec_init(qiov,
- reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
- // Add the first request to the merged one. If the requests are
- // overlapping, drop the last sectors of the first request.
- size = (reqs[i].sector - reqs[outidx].sector) << 9;
- qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
- // We should need to add any zeros between the two requests
- assert (reqs[i].sector <= oldreq_last);
-
- // Add the second request
- qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
- reqs[outidx].nb_sectors = qiov->size >> 9;
- reqs[outidx].qiov = qiov;
-
- mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
- } else {
- outidx++;
- reqs[outidx].sector = reqs[i].sector;
- reqs[outidx].nb_sectors = reqs[i].nb_sectors;
- reqs[outidx].qiov = reqs[i].qiov;
- }
- }
-
- return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
- MultiwriteCB *mcb;
- int i;
-
- /* don't submit writes if we don't have a medium */
- if (bs->drv == NULL) {
- for (i = 0; i < num_reqs; i++) {
- reqs[i].error = -ENOMEDIUM;
- }
- return -1;
- }
-
- if (num_reqs == 0) {
- return 0;
- }
-
- // Create MultiwriteCB structure
- mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
- mcb->num_requests = 0;
- mcb->num_callbacks = num_reqs;
-
- for (i = 0; i < num_reqs; i++) {
- mcb->callbacks[i].cb = reqs[i].cb;
- mcb->callbacks[i].opaque = reqs[i].opaque;
- }
-
- // Check for mergable requests
- num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
- trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
- /* Run the aio requests. */
- mcb->num_requests = num_reqs;
- for (i = 0; i < num_reqs; i++) {
- bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
- reqs[i].nb_sectors, multiwrite_cb, mcb);
- }
-
- return 0;
-}
-
-void bdrv_aio_cancel(BlockDriverAIOCB *acb)
-{
- acb->aiocb_info->cancel(acb);
-}
-
-/* block I/O throttling */
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, double elapsed_time, uint64_t *wait)
-{
- uint64_t bps_limit = 0;
- uint64_t extension;
- double bytes_limit, bytes_base, bytes_res;
- double slice_time, wait_time;
-
- if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
- bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
- } else if (bs->io_limits.bps[is_write]) {
- bps_limit = bs->io_limits.bps[is_write];
- } else {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- slice_time = bs->slice_end - bs->slice_start;
- slice_time /= (NANOSECONDS_PER_SECOND);
- bytes_limit = bps_limit * slice_time;
- bytes_base = bs->slice_submitted.bytes[is_write];
- if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
- bytes_base += bs->slice_submitted.bytes[!is_write];
- }
-
- /* bytes_base: the bytes of data which have been read/written; and
- * it is obtained from the history statistic info.
- * bytes_res: the remaining bytes of data which need to be read/written.
- * (bytes_base + bytes_res) / bps_limit: used to calcuate
- * the total time for completing reading/writting all data.
- */
- bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
-
- if (bytes_base + bytes_res <= bytes_limit) {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- /* Calc approx time to dispatch */
- wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
-
- /* When the I/O rate at runtime exceeds the limits,
- * bs->slice_end need to be extended in order that the current statistic
- * info can be kept until the timer fire, so it is increased and tuned
- * based on the result of experiment.
- */
- extension = wait_time * NANOSECONDS_PER_SECOND;
- extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
- BLOCK_IO_SLICE_TIME;
- bs->slice_end += extension;
- if (wait) {
- *wait = wait_time * NANOSECONDS_PER_SECOND;
- }
-
- return true;
-}
-
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
- double elapsed_time, uint64_t *wait)
-{
- uint64_t iops_limit = 0;
- double ios_limit, ios_base;
- double slice_time, wait_time;
-
- if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
- iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
- } else if (bs->io_limits.iops[is_write]) {
- iops_limit = bs->io_limits.iops[is_write];
- } else {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- slice_time = bs->slice_end - bs->slice_start;
- slice_time /= (NANOSECONDS_PER_SECOND);
- ios_limit = iops_limit * slice_time;
- ios_base = bs->slice_submitted.ios[is_write];
- if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
- ios_base += bs->slice_submitted.ios[!is_write];
- }
-
- if (ios_base + 1 <= ios_limit) {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- /* Calc approx time to dispatch, in seconds */
- wait_time = (ios_base + 1) / iops_limit;
- if (wait_time > elapsed_time) {
- wait_time = wait_time - elapsed_time;
- } else {
- wait_time = 0;
- }
-
- /* Exceeded current slice, extend it by another slice time */
- bs->slice_end += BLOCK_IO_SLICE_TIME;
- if (wait) {
- *wait = wait_time * NANOSECONDS_PER_SECOND;
- }
-
- return true;
-}
-
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, int64_t *wait)
-{
- int64_t now, max_wait;
- uint64_t bps_wait = 0, iops_wait = 0;
- double elapsed_time;
- int bps_ret, iops_ret;
-
- now = qemu_get_clock_ns(vm_clock);
- if (now > bs->slice_end) {
- bs->slice_start = now;
- bs->slice_end = now + BLOCK_IO_SLICE_TIME;
- memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
- }
-
- elapsed_time = now - bs->slice_start;
- elapsed_time /= (NANOSECONDS_PER_SECOND);
-
- bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
- is_write, elapsed_time, &bps_wait);
- iops_ret = bdrv_exceed_iops_limits(bs, is_write,
- elapsed_time, &iops_wait);
- if (bps_ret || iops_ret) {
- max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
- if (wait) {
- *wait = max_wait;
- }
-
- now = qemu_get_clock_ns(vm_clock);
- if (bs->slice_end < now + max_wait) {
- bs->slice_end = now + max_wait;
- }
-
- return true;
- }
-
- if (wait) {
- *wait = 0;
- }
-
- bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
- BDRV_SECTOR_SIZE;
- bs->slice_submitted.ios[is_write]++;
-
- return false;
-}
-
-/**************************************************************/
-/* async block device emulation */
-
-typedef struct BlockDriverAIOCBSync {
- BlockDriverAIOCB common;
- QEMUBH *bh;
- int ret;
- /* vector translation state */
- QEMUIOVector *qiov;
- uint8_t *bounce;
- int is_write;
-} BlockDriverAIOCBSync;
-
-static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
-{
- BlockDriverAIOCBSync *acb =
- container_of(blockacb, BlockDriverAIOCBSync, common);
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- qemu_aio_release(acb);
-}
-
-static const AIOCBInfo bdrv_em_aiocb_info = {
- .aiocb_size = sizeof(BlockDriverAIOCBSync),
- .cancel = bdrv_aio_cancel_em,
-};
-
-static void bdrv_aio_bh_cb(void *opaque)
-{
- BlockDriverAIOCBSync *acb = opaque;
-
- if (!acb->is_write)
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
- qemu_vfree(acb->bounce);
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- qemu_aio_release(acb);
-}
-
-static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque,
- int is_write)
-
-{
- BlockDriverAIOCBSync *acb;
-
- acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
- acb->is_write = is_write;
- acb->qiov = qiov;
- acb->bounce = qemu_blockalign(bs, qiov->size);
- acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
-
- if (is_write) {
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
- acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
- } else {
- acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
- }
-
- qemu_bh_schedule(acb->bh);
-
- return &acb->common;
-}
-
-static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-}
-
-
-typedef struct BlockDriverAIOCBCoroutine {
- BlockDriverAIOCB common;
- BlockRequest req;
- bool is_write;
- bool *done;
- QEMUBH* bh;
-} BlockDriverAIOCBCoroutine;
-
-static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
-{
- BlockDriverAIOCBCoroutine *acb =
- container_of(blockacb, BlockDriverAIOCBCoroutine, common);
- bool done = false;
-
- acb->done = &done;
- while (!done) {
- qemu_aio_wait();
- }
-}
-
-static const AIOCBInfo bdrv_em_co_aiocb_info = {
- .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
- .cancel = bdrv_aio_co_cancel_em,
-};
-
-static void bdrv_co_em_bh(void *opaque)
-{
- BlockDriverAIOCBCoroutine *acb = opaque;
-
- acb->common.cb(acb->common.opaque, acb->req.error);
-
- if (acb->done) {
- *acb->done = true;
- }
-
- qemu_bh_delete(acb->bh);
- qemu_aio_release(acb);
-}
-
-/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
-static void coroutine_fn bdrv_co_do_rw(void *opaque)
-{
- BlockDriverAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
-
- if (!acb->is_write) {
- acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, 0);
- } else {
- acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, 0);
- }
-
- acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
-}
-
-static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque,
- bool is_write)
-{
- Coroutine *co;
- BlockDriverAIOCBCoroutine *acb;
-
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
- acb->req.qiov = qiov;
- acb->is_write = is_write;
- acb->done = NULL;
-
- co = qemu_coroutine_create(bdrv_co_do_rw);
- qemu_coroutine_enter(co, acb);
-
- return &acb->common;
-}
-
-static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
-{
- BlockDriverAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
-
- acb->req.error = bdrv_co_flush(bs);
- acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
-}
-
-BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_flush(bs, opaque);
-
- Coroutine *co;
- BlockDriverAIOCBCoroutine *acb;
-
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
- acb->done = NULL;
-
- co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
- qemu_coroutine_enter(co, acb);
-
- return &acb->common;
-}
-
-static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
-{
- BlockDriverAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
-
- acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
- acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
-}
-
-BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- Coroutine *co;
- BlockDriverAIOCBCoroutine *acb;
-
- trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
-
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
- acb->done = NULL;
- co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
- qemu_coroutine_enter(co, acb);
-
- return &acb->common;
-}
-
-void bdrv_init(void)
-{
- module_call_init(MODULE_INIT_BLOCK);
-}
-
-void bdrv_init_with_whitelist(void)
-{
- use_bdrv_whitelist = 1;
- bdrv_init();
-}
-
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- BlockDriverAIOCB *acb;
-
- acb = g_slice_alloc(aiocb_info->aiocb_size);
- acb->aiocb_info = aiocb_info;
- acb->bs = bs;
- acb->cb = cb;
- acb->opaque = opaque;
- return acb;
-}
-
-void qemu_aio_release(void *p)
-{
- BlockDriverAIOCB *acb = p;
- g_slice_free1(acb->aiocb_info->aiocb_size, acb);
-}
-
-/**************************************************************/
-/* Coroutine block device emulation */
-
-typedef struct CoroutineIOCompletion {
- Coroutine *coroutine;
- int ret;
-} CoroutineIOCompletion;
-
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
- CoroutineIOCompletion *co = opaque;
-
- co->ret = ret;
- qemu_coroutine_enter(co->coroutine, NULL);
-}
-
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *iov,
- bool is_write)
-{
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
- BlockDriverAIOCB *acb;
-
- if (is_write) {
- acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- } else {
- acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- }
-
- trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
- if (!acb) {
- return -EIO;
- }
- qemu_coroutine_yield();
-
- return co.ret;
-}
-
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
-}
-
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
-}
-
-static void coroutine_fn bdrv_flush_co_entry(void *opaque)
-{
- RwCo *rwco = opaque;
-
- rwco->ret = bdrv_co_flush(rwco->bs);
-}
-
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
-{
- int ret;
-
- if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
- return 0;
- }
-
- /* Write back cached data to the OS even with cache=unsafe */
- BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
- if (bs->drv->bdrv_co_flush_to_os) {
- ret = bs->drv->bdrv_co_flush_to_os(bs);
- if (ret < 0) {
- return ret;
- }
- }
-
- /* But don't actually force it to the disk with cache=unsafe */
- if (bs->open_flags & BDRV_O_NO_FLUSH) {
- goto flush_parent;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
- if (bs->drv->bdrv_co_flush_to_disk) {
- ret = bs->drv->bdrv_co_flush_to_disk(bs);
- } else if (bs->drv->bdrv_aio_flush) {
- BlockDriverAIOCB *acb;
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
-
- acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
- if (acb == NULL) {
- ret = -EIO;
- } else {
- qemu_coroutine_yield();
- ret = co.ret;
- }
- } else {
- /*
- * Some block drivers always operate in either writethrough or unsafe
- * mode and don't support bdrv_flush therefore. Usually qemu doesn't
- * know how the server works (because the behaviour is hardcoded or
- * depends on server-side configuration), so we can't ensure that
- * everything is safe on disk. Returning an error doesn't work because
- * that would break guests even if the server operates in writethrough
- * mode.
- *
- * Let's hope the user knows what he's doing.
- */
- ret = 0;
- }
- if (ret < 0) {
- return ret;
- }
-
- /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
- * in the case of cache=unsafe, so there are no useless flushes.
- */
-flush_parent:
- return bdrv_co_flush(bs->file);
-}
-
-void bdrv_invalidate_cache(BlockDriverState *bs)
-{
- if (bs->drv && bs->drv->bdrv_invalidate_cache) {
- bs->drv->bdrv_invalidate_cache(bs);
- }
-}
-
-void bdrv_invalidate_cache_all(void)
-{
- BlockDriverState *bs;
-
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- bdrv_invalidate_cache(bs);
- }
-}
-
-void bdrv_clear_incoming_migration_all(void)
-{
- BlockDriverState *bs;
-
- QTAILQ_FOREACH(bs, &bdrv_states, list) {
- bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
- }
-}
-
-int bdrv_flush(BlockDriverState *bs)
-{
- Coroutine *co;
- RwCo rwco = {
- .bs = bs,
- .ret = NOT_DONE,
- };
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_flush_co_entry(&rwco);
- } else {
- co = qemu_coroutine_create(bdrv_flush_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
- qemu_aio_wait();
- }
- }
-
- return rwco.ret;
-}
-
-static void coroutine_fn bdrv_discard_co_entry(void *opaque)
-{
- RwCo *rwco = opaque;
-
- rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
-}
-
-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
-{
- if (!bs->drv) {
- return -ENOMEDIUM;
- } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- } else if (bs->read_only) {
- return -EROFS;
- }
-
- if (bs->dirty_bitmap) {
- bdrv_reset_dirty(bs, sector_num, nb_sectors);
- }
-
- /* Do nothing if disabled. */
- if (!(bs->open_flags & BDRV_O_UNMAP)) {
- return 0;
- }
-
- if (bs->drv->bdrv_co_discard) {
- return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
- } else if (bs->drv->bdrv_aio_discard) {
- BlockDriverAIOCB *acb;
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
-
- acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
- bdrv_co_io_em_complete, &co);
- if (acb == NULL) {
- return -EIO;
- } else {
- qemu_coroutine_yield();
- return co.ret;
- }
- } else {
- return 0;
- }
-}
-
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
-{
- Coroutine *co;
- RwCo rwco = {
- .bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .ret = NOT_DONE,
- };
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_discard_co_entry(&rwco);
- } else {
- co = qemu_coroutine_create(bdrv_discard_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
- qemu_aio_wait();
- }
- }
-
- return rwco.ret;
-}
-
-/**************************************************************/
-/* removable device support */
-
-/**
- * Return TRUE if the media is present
- */
-int bdrv_is_inserted(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
-
- if (!drv)
- return 0;
- if (!drv->bdrv_is_inserted)
- return 1;
- return drv->bdrv_is_inserted(bs);
-}
-
-/**
- * Return whether the media changed since the last call to this
- * function, or -ENOTSUP if we don't know. Most drivers don't know.
- */
-int bdrv_media_changed(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
-
- if (drv && drv->bdrv_media_changed) {
- return drv->bdrv_media_changed(bs);
- }
- return -ENOTSUP;
-}
-
-/**
- * If eject_flag is TRUE, eject the media. Otherwise, close the tray
- */
-void bdrv_eject(BlockDriverState *bs, bool eject_flag)
-{
- BlockDriver *drv = bs->drv;
-
- if (drv && drv->bdrv_eject) {
- drv->bdrv_eject(bs, eject_flag);
- }
-
- if (bs->device_name[0] != '\0') {
- bdrv_emit_qmp_eject_event(bs, eject_flag);
- }
-}
-
-/**
- * Lock or unlock the media (if it is locked, the user won't be able
- * to eject it manually).
- */
-void bdrv_lock_medium(BlockDriverState *bs, bool locked)
-{
- BlockDriver *drv = bs->drv;
-
- trace_bdrv_lock_medium(bs, locked);
-
- if (drv && drv->bdrv_lock_medium) {
- drv->bdrv_lock_medium(bs, locked);
- }
-}
-
-/* needed for generic scsi interface */
-
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- BlockDriver *drv = bs->drv;
-
- if (drv && drv->bdrv_ioctl)
- return drv->bdrv_ioctl(bs, req, buf);
- return -ENOTSUP;
-}
-
-BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- BlockDriver *drv = bs->drv;
-
- if (drv && drv->bdrv_aio_ioctl)
- return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
- return NULL;
-}
-
-void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
-{
- bs->buffer_alignment = align;
-}
-
-void *qemu_blockalign(BlockDriverState *bs, size_t size)
-{
- return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
-}
-
-/*
- * Check if all memory in this vector is sector aligned.
- */
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
-{
- int i;
-
- for (i = 0; i < qiov->niov; i++) {
- if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
- return false;
- }
- }
-
- return true;
-}
-
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
-{
- int64_t bitmap_size;
-
- assert((granularity & (granularity - 1)) == 0);
-
- if (granularity) {
- granularity >>= BDRV_SECTOR_BITS;
- assert(!bs->dirty_bitmap);
- bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
- bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
- } else {
- if (bs->dirty_bitmap) {
- hbitmap_free(bs->dirty_bitmap);
- bs->dirty_bitmap = NULL;
- }
- }
-}
-
-int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
-{
- if (bs->dirty_bitmap) {
- return hbitmap_get(bs->dirty_bitmap, sector);
- } else {
- return 0;
- }
-}
-
-void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
-{
- hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
-}
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
- int nr_sectors)
-{
- hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
- int nr_sectors)
-{
- hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
-}
-
-int64_t bdrv_get_dirty_count(BlockDriverState *bs)
-{
- if (bs->dirty_bitmap) {
- return hbitmap_count(bs->dirty_bitmap);
- } else {
- return 0;
- }
-}
-
-void bdrv_set_in_use(BlockDriverState *bs, int in_use)
-{
- assert(bs->in_use != in_use);
- bs->in_use = in_use;
-}
-
-int bdrv_in_use(BlockDriverState *bs)
-{
- return bs->in_use;
-}
-
-void bdrv_iostatus_enable(BlockDriverState *bs)
-{
- bs->iostatus_enabled = true;
- bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-}
-
-/* The I/O status is only enabled if the drive explicitly
- * enables it _and_ the VM is configured to stop on errors */
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
-{
- return (bs->iostatus_enabled &&
- (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
- bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
- bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
-}
-
-void bdrv_iostatus_disable(BlockDriverState *bs)
-{
- bs->iostatus_enabled = false;
-}
-
-void bdrv_iostatus_reset(BlockDriverState *bs)
-{
- if (bdrv_iostatus_is_enabled(bs)) {
- bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
- if (bs->job) {
- block_job_iostatus_reset(bs->job);
- }
- }
-}
-
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
-{
- assert(bdrv_iostatus_is_enabled(bs));
- if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
- bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
- BLOCK_DEVICE_IO_STATUS_FAILED;
- }
-}
-
-void
-bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
- enum BlockAcctType type)
-{
- assert(type < BDRV_MAX_IOTYPE);
-
- cookie->bytes = bytes;
- cookie->start_time_ns = get_clock();
- cookie->type = type;
-}
-
-void
-bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
-{
- assert(cookie->type < BDRV_MAX_IOTYPE);
-
- bs->nr_bytes[cookie->type] += cookie->bytes;
- bs->nr_ops[cookie->type]++;
- bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
-}
-
-void bdrv_img_create(const char *filename, const char *fmt,
- const char *base_filename, const char *base_fmt,
- char *options, uint64_t img_size, int flags,
- Error **errp, bool quiet)
-{
- QEMUOptionParameter *param = NULL, *create_options = NULL;
- QEMUOptionParameter *backing_fmt, *backing_file, *size;
- BlockDriverState *bs = NULL;
- BlockDriver *drv, *proto_drv;
- BlockDriver *backing_drv = NULL;
- int ret = 0;
-
- /* Find driver and parse its options */
- drv = bdrv_find_format(fmt);
- if (!drv) {
- error_setg(errp, "Unknown file format '%s'", fmt);
- return;
- }
-
- proto_drv = bdrv_find_protocol(filename, true);
- if (!proto_drv) {
- error_setg(errp, "Unknown protocol '%s'", filename);
- return;
- }
-
- create_options = append_option_parameters(create_options,
- drv->create_options);
- create_options = append_option_parameters(create_options,
- proto_drv->create_options);
-
- /* Create parameter list with default values */
- param = parse_option_parameters("", create_options, param);
-
- set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
-
- /* Parse -o options */
- if (options) {
- param = parse_option_parameters(options, create_options, param);
- if (param == NULL) {
- error_setg(errp, "Invalid options for file format '%s'.", fmt);
- goto out;
- }
- }
-
- if (base_filename) {
- if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
- base_filename)) {
- error_setg(errp, "Backing file not supported for file format '%s'",
- fmt);
- goto out;
- }
- }
-
- if (base_fmt) {
- if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
- error_setg(errp, "Backing file format not supported for file "
- "format '%s'", fmt);
- goto out;
- }
- }
-
- backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
- if (backing_file && backing_file->value.s) {
- if (!strcmp(filename, backing_file->value.s)) {
- error_setg(errp, "Error: Trying to create an image with the "
- "same filename as the backing file");
- goto out;
- }
- }
-
- backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
- if (backing_fmt && backing_fmt->value.s) {
- backing_drv = bdrv_find_format(backing_fmt->value.s);
- if (!backing_drv) {
- error_setg(errp, "Unknown backing file format '%s'",
- backing_fmt->value.s);
- goto out;
- }
- }
-
- // The size for the image must always be specified, with one exception:
- // If we are using a backing file, we can obtain the size from there
- size = get_option_parameter(param, BLOCK_OPT_SIZE);
- if (size && size->value.n == -1) {
- if (backing_file && backing_file->value.s) {
- uint64_t size;
- char buf[32];
- int back_flags;
-
- /* backing files always opened read-only */
- back_flags =
- flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
- bs = bdrv_new("");
-
- ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
- backing_drv);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "Could not open '%s'",
- backing_file->value.s);
- goto out;
- }
- bdrv_get_geometry(bs, &size);
- size *= 512;
-
- snprintf(buf, sizeof(buf), "%" PRId64, size);
- set_option_parameter(param, BLOCK_OPT_SIZE, buf);
- } else {
- error_setg(errp, "Image creation needs a size parameter");
- goto out;
- }
- }
-
- if (!quiet) {
- printf("Formatting '%s', fmt=%s ", filename, fmt);
- print_option_parameters(param);
- puts("");
- }
- ret = bdrv_create(drv, filename, param);
- if (ret < 0) {
- if (ret == -ENOTSUP) {
- error_setg(errp,"Formatting or formatting option not supported for "
- "file format '%s'", fmt);
- } else if (ret == -EFBIG) {
- const char *cluster_size_hint = "";
- if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
- cluster_size_hint = " (try using a larger cluster size)";
- }
- error_setg(errp, "The image size is too large for file format '%s'%s",
- fmt, cluster_size_hint);
- } else {
- error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
- strerror(-ret));
- }
- }
-
-out:
- free_option_parameters(create_options);
- free_option_parameters(param);
-
- if (bs) {
- bdrv_delete(bs);
- }
-}
-
-AioContext *bdrv_get_aio_context(BlockDriverState *bs)
-{
- /* Currently BlockDriverState always uses the main loop AioContext */
- return qemu_get_aio_context();
-}
-
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
- NotifierWithReturn *notifier)
-{
- notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
-}
diff --git a/contrib/qemu/block/qcow.c b/contrib/qemu/block/qcow.c
deleted file mode 100644
index 5239bd68f1c..00000000000
--- a/contrib/qemu/block/qcow.c
+++ /dev/null
@@ -1,914 +0,0 @@
-/*
- * Block driver for the QCOW format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qemu/aes.h"
-#include "migration/migration.h"
-
-/**************************************************************/
-/* QEMU COW block driver with compression and encryption support */
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 1
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES 1
-
-#define QCOW_OFLAG_COMPRESSED (1LL << 63)
-
-typedef struct QCowHeader {
- uint32_t magic;
- uint32_t version;
- uint64_t backing_file_offset;
- uint32_t backing_file_size;
- uint32_t mtime;
- uint64_t size; /* in bytes */
- uint8_t cluster_bits;
- uint8_t l2_bits;
- uint32_t crypt_method;
- uint64_t l1_table_offset;
-} QCowHeader;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct BDRVQcowState {
- int cluster_bits;
- int cluster_size;
- int cluster_sectors;
- int l2_bits;
- int l2_size;
- int l1_size;
- uint64_t cluster_offset_mask;
- uint64_t l1_table_offset;
- uint64_t *l1_table;
- uint64_t *l2_cache;
- uint64_t l2_cache_offsets[L2_CACHE_SIZE];
- uint32_t l2_cache_counts[L2_CACHE_SIZE];
- uint8_t *cluster_cache;
- uint8_t *cluster_data;
- uint64_t cluster_cache_offset;
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
- uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- CoMutex lock;
- Error *migration_blocker;
-} BDRVQcowState;
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
- const QCowHeader *cow_header = (const void *)buf;
-
- if (buf_size >= sizeof(QCowHeader) &&
- be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
- be32_to_cpu(cow_header->version) == QCOW_VERSION)
- return 100;
- else
- return 0;
-}
-
-static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQcowState *s = bs->opaque;
- int len, i, shift, ret;
- QCowHeader header;
-
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
- if (ret < 0) {
- goto fail;
- }
- be32_to_cpus(&header.magic);
- be32_to_cpus(&header.version);
- be64_to_cpus(&header.backing_file_offset);
- be32_to_cpus(&header.backing_file_size);
- be32_to_cpus(&header.mtime);
- be64_to_cpus(&header.size);
- be32_to_cpus(&header.crypt_method);
- be64_to_cpus(&header.l1_table_offset);
-
- if (header.magic != QCOW_MAGIC) {
- ret = -EMEDIUMTYPE;
- goto fail;
- }
- if (header.version != QCOW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version), "QCOW version %d", header.version);
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "qcow", version);
- ret = -ENOTSUP;
- goto fail;
- }
-
- if (header.size <= 1 || header.cluster_bits < 9) {
- ret = -EINVAL;
- goto fail;
- }
- if (header.crypt_method > QCOW_CRYPT_AES) {
- ret = -EINVAL;
- goto fail;
- }
- s->crypt_method_header = header.crypt_method;
- if (s->crypt_method_header) {
- bs->encrypted = 1;
- }
- s->cluster_bits = header.cluster_bits;
- s->cluster_size = 1 << s->cluster_bits;
- s->cluster_sectors = 1 << (s->cluster_bits - 9);
- s->l2_bits = header.l2_bits;
- s->l2_size = 1 << s->l2_bits;
- bs->total_sectors = header.size / 512;
- s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-
- /* read the level 1 table */
- shift = s->cluster_bits + s->l2_bits;
- s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
-
- s->l1_table_offset = header.l1_table_offset;
- s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
-
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
-
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
- /* alloc L2 cache */
- s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
- s->cluster_cache = g_malloc(s->cluster_size);
- s->cluster_data = g_malloc(s->cluster_size);
- s->cluster_cache_offset = -1;
-
- /* read the backing file name */
- if (header.backing_file_offset != 0) {
- len = header.backing_file_size;
- if (len > 1023) {
- len = 1023;
- }
- ret = bdrv_pread(bs->file, header.backing_file_offset,
- bs->backing_file, len);
- if (ret < 0) {
- goto fail;
- }
- bs->backing_file[len] = '\0';
- }
-
- /* Disable migration when qcow images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "qcow", bs->device_name, "live migration");
- migrate_add_blocker(s->migration_blocker);
-
- qemu_co_mutex_init(&s->lock);
- return 0;
-
- fail:
- g_free(s->l1_table);
- g_free(s->l2_cache);
- g_free(s->cluster_cache);
- g_free(s->cluster_data);
- return ret;
-}
-
-
-/* We have nothing to do for QCOW reopen, stubs just return
- * success */
-static int qcow_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
- BDRVQcowState *s = bs->opaque;
- uint8_t keybuf[16];
- int len, i;
-
- memset(keybuf, 0, 16);
- len = strlen(key);
- if (len > 16)
- len = 16;
- /* XXX: we could compress the chars to 7 bits to increase
- entropy */
- for(i = 0;i < len;i++) {
- keybuf[i] = key[i];
- }
- s->crypt_method = s->crypt_method_header;
-
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
- return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
- return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
- algorithm for < 4 GB images. NOTE: out_buf == in_buf is
- supported */
-static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
-{
- union {
- uint64_t ll[2];
- uint8_t b[16];
- } ivec;
- int i;
-
- for(i = 0; i < nb_sectors; i++) {
- ivec.ll[0] = cpu_to_le64(sector_num);
- ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
- sector_num++;
- in_buf += 512;
- out_buf += 512;
- }
-}
-
-/* 'allocate' is:
- *
- * 0 to not allocate.
- *
- * 1 to allocate a normal cluster (for sector indexes 'n_start' to
- * 'n_end')
- *
- * 2 to allocate a compressed cluster of size
- * 'compressed_size'. 'compressed_size' must be > 0 and <
- * cluster_size
- *
- * return 0 if not allocated.
- */
-static uint64_t get_cluster_offset(BlockDriverState *bs,
- uint64_t offset, int allocate,
- int compressed_size,
- int n_start, int n_end)
-{
- BDRVQcowState *s = bs->opaque;
- int min_index, i, j, l1_index, l2_index;
- uint64_t l2_offset, *l2_table, cluster_offset, tmp;
- uint32_t min_count;
- int new_l2_table;
-
- l1_index = offset >> (s->l2_bits + s->cluster_bits);
- l2_offset = s->l1_table[l1_index];
- new_l2_table = 0;
- if (!l2_offset) {
- if (!allocate)
- return 0;
- /* allocate a new l2 entry */
- l2_offset = bdrv_getlength(bs->file);
- /* round to cluster size */
- l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
- /* update the L1 entry */
- s->l1_table[l1_index] = l2_offset;
- tmp = cpu_to_be64(l2_offset);
- if (bdrv_pwrite_sync(bs->file,
- s->l1_table_offset + l1_index * sizeof(tmp),
- &tmp, sizeof(tmp)) < 0)
- return 0;
- new_l2_table = 1;
- }
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (l2_offset == s->l2_cache_offsets[i]) {
- /* increment the hit count */
- if (++s->l2_cache_counts[i] == 0xffffffff) {
- for(j = 0; j < L2_CACHE_SIZE; j++) {
- s->l2_cache_counts[j] >>= 1;
- }
- }
- l2_table = s->l2_cache + (i << s->l2_bits);
- goto found;
- }
- }
- /* not found: load a new entry in the least used one */
- min_index = 0;
- min_count = 0xffffffff;
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (s->l2_cache_counts[i] < min_count) {
- min_count = s->l2_cache_counts[i];
- min_index = i;
- }
- }
- l2_table = s->l2_cache + (min_index << s->l2_bits);
- if (new_l2_table) {
- memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
- s->l2_size * sizeof(uint64_t)) < 0)
- return 0;
- } else {
- if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
- s->l2_size * sizeof(uint64_t))
- return 0;
- }
- s->l2_cache_offsets[min_index] = l2_offset;
- s->l2_cache_counts[min_index] = 1;
- found:
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
- if (!cluster_offset ||
- ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
- if (!allocate)
- return 0;
- /* allocate a new cluster */
- if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
- (n_end - n_start) < s->cluster_sectors) {
- /* if the cluster is already compressed, we must
- decompress it in the case it is not completely
- overwritten */
- if (decompress_cluster(bs, cluster_offset) < 0)
- return 0;
- cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + s->cluster_size - 1) &
- ~(s->cluster_size - 1);
- /* write the cluster content */
- if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
- s->cluster_size)
- return -1;
- } else {
- cluster_offset = bdrv_getlength(bs->file);
- if (allocate == 1) {
- /* round to cluster size */
- cluster_offset = (cluster_offset + s->cluster_size - 1) &
- ~(s->cluster_size - 1);
- bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
- /* if encrypted, we must initialize the cluster
- content which won't be written */
- if (s->crypt_method &&
- (n_end - n_start) < s->cluster_sectors) {
- uint64_t start_sect;
- start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
- memset(s->cluster_data + 512, 0x00, 512);
- for(i = 0; i < s->cluster_sectors; i++) {
- if (i < n_start || i >= n_end) {
- encrypt_sectors(s, start_sect + i,
- s->cluster_data,
- s->cluster_data + 512, 1, 1,
- &s->aes_encrypt_key);
- if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
- s->cluster_data, 512) != 512)
- return -1;
- }
- }
- }
- } else if (allocate == 2) {
- cluster_offset |= QCOW_OFLAG_COMPRESSED |
- (uint64_t)compressed_size << (63 - s->cluster_bits);
- }
- }
- /* update L2 table */
- tmp = cpu_to_be64(cluster_offset);
- l2_table[l2_index] = tmp;
- if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
- &tmp, sizeof(tmp)) < 0)
- return 0;
- }
- return cluster_offset;
-}
-
-static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n;
- uint64_t cluster_offset;
-
- qemu_co_mutex_lock(&s->lock);
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
- qemu_co_mutex_unlock(&s->lock);
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors)
- n = nb_sectors;
- *pnum = n;
- return (cluster_offset != 0);
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
- const uint8_t *buf, int buf_size)
-{
- z_stream strm1, *strm = &strm1;
- int ret, out_len;
-
- memset(strm, 0, sizeof(*strm));
-
- strm->next_in = (uint8_t *)buf;
- strm->avail_in = buf_size;
- strm->next_out = out_buf;
- strm->avail_out = out_buf_size;
-
- ret = inflateInit2(strm, -12);
- if (ret != Z_OK)
- return -1;
- ret = inflate(strm, Z_FINISH);
- out_len = strm->next_out - out_buf;
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
- out_len != out_buf_size) {
- inflateEnd(strm);
- return -1;
- }
- inflateEnd(strm);
- return 0;
-}
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, csize;
- uint64_t coffset;
-
- coffset = cluster_offset & s->cluster_offset_mask;
- if (s->cluster_cache_offset != coffset) {
- csize = cluster_offset >> (63 - s->cluster_bits);
- csize &= (s->cluster_size - 1);
- ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
- if (ret != csize)
- return -1;
- if (decompress_buffer(s->cluster_cache, s->cluster_size,
- s->cluster_data, csize) < 0) {
- return -1;
- }
- s->cluster_cache_offset = coffset;
- }
- return 0;
-}
-
-static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- int ret = 0, n;
- uint64_t cluster_offset;
- struct iovec hd_iov;
- QEMUIOVector hd_qiov;
- uint8_t *buf;
- void *orig_buf;
-
- if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
- } else {
- orig_buf = NULL;
- buf = (uint8_t *)qiov->iov->iov_base;
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- while (nb_sectors != 0) {
- /* prepare next request */
- cluster_offset = get_cluster_offset(bs, sector_num << 9,
- 0, 0, 0, 0);
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
-
- if (!cluster_offset) {
- if (bs->backing_hd) {
- /* read from the base image */
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- } else {
- /* Note: in this case, no need to wait */
- memset(buf, 0, 512 * n);
- }
- } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- /* add AIO support for compressed blocks ? */
- if (decompress_cluster(bs, cluster_offset) < 0) {
- goto fail;
- }
- memcpy(buf,
- s->cluster_cache + index_in_cluster * 512, 512 * n);
- } else {
- if ((cluster_offset & 511) != 0) {
- goto fail;
- }
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- break;
- }
- if (s->crypt_method) {
- encrypt_sectors(s, sector_num, buf, buf,
- n, 0,
- &s->aes_decrypt_key);
- }
- }
- ret = 0;
-
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
-
-done:
- qemu_co_mutex_unlock(&s->lock);
-
- if (qiov->niov > 1) {
- qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
- qemu_vfree(orig_buf);
- }
-
- return ret;
-
-fail:
- ret = -EIO;
- goto done;
-}
-
-static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- uint64_t cluster_offset;
- const uint8_t *src_buf;
- int ret = 0, n;
- uint8_t *cluster_data = NULL;
- struct iovec hd_iov;
- QEMUIOVector hd_qiov;
- uint8_t *buf;
- void *orig_buf;
-
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
- if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
- qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
- } else {
- orig_buf = NULL;
- buf = (uint8_t *)qiov->iov->iov_base;
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- while (nb_sectors != 0) {
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
- index_in_cluster,
- index_in_cluster + n);
- if (!cluster_offset || (cluster_offset & 511) != 0) {
- ret = -EIO;
- break;
- }
- if (s->crypt_method) {
- if (!cluster_data) {
- cluster_data = g_malloc0(s->cluster_size);
- }
- encrypt_sectors(s, sector_num, cluster_data, buf,
- n, 1, &s->aes_encrypt_key);
- src_buf = cluster_data;
- } else {
- src_buf = buf;
- }
-
- hd_iov.iov_base = (void *)src_buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- break;
- }
- ret = 0;
-
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
- qemu_co_mutex_unlock(&s->lock);
-
- if (qiov->niov > 1) {
- qemu_vfree(orig_buf);
- }
- g_free(cluster_data);
-
- return ret;
-}
-
-static void qcow_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
-
- g_free(s->l1_table);
- g_free(s->l2_cache);
- g_free(s->cluster_cache);
- g_free(s->cluster_data);
-
- migrate_del_blocker(s->migration_blocker);
- error_free(s->migration_blocker);
-}
-
-static int qcow_create(const char *filename, QEMUOptionParameter *options)
-{
- int header_size, backing_filename_len, l1_size, shift, i;
- QCowHeader header;
- uint8_t *tmp;
- int64_t total_size = 0;
- const char *backing_file = NULL;
- int flags = 0;
- int ret;
- BlockDriverState *qcow_bs;
-
- /* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- }
- options++;
- }
-
- ret = bdrv_create_file(filename, options);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_truncate(qcow_bs, 0);
- if (ret < 0) {
- goto exit;
- }
-
- memset(&header, 0, sizeof(header));
- header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(QCOW_VERSION);
- header.size = cpu_to_be64(total_size * 512);
- header_size = sizeof(header);
- backing_filename_len = 0;
- if (backing_file) {
- if (strcmp(backing_file, "fat:")) {
- header.backing_file_offset = cpu_to_be64(header_size);
- backing_filename_len = strlen(backing_file);
- header.backing_file_size = cpu_to_be32(backing_filename_len);
- header_size += backing_filename_len;
- } else {
- /* special backing file for vvfat */
- backing_file = NULL;
- }
- header.cluster_bits = 9; /* 512 byte cluster to avoid copying
- unmodifyed sectors */
- header.l2_bits = 12; /* 32 KB L2 tables */
- } else {
- header.cluster_bits = 12; /* 4 KB clusters */
- header.l2_bits = 9; /* 4 KB L2 tables */
- }
- header_size = (header_size + 7) & ~7;
- shift = header.cluster_bits + header.l2_bits;
- l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
-
- header.l1_table_offset = cpu_to_be64(header_size);
- if (flags & BLOCK_FLAG_ENCRYPT) {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
- } else {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
- }
-
- /* write all the data */
- ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
- if (ret != sizeof(header)) {
- goto exit;
- }
-
- if (backing_file) {
- ret = bdrv_pwrite(qcow_bs, sizeof(header),
- backing_file, backing_filename_len);
- if (ret != backing_filename_len) {
- goto exit;
- }
- }
-
- tmp = g_malloc0(BDRV_SECTOR_SIZE);
- for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
- BDRV_SECTOR_SIZE); i++) {
- ret = bdrv_pwrite(qcow_bs, header_size +
- BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
- if (ret != BDRV_SECTOR_SIZE) {
- g_free(tmp);
- goto exit;
- }
- }
-
- g_free(tmp);
- ret = 0;
-exit:
- bdrv_delete(qcow_bs);
- return ret;
-}
-
-static int qcow_make_empty(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- uint32_t l1_length = s->l1_size * sizeof(uint64_t);
- int ret;
-
- memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
- l1_length) < 0)
- return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
- if (ret < 0)
- return ret;
-
- memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
- memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
- memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-
- return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
- tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- z_stream strm;
- int ret, out_len;
- uint8_t *out_buf;
- uint64_t cluster_offset;
-
- if (nb_sectors != s->cluster_sectors) {
- ret = -EINVAL;
-
- /* Zero-pad last write if image size is not cluster aligned */
- if (sector_num + nb_sectors == bs->total_sectors &&
- nb_sectors < s->cluster_sectors) {
- uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
- memset(pad_buf, 0, s->cluster_size);
- memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
- ret = qcow_write_compressed(bs, sector_num,
- pad_buf, s->cluster_sectors);
- qemu_vfree(pad_buf);
- }
- return ret;
- }
-
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, -12,
- 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
- ret = -EINVAL;
- goto fail;
- }
-
- strm.avail_in = s->cluster_size;
- strm.next_in = (uint8_t *)buf;
- strm.avail_out = s->cluster_size;
- strm.next_out = out_buf;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret != Z_STREAM_END && ret != Z_OK) {
- deflateEnd(&strm);
- ret = -EINVAL;
- goto fail;
- }
- out_len = strm.next_out - out_buf;
-
- deflateEnd(&strm);
-
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
- /* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
- if (ret < 0) {
- goto fail;
- }
- } else {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
- out_len, 0, 0);
- if (cluster_offset == 0) {
- ret = -EIO;
- goto fail;
- }
-
- cluster_offset &= s->cluster_offset_mask;
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = 0;
-fail:
- g_free(out_buf);
- return ret;
-}
-
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQcowState *s = bs->opaque;
- bdi->cluster_size = s->cluster_size;
- return 0;
-}
-
-
-static QEMUOptionParameter qcow_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- { NULL }
-};
-
-static BlockDriver bdrv_qcow = {
- .format_name = "qcow",
- .instance_size = sizeof(BDRVQcowState),
- .bdrv_probe = qcow_probe,
- .bdrv_open = qcow_open,
- .bdrv_close = qcow_close,
- .bdrv_reopen_prepare = qcow_reopen_prepare,
- .bdrv_create = qcow_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
-
- .bdrv_co_readv = qcow_co_readv,
- .bdrv_co_writev = qcow_co_writev,
- .bdrv_co_is_allocated = qcow_co_is_allocated,
-
- .bdrv_set_key = qcow_set_key,
- .bdrv_make_empty = qcow_make_empty,
- .bdrv_write_compressed = qcow_write_compressed,
- .bdrv_get_info = qcow_get_info,
-
- .create_options = qcow_create_options,
-};
-
-static void bdrv_qcow_init(void)
-{
- bdrv_register(&bdrv_qcow);
-}
-
-block_init(bdrv_qcow_init);
diff --git a/contrib/qemu/block/qcow2-cache.c b/contrib/qemu/block/qcow2-cache.c
deleted file mode 100644
index 2f3114ecc24..00000000000
--- a/contrib/qemu/block/qcow2-cache.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * L2/refcount table cache for the QCOW2 format
- *
- * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/block_int.h"
-#include "qemu-common.h"
-#include "qcow2.h"
-#include "trace.h"
-
-typedef struct Qcow2CachedTable {
- void* table;
- int64_t offset;
- bool dirty;
- int cache_hits;
- int ref;
-} Qcow2CachedTable;
-
-struct Qcow2Cache {
- Qcow2CachedTable* entries;
- struct Qcow2Cache* depends;
- int size;
- bool depends_on_flush;
-};
-
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2Cache *c;
- int i;
-
- c = g_malloc0(sizeof(*c));
- c->size = num_tables;
- c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
-
- for (i = 0; i < c->size; i++) {
- c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
- }
-
- return c;
-}
-
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- assert(c->entries[i].ref == 0);
- qemu_vfree(c->entries[i].table);
- }
-
- g_free(c->entries);
- g_free(c);
-
- return 0;
-}
-
-static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
-{
- int ret;
-
- ret = qcow2_cache_flush(bs, c->depends);
- if (ret < 0) {
- return ret;
- }
-
- c->depends = NULL;
- c->depends_on_flush = false;
-
- return 0;
-}
-
-static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
-{
- BDRVQcowState *s = bs->opaque;
- int ret = 0;
-
- if (!c->entries[i].dirty || !c->entries[i].offset) {
- return 0;
- }
-
- trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
-
- if (c->depends) {
- ret = qcow2_cache_flush_dependency(bs, c);
- } else if (c->depends_on_flush) {
- ret = bdrv_flush(bs->file);
- if (ret >= 0) {
- c->depends_on_flush = false;
- }
- }
-
- if (ret < 0) {
- return ret;
- }
-
- if (c == s->refcount_block_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
- } else if (c == s->l2_table_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
- }
-
- ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
- s->cluster_size);
- if (ret < 0) {
- return ret;
- }
-
- c->entries[i].dirty = false;
-
- return 0;
-}
-
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
-{
- BDRVQcowState *s = bs->opaque;
- int result = 0;
- int ret;
- int i;
-
- trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
-
- for (i = 0; i < c->size; i++) {
- ret = qcow2_cache_entry_flush(bs, c, i);
- if (ret < 0 && result != -ENOSPC) {
- result = ret;
- }
- }
-
- if (result == 0) {
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- result = ret;
- }
- }
-
- return result;
-}
-
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
- Qcow2Cache *dependency)
-{
- int ret;
-
- if (dependency->depends) {
- ret = qcow2_cache_flush_dependency(bs, dependency);
- if (ret < 0) {
- return ret;
- }
- }
-
- if (c->depends && (c->depends != dependency)) {
- ret = qcow2_cache_flush_dependency(bs, c);
- if (ret < 0) {
- return ret;
- }
- }
-
- c->depends = dependency;
- return 0;
-}
-
-void qcow2_cache_depends_on_flush(Qcow2Cache *c)
-{
- c->depends_on_flush = true;
-}
-
-static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
-{
- int i;
- int min_count = INT_MAX;
- int min_index = -1;
-
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].ref) {
- continue;
- }
-
- if (c->entries[i].cache_hits < min_count) {
- min_index = i;
- min_count = c->entries[i].cache_hits;
- }
-
- /* Give newer hits priority */
- /* TODO Check how to optimize the replacement strategy */
- c->entries[i].cache_hits /= 2;
- }
-
- if (min_index == -1) {
- /* This can't happen in current synchronous code, but leave the check
- * here as a reminder for whoever starts using AIO with the cache */
- abort();
- }
- return min_index;
-}
-
-static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
- uint64_t offset, void **table, bool read_from_disk)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
- int ret;
-
- trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
- offset, read_from_disk);
-
- /* Check if the table is already cached */
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].offset == offset) {
- goto found;
- }
- }
-
- /* If not, write a table back and replace it */
- i = qcow2_cache_find_entry_to_replace(c);
- trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
- if (i < 0) {
- return i;
- }
-
- ret = qcow2_cache_entry_flush(bs, c, i);
- if (ret < 0) {
- return ret;
- }
-
- trace_qcow2_cache_get_read(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
- c->entries[i].offset = 0;
- if (read_from_disk) {
- if (c == s->l2_table_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
- }
-
- ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
- if (ret < 0) {
- return ret;
- }
- }
-
- /* Give the table some hits for the start so that it won't be replaced
- * immediately. The number 32 is completely arbitrary. */
- c->entries[i].cache_hits = 32;
- c->entries[i].offset = offset;
-
- /* And return the right table */
-found:
- c->entries[i].cache_hits++;
- c->entries[i].ref++;
- *table = c->entries[i].table;
-
- trace_qcow2_cache_get_done(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
-
- return 0;
-}
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table)
-{
- return qcow2_cache_do_get(bs, c, offset, table, true);
-}
-
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table)
-{
- return qcow2_cache_do_get(bs, c, offset, table, false);
-}
-
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == *table) {
- goto found;
- }
- }
- return -ENOENT;
-
-found:
- c->entries[i].ref--;
- *table = NULL;
-
- assert(c->entries[i].ref >= 0);
- return 0;
-}
-
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == table) {
- goto found;
- }
- }
- abort();
-
-found:
- c->entries[i].dirty = true;
-}
diff --git a/contrib/qemu/block/qcow2-cluster.c b/contrib/qemu/block/qcow2-cluster.c
deleted file mode 100644
index cca76d4fcdd..00000000000
--- a/contrib/qemu/block/qcow2-cluster.c
+++ /dev/null
@@ -1,1478 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include <zlib.h>
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-#include "trace.h"
-
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
- bool exact_size)
-{
- BDRVQcowState *s = bs->opaque;
- int new_l1_size2, ret, i;
- uint64_t *new_l1_table;
- int64_t new_l1_table_offset, new_l1_size;
- uint8_t data[12];
-
- if (min_size <= s->l1_size)
- return 0;
-
- if (exact_size) {
- new_l1_size = min_size;
- } else {
- /* Bump size up to reduce the number of times we have to grow */
- new_l1_size = s->l1_size;
- if (new_l1_size == 0) {
- new_l1_size = 1;
- }
- while (min_size > new_l1_size) {
- new_l1_size = (new_l1_size * 3 + 1) / 2;
- }
- }
-
- if (new_l1_size > INT_MAX) {
- return -EFBIG;
- }
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
- s->l1_size, new_l1_size);
-#endif
-
- new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
- memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
-
- /* write new table (align to cluster) */
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
- new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
- if (new_l1_table_offset < 0) {
- g_free(new_l1_table);
- return new_l1_table_offset;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
- for(i = 0; i < s->l1_size; i++)
- new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
- if (ret < 0)
- goto fail;
- for(i = 0; i < s->l1_size; i++)
- new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
-
- /* set new table */
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
- cpu_to_be32w((uint32_t*)data, new_l1_size);
- cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
- if (ret < 0) {
- goto fail;
- }
- g_free(s->l1_table);
- qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- s->l1_table_offset = new_l1_table_offset;
- s->l1_table = new_l1_table;
- s->l1_size = new_l1_size;
- return 0;
- fail:
- g_free(new_l1_table);
- qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
- QCOW2_DISCARD_OTHER);
- return ret;
-}
-
-/*
- * l2_load
- *
- * Loads a L2 table into memory. If the table is in the cache, the cache
- * is used; otherwise the L2 table is loaded from the image file.
- *
- * Returns a pointer to the L2 table on success, or NULL if the read from
- * the image file failed.
- */
-
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
- uint64_t **l2_table)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
-
- return ret;
-}
-
-/*
- * Writes one sector of the L1 table to the disk (can't update single entries
- * and we really don't want bdrv_pread to perform a read-modify-write)
- */
-#define L1_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l1_entry(BlockDriverState *bs, int l1_index)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t buf[L1_ENTRIES_PER_SECTOR];
- int l1_start_index;
- int i, ret;
-
- l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
- for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
- buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
- buf, sizeof(buf));
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-}
-
-/*
- * l2_allocate
- *
- * Allocate a new l2 entry in the file. If l1_index points to an already
- * used entry in the L2 table (i.e. we are doing a copy on write for the L2
- * table) copy the contents of the old L2 table into the newly allocated one.
- * Otherwise the new table is initialized with zeros.
- *
- */
-
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t old_l2_offset;
- uint64_t *l2_table;
- int64_t l2_offset;
- int ret;
-
- old_l2_offset = s->l1_table[l1_index];
-
- trace_qcow2_l2_allocate(bs, l1_index);
-
- /* allocate a new l2 entry */
-
- l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
- if (l2_offset < 0) {
- return l2_offset;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail;
- }
-
- /* allocate a new entry in the l2 cache */
-
- trace_qcow2_l2_allocate_get_empty(bs, l1_index);
- ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
- if (ret < 0) {
- return ret;
- }
-
- l2_table = *table;
-
- if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
- /* if there was no old l2 table, clear the new table */
- memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- } else {
- uint64_t* old_table;
-
- /* if there was an old l2 table, read it from the disk */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
- ret = qcow2_cache_get(bs, s->l2_table_cache,
- old_l2_offset & L1E_OFFSET_MASK,
- (void**) &old_table);
- if (ret < 0) {
- goto fail;
- }
-
- memcpy(l2_table, old_table, s->cluster_size);
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* write the l2 table to the file */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-
- trace_qcow2_l2_allocate_write_l2(bs, l1_index);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- goto fail;
- }
-
- /* update the L1 entry */
- trace_qcow2_l2_allocate_write_l1(bs, l1_index);
- s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
- ret = write_l1_entry(bs, l1_index);
- if (ret < 0) {
- goto fail;
- }
-
- *table = l2_table;
- trace_qcow2_l2_allocate_done(bs, l1_index, 0);
- return 0;
-
-fail:
- trace_qcow2_l2_allocate_done(bs, l1_index, ret);
- qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
- s->l1_table[l1_index] = old_l2_offset;
- return ret;
-}
-
-/*
- * Checks how many clusters in a given L2 table are contiguous in the image
- * file. As soon as one of the flags in the bitmask stop_flags changes compared
- * to the first cluster, the search is stopped and the cluster is not counted
- * as contiguous. (This allows it, for example, to stop at the first compressed
- * cluster which may require a different handling)
- */
-static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
- uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
-{
- int i;
- uint64_t mask = stop_flags | L2E_OFFSET_MASK;
- uint64_t offset = be64_to_cpu(l2_table[0]) & mask;
-
- if (!offset)
- return 0;
-
- for (i = start; i < start + nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
- if (offset + (uint64_t) i * cluster_size != l2_entry) {
- break;
- }
- }
-
- return (i - start);
-}
-
-static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
-{
- int i;
-
- for (i = 0; i < nb_clusters; i++) {
- int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
-
- if (type != QCOW2_CLUSTER_UNALLOCATED) {
- break;
- }
- }
-
- return i;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
- algorithm for < 4 GB images. NOTE: out_buf == in_buf is
- supported */
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
-{
- union {
- uint64_t ll[2];
- uint8_t b[16];
- } ivec;
- int i;
-
- for(i = 0; i < nb_sectors; i++) {
- ivec.ll[0] = cpu_to_le64(sector_num);
- ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
- sector_num++;
- in_buf += 512;
- out_buf += 512;
- }
-}
-
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
- uint64_t start_sect,
- uint64_t cluster_offset,
- int n_start, int n_end)
-{
- BDRVQcowState *s = bs->opaque;
- QEMUIOVector qiov;
- struct iovec iov;
- int n, ret;
-
- /*
- * If this is the last cluster and it is only partially used, we must only
- * copy until the end of the image, or bdrv_check_request will fail for the
- * bdrv_read/write calls below.
- */
- if (start_sect + n_end > bs->total_sectors) {
- n_end = bs->total_sectors - start_sect;
- }
-
- n = n_end - n_start;
- if (n <= 0) {
- return 0;
- }
-
- iov.iov_len = n * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
-
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
-
- /* Call .bdrv_co_readv() directly instead of using the public block-layer
- * interface. This avoids double I/O throttling and request tracking,
- * which can lead to deadlock when block layer copy-on-read is enabled.
- */
- ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
- if (ret < 0) {
- goto out;
- }
-
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, start_sect + n_start,
- iov.iov_base, iov.iov_base, n, 1,
- &s->aes_encrypt_key);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
- if (ret < 0) {
- goto out;
- }
-
- ret = 0;
-out:
- qemu_vfree(iov.iov_base);
- return ret;
-}
-
-
-/*
- * get_cluster_offset
- *
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
- *
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
- *
- * on exit, *num is the number of contiguous sectors we can read.
- *
- * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
- * cases.
- */
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int l2_index;
- uint64_t l1_index, l2_offset, *l2_table;
- int l1_bits, c;
- unsigned int index_in_cluster, nb_clusters;
- uint64_t nb_available, nb_needed;
- int ret;
-
- index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
- nb_needed = *num + index_in_cluster;
-
- l1_bits = s->l2_bits + s->cluster_bits;
-
- /* compute how many bytes there are between the offset and
- * the end of the l1 entry
- */
-
- nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
-
- /* compute the number of available sectors */
-
- nb_available = (nb_available >> 9) + index_in_cluster;
-
- if (nb_needed > nb_available) {
- nb_needed = nb_available;
- }
-
- *cluster_offset = 0;
-
- /* seek the the l2 offset in the l1 table */
-
- l1_index = offset >> l1_bits;
- if (l1_index >= s->l1_size) {
- ret = QCOW2_CLUSTER_UNALLOCATED;
- goto out;
- }
-
- l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
- if (!l2_offset) {
- ret = QCOW2_CLUSTER_UNALLOCATED;
- goto out;
- }
-
- /* load the l2 table in memory */
-
- ret = l2_load(bs, l2_offset, &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* find the cluster offset for the given disk offset */
-
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
- *cluster_offset = be64_to_cpu(l2_table[l2_index]);
- nb_clusters = size_to_clusters(s, nb_needed << 9);
-
- ret = qcow2_get_cluster_type(*cluster_offset);
- switch (ret) {
- case QCOW2_CLUSTER_COMPRESSED:
- /* Compressed clusters can only be processed one by one */
- c = 1;
- *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
- break;
- case QCOW2_CLUSTER_ZERO:
- if (s->qcow_version < 3) {
- return -EIO;
- }
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
- *cluster_offset = 0;
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- /* how many empty clusters ? */
- c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
- *cluster_offset = 0;
- break;
- case QCOW2_CLUSTER_NORMAL:
- /* how many allocated clusters ? */
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
- *cluster_offset &= L2E_OFFSET_MASK;
- break;
- default:
- abort();
- }
-
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-
- nb_available = (c * s->cluster_sectors);
-
-out:
- if (nb_available > nb_needed)
- nb_available = nb_needed;
-
- *num = nb_available - index_in_cluster;
-
- return ret;
-}
-
-/*
- * get_cluster_table
- *
- * for a given disk offset, load (and allocate if needed)
- * the l2 table.
- *
- * the l2 table offset in the qcow2 file and the cluster index
- * in the l2 table are given to the caller.
- *
- * Returns 0 on success, -errno in failure case
- */
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
- uint64_t **new_l2_table,
- int *new_l2_index)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int l2_index;
- uint64_t l1_index, l2_offset;
- uint64_t *l2_table = NULL;
- int ret;
-
- /* seek the the l2 offset in the l1 table */
-
- l1_index = offset >> (s->l2_bits + s->cluster_bits);
- if (l1_index >= s->l1_size) {
- ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
- if (ret < 0) {
- return ret;
- }
- }
-
- assert(l1_index < s->l1_size);
- l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
-
- /* seek the l2 table of the given l2 offset */
-
- if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
- /* load the l2 table in memory */
- ret = l2_load(bs, l2_offset, &l2_table);
- if (ret < 0) {
- return ret;
- }
- } else {
- /* First allocate a new L2 table (and do COW if needed) */
- ret = l2_allocate(bs, l1_index, &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* Then decrease the refcount of the old table */
- if (l2_offset) {
- qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- }
- }
-
- /* find the cluster offset for the given disk offset */
-
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-
- *new_l2_table = l2_table;
- *new_l2_index = l2_index;
-
- return 0;
-}
-
-/*
- * alloc_compressed_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
- * If the offset is not found, allocate a new compressed cluster.
- *
- * Return the cluster offset if successful,
- * Return 0, otherwise.
- *
- */
-
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int compressed_size)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index, ret;
- uint64_t *l2_table;
- int64_t cluster_offset;
- int nb_csectors;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return 0;
- }
-
- /* Compression can't overwrite anything. Fail if the cluster was already
- * allocated. */
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
- if (cluster_offset & L2E_OFFSET_MASK) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- return 0;
- }
-
- cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
- if (cluster_offset < 0) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- return 0;
- }
-
- nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
- (cluster_offset >> 9);
-
- cluster_offset |= QCOW_OFLAG_COMPRESSED |
- ((uint64_t)nb_csectors << s->csize_shift);
-
- /* update L2 table */
-
- /* compressed clusters never have the copied flag */
-
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- l2_table[l2_index] = cpu_to_be64(cluster_offset);
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return 0;
- }
-
- return cluster_offset;
-}
-
-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- if (r->nb_sectors == 0) {
- return 0;
- }
-
- qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
- r->offset / BDRV_SECTOR_SIZE,
- r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
- qemu_co_mutex_lock(&s->lock);
-
- if (ret < 0) {
- return ret;
- }
-
- /*
- * Before we update the L2 table to actually point to the new cluster, we
- * need to be sure that the refcounts have been increased and COW was
- * handled.
- */
- qcow2_cache_depends_on_flush(s->l2_table_cache);
-
- return 0;
-}
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
-{
- BDRVQcowState *s = bs->opaque;
- int i, j = 0, l2_index, ret;
- uint64_t *old_cluster, *l2_table;
- uint64_t cluster_offset = m->alloc_offset;
-
- trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
- assert(m->nb_clusters > 0);
-
- old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
-
- /* copy content of unmodified sectors */
- ret = perform_cow(bs, m, &m->cow_start);
- if (ret < 0) {
- goto err;
- }
-
- ret = perform_cow(bs, m, &m->cow_end);
- if (ret < 0) {
- goto err;
- }
-
- /* Update L2 table. */
- if (s->use_lazy_refcounts) {
- qcow2_mark_dirty(bs);
- }
- if (qcow2_need_accurate_refcounts(s)) {
- qcow2_cache_set_dependency(bs, s->l2_table_cache,
- s->refcount_block_cache);
- }
-
- ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
- if (ret < 0) {
- goto err;
- }
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-
- for (i = 0; i < m->nb_clusters; i++) {
- /* if two concurrent writes happen to the same unallocated cluster
- * each write allocates separate cluster and writes data concurrently.
- * The first one to complete updates l2 table with pointer to its
- * cluster the second one has to do RMW (which is done above by
- * copy_sectors()), update l2 table with its cluster pointer and free
- * old cluster. This is what this loop does */
- if(l2_table[l2_index + i] != 0)
- old_cluster[j++] = l2_table[l2_index + i];
-
- l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
- (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
- }
-
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto err;
- }
-
- /*
- * If this was a COW, we need to decrease the refcount of the old cluster.
- * Also flush bs->file to get the right order for L2 and refcount update.
- *
- * Don't discard clusters that reach a refcount of 0 (e.g. compressed
- * clusters), the next write will reuse them anyway.
- */
- if (j != 0) {
- for (i = 0; i < j; i++) {
- qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
- QCOW2_DISCARD_NEVER);
- }
- }
-
- ret = 0;
-err:
- g_free(old_cluster);
- return ret;
- }
-
-/*
- * Returns the number of contiguous clusters that can be used for an allocating
- * write, but require COW to be performed (this includes yet unallocated space,
- * which must copy from the backing file)
- */
-static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
- uint64_t *l2_table, int l2_index)
-{
- int i;
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
- int cluster_type = qcow2_get_cluster_type(l2_entry);
-
- switch(cluster_type) {
- case QCOW2_CLUSTER_NORMAL:
- if (l2_entry & QCOW_OFLAG_COPIED) {
- goto out;
- }
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- case QCOW2_CLUSTER_COMPRESSED:
- case QCOW2_CLUSTER_ZERO:
- break;
- default:
- abort();
- }
- }
-
-out:
- assert(i <= nb_clusters);
- return i;
-}
-
-/*
- * Check if there already is an AIO write request in flight which allocates
- * the same cluster. In this case we need to wait until the previous
- * request has completed and updated the L2 table accordingly.
- *
- * Returns:
- * 0 if there was no dependency. *cur_bytes indicates the number of
- * bytes from guest_offset that can be read before the next
- * dependency must be processed (or the request is complete)
- *
- * -EAGAIN if we had to wait for another request, previously gathered
- * information on cluster allocation may be invalid now. The caller
- * must start over anyway, so consider *cur_bytes undefined.
- */
-static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *cur_bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- QCowL2Meta *old_alloc;
- uint64_t bytes = *cur_bytes;
-
- QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
-
- uint64_t start = guest_offset;
- uint64_t end = start + bytes;
- uint64_t old_start = l2meta_cow_start(old_alloc);
- uint64_t old_end = l2meta_cow_end(old_alloc);
-
- if (end <= old_start || start >= old_end) {
- /* No intersection */
- } else {
- if (start < old_start) {
- /* Stop at the start of a running allocation */
- bytes = old_start - start;
- } else {
- bytes = 0;
- }
-
- /* Stop if already an l2meta exists. After yielding, it wouldn't
- * be valid any more, so we'd have to clean up the old L2Metas
- * and deal with requests depending on them before starting to
- * gather new ones. Not worth the trouble. */
- if (bytes == 0 && *m) {
- *cur_bytes = 0;
- return 0;
- }
-
- if (bytes == 0) {
- /* Wait for the dependency to complete. We need to recheck
- * the free/allocated clusters when we continue. */
- qemu_co_mutex_unlock(&s->lock);
- qemu_co_queue_wait(&old_alloc->dependent_requests);
- qemu_co_mutex_lock(&s->lock);
- return -EAGAIN;
- }
- }
- }
-
- /* Make sure that existing clusters and new allocations are only used up to
- * the next dependency if we shortened the request above */
- *cur_bytes = bytes;
-
- return 0;
-}
-
-/*
- * Checks how many already allocated clusters that don't require a copy on
- * write there are at the given guest_offset (up to *bytes). If
- * *host_offset is not zero, only physically contiguous clusters beginning at
- * this host offset are counted.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * Returns:
- * 0: if no allocated clusters are available at the given offset.
- * *bytes is normally unchanged. It is set to 0 if the cluster
- * is allocated and doesn't need COW, but doesn't have the right
- * physical offset.
- *
- * 1: if allocated clusters that don't require a COW are available at
- * the requested offset. *bytes may have decreased and describes
- * the length of the area that can be written to.
- *
- * -errno: in error cases
- */
-static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index;
- uint64_t cluster_offset;
- uint64_t *l2_table;
- unsigned int nb_clusters;
- unsigned int keep_clusters;
- int ret, pret;
-
- trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
- *bytes);
-
- assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
- == offset_into_cluster(s, *host_offset));
-
- /*
- * Calculate the number of clusters to look for. We stop at L2 table
- * boundaries to keep things simple.
- */
- nb_clusters =
- size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- /* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
- /* Check how many clusters are already allocated and don't need COW */
- if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
- && (cluster_offset & QCOW_OFLAG_COPIED))
- {
- /* If a specific host_offset is required, check it */
- bool offset_matches =
- (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
-
- if (*host_offset != 0 && !offset_matches) {
- *bytes = 0;
- ret = 0;
- goto out;
- }
-
- /* We keep all QCOW_OFLAG_COPIED clusters */
- keep_clusters =
- count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
- assert(keep_clusters <= nb_clusters);
-
- *bytes = MIN(*bytes,
- keep_clusters * s->cluster_size
- - offset_into_cluster(s, guest_offset));
-
- ret = 1;
- } else {
- ret = 0;
- }
-
- /* Cleanup */
-out:
- pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (pret < 0) {
- return pret;
- }
-
- /* Only return a host offset if we actually made progress. Otherwise we
- * would make requirements for handle_alloc() that it can't fulfill */
- if (ret) {
- *host_offset = (cluster_offset & L2E_OFFSET_MASK)
- + offset_into_cluster(s, guest_offset);
- }
-
- return ret;
-}
-
-/*
- * Allocates new clusters for the given guest_offset.
- *
- * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
- * contain the number of clusters that have been allocated and are contiguous
- * in the image file.
- *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
- *
- * *host_offset is updated to contain the offset into the image file at which
- * the first allocated cluster starts.
- *
- * Return 0 on success and -errno in error cases. -EAGAIN means that the
- * function has been waiting for another request and the allocation must be
- * restarted, but the whole request should not be failed.
- */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, unsigned int *nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
-
- trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
- *host_offset, *nb_clusters);
-
- /* Allocate new clusters */
- trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
- if (*host_offset == 0) {
- int64_t cluster_offset =
- qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
- if (cluster_offset < 0) {
- return cluster_offset;
- }
- *host_offset = cluster_offset;
- return 0;
- } else {
- int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
- if (ret < 0) {
- return ret;
- }
- *nb_clusters = ret;
- return 0;
- }
-}
-
-/*
- * Allocates new clusters for an area that either is yet unallocated or needs a
- * copy on write. If *host_offset is non-zero, clusters are only allocated if
- * the new allocation can match the specified host offset.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * Returns:
- * 0: if no clusters could be allocated. *bytes is set to 0,
- * *host_offset is left unchanged.
- *
- * 1: if new clusters were allocated. *bytes may be decreased if the
- * new allocation doesn't cover all of the requested area.
- * *host_offset is updated to contain the host offset of the first
- * newly allocated cluster.
- *
- * -errno: in error cases
- */
-static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index;
- uint64_t *l2_table;
- uint64_t entry;
- unsigned int nb_clusters;
- int ret;
-
- uint64_t alloc_cluster_offset;
-
- trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
- *bytes);
- assert(*bytes > 0);
-
- /*
- * Calculate the number of clusters to look for. We stop at L2 table
- * boundaries to keep things simple.
- */
- nb_clusters =
- size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- /* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- entry = be64_to_cpu(l2_table[l2_index]);
-
- /* For the moment, overwrite compressed clusters one by one */
- if (entry & QCOW_OFLAG_COMPRESSED) {
- nb_clusters = 1;
- } else {
- nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
- }
-
- /* This function is only called when there were no non-COW clusters, so if
- * we can't find any unallocated or COW clusters either, something is
- * wrong with our code. */
- assert(nb_clusters > 0);
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* Allocate, if necessary at a given offset in the image file */
- alloc_cluster_offset = start_of_cluster(s, *host_offset);
- ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
- &nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- /* Can't extend contiguous allocation */
- if (nb_clusters == 0) {
- *bytes = 0;
- return 0;
- }
-
- /*
- * Save info needed for meta data update.
- *
- * requested_sectors: Number of sectors from the start of the first
- * newly allocated cluster to the end of the (possibly shortened
- * before) write request.
- *
- * avail_sectors: Number of sectors from the start of the first
- * newly allocated to the end of the last newly allocated cluster.
- *
- * nb_sectors: The number of sectors from the start of the first
- * newly allocated cluster to the end of the area that the write
- * request actually writes to (excluding COW at the end)
- */
- int requested_sectors =
- (*bytes + offset_into_cluster(s, guest_offset))
- >> BDRV_SECTOR_BITS;
- int avail_sectors = nb_clusters
- << (s->cluster_bits - BDRV_SECTOR_BITS);
- int alloc_n_start = offset_into_cluster(s, guest_offset)
- >> BDRV_SECTOR_BITS;
- int nb_sectors = MIN(requested_sectors, avail_sectors);
- QCowL2Meta *old_m = *m;
-
- *m = g_malloc0(sizeof(**m));
-
- **m = (QCowL2Meta) {
- .next = old_m,
-
- .alloc_offset = alloc_cluster_offset,
- .offset = start_of_cluster(s, guest_offset),
- .nb_clusters = nb_clusters,
- .nb_available = nb_sectors,
-
- .cow_start = {
- .offset = 0,
- .nb_sectors = alloc_n_start,
- },
- .cow_end = {
- .offset = nb_sectors * BDRV_SECTOR_SIZE,
- .nb_sectors = avail_sectors - nb_sectors,
- },
- };
- qemu_co_queue_init(&(*m)->dependent_requests);
- QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
-
- *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
- *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
- - offset_into_cluster(s, guest_offset));
- assert(*bytes != 0);
-
- return 1;
-
-fail:
- if (*m && (*m)->nb_clusters > 0) {
- QLIST_REMOVE(*m, next_in_flight);
- }
- return ret;
-}
-
-/*
- * alloc_cluster_offset
- *
- * For a given offset on the virtual disk, find the cluster offset in qcow2
- * file. If the offset is not found, allocate a new cluster.
- *
- * If the cluster was already allocated, m->nb_clusters is set to 0 and
- * other fields in m are meaningless.
- *
- * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. In this case, the other
- * fields of m are valid and contain information about the first allocated
- * cluster.
- *
- * If the request conflicts with another write request in flight, the coroutine
- * is queued and will be reentered when the dependency has completed.
- *
- * Return 0 on success and -errno in error cases
- */
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t start, remaining;
- uint64_t cluster_offset;
- uint64_t cur_bytes;
- int ret;
-
- trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
- n_start, n_end);
-
- assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
- offset = start_of_cluster(s, offset);
-
-again:
- start = offset + (n_start << BDRV_SECTOR_BITS);
- remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
- cluster_offset = 0;
- *host_offset = 0;
- cur_bytes = 0;
- *m = NULL;
-
- while (true) {
-
- if (!*host_offset) {
- *host_offset = start_of_cluster(s, cluster_offset);
- }
-
- assert(remaining >= cur_bytes);
-
- start += cur_bytes;
- remaining -= cur_bytes;
- cluster_offset += cur_bytes;
-
- if (remaining == 0) {
- break;
- }
-
- cur_bytes = remaining;
-
- /*
- * Now start gathering as many contiguous clusters as possible:
- *
- * 1. Check for overlaps with in-flight allocations
- *
- * a) Overlap not in the first cluster -> shorten this request and
- * let the caller handle the rest in its next loop iteration.
- *
- * b) Real overlaps of two requests. Yield and restart the search
- * for contiguous clusters (the situation could have changed
- * while we were sleeping)
- *
- * c) TODO: Request starts in the same cluster as the in-flight
- * allocation ends. Shorten the COW of the in-fight allocation,
- * set cluster_offset to write to the same cluster and set up
- * the right synchronisation between the in-flight request and
- * the new one.
- */
- ret = handle_dependencies(bs, start, &cur_bytes, m);
- if (ret == -EAGAIN) {
- /* Currently handle_dependencies() doesn't yield if we already had
- * an allocation. If it did, we would have to clean up the L2Meta
- * structs before starting over. */
- assert(*m == NULL);
- goto again;
- } else if (ret < 0) {
- return ret;
- } else if (cur_bytes == 0) {
- break;
- } else {
- /* handle_dependencies() may have decreased cur_bytes (shortened
- * the allocations below) so that the next dependency is processed
- * correctly during the next loop iteration. */
- }
-
- /*
- * 2. Count contiguous COPIED clusters.
- */
- ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- continue;
- } else if (cur_bytes == 0) {
- break;
- }
-
- /*
- * 3. If the request still hasn't completed, allocate new clusters,
- * considering any cluster_offset of steps 1c or 2.
- */
- ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- continue;
- } else {
- assert(cur_bytes == 0);
- break;
- }
- }
-
- *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
- assert(*num > 0);
- assert(*host_offset != 0);
-
- return 0;
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
- const uint8_t *buf, int buf_size)
-{
- z_stream strm1, *strm = &strm1;
- int ret, out_len;
-
- memset(strm, 0, sizeof(*strm));
-
- strm->next_in = (uint8_t *)buf;
- strm->avail_in = buf_size;
- strm->next_out = out_buf;
- strm->avail_out = out_buf_size;
-
- ret = inflateInit2(strm, -12);
- if (ret != Z_OK)
- return -1;
- ret = inflate(strm, Z_FINISH);
- out_len = strm->next_out - out_buf;
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
- out_len != out_buf_size) {
- inflateEnd(strm);
- return -1;
- }
- inflateEnd(strm);
- return 0;
-}
-
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, csize, nb_csectors, sector_offset;
- uint64_t coffset;
-
- coffset = cluster_offset & s->cluster_offset_mask;
- if (s->cluster_cache_offset != coffset) {
- nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
- sector_offset = coffset & 511;
- csize = nb_csectors * 512 - sector_offset;
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
- if (ret < 0) {
- return ret;
- }
- if (decompress_buffer(s->cluster_cache, s->cluster_size,
- s->cluster_data + sector_offset, csize) < 0) {
- return -EIO;
- }
- s->cluster_cache_offset = coffset;
- }
- return 0;
-}
-
-/*
- * This discards as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of discarded
- * clusters.
- */
-static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table;
- int l2_index;
- int ret;
- int i;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
-
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
- if ((old_offset & L2E_OFFSET_MASK) == 0) {
- continue;
- }
-
- /* First remove L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- l2_table[l2_index + i] = cpu_to_be64(0);
-
- /* Then decrease the refcount */
- qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- return nb_clusters;
-}
-
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t end_offset;
- unsigned int nb_clusters;
- int ret;
-
- end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
- /* Round start up and end down */
- offset = align_offset(offset, s->cluster_size);
- end_offset &= ~(s->cluster_size - 1);
-
- if (offset > end_offset) {
- return 0;
- }
-
- nb_clusters = size_to_clusters(s, end_offset - offset);
-
- s->cache_discards = true;
-
- /* Each L2 table is handled by its own loop iteration */
- while (nb_clusters > 0) {
- ret = discard_single_l2(bs, offset, nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- nb_clusters -= ret;
- offset += (ret * s->cluster_size);
- }
-
- ret = 0;
-fail:
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- return ret;
-}
-
-/*
- * This zeroes as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of zeroed
- * clusters.
- */
-static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table;
- int l2_index;
- int ret;
- int i;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
-
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
-
- /* Update L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- if (old_offset & QCOW_OFLAG_COMPRESSED) {
- l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
- qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
- } else {
- l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
- }
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- return nb_clusters;
-}
-
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int nb_clusters;
- int ret;
-
- /* The zero flag is only supported by version 3 and newer */
- if (s->qcow_version < 3) {
- return -ENOTSUP;
- }
-
- /* Each L2 table is handled by its own loop iteration */
- nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
-
- s->cache_discards = true;
-
- while (nb_clusters > 0) {
- ret = zero_single_l2(bs, offset, nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- nb_clusters -= ret;
- offset += (ret * s->cluster_size);
- }
-
- ret = 0;
-fail:
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- return ret;
-}
diff --git a/contrib/qemu/block/qcow2-refcount.c b/contrib/qemu/block/qcow2-refcount.c
deleted file mode 100644
index 1244693f39e..00000000000
--- a/contrib/qemu/block/qcow2-refcount.c
+++ /dev/null
@@ -1,1374 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length,
- int addend, enum qcow2_discard_type type);
-
-
-/*********************************************************/
-/* refcount handling */
-
-int qcow2_refcount_init(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, refcount_table_size2, i;
-
- refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
- s->refcount_table = g_malloc(refcount_table_size2);
- if (s->refcount_table_size > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
- ret = bdrv_pread(bs->file, s->refcount_table_offset,
- s->refcount_table, refcount_table_size2);
- if (ret != refcount_table_size2)
- goto fail;
- for(i = 0; i < s->refcount_table_size; i++)
- be64_to_cpus(&s->refcount_table[i]);
- }
- return 0;
- fail:
- return -ENOMEM;
-}
-
-void qcow2_refcount_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- g_free(s->refcount_table);
-}
-
-
-static int load_refcount_block(BlockDriverState *bs,
- int64_t refcount_block_offset,
- void **refcount_block)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- refcount_block);
-
- return ret;
-}
-
-/*
- * Returns the refcount of the cluster given by its index. Any non-negative
- * return value is the refcount of the cluster, negative values are -errno
- * and indicate an error.
- */
-static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
-{
- BDRVQcowState *s = bs->opaque;
- int refcount_table_index, block_index;
- int64_t refcount_block_offset;
- int ret;
- uint16_t *refcount_block;
- uint16_t refcount;
-
- refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
- if (refcount_table_index >= s->refcount_table_size)
- return 0;
- refcount_block_offset = s->refcount_table[refcount_table_index];
- if (!refcount_block_offset)
- return 0;
-
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- (void**) &refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- block_index = cluster_index &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
- refcount = be16_to_cpu(refcount_block[block_index]);
-
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- return refcount;
-}
-
-/*
- * Rounds the refcount table size up to avoid growing the table for each single
- * refcount block that is allocated.
- */
-static unsigned int next_refcount_table_size(BDRVQcowState *s,
- unsigned int min_size)
-{
- unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
- unsigned int refcount_table_clusters =
- MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
-
- while (min_clusters > refcount_table_clusters) {
- refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
- }
-
- return refcount_table_clusters << (s->cluster_bits - 3);
-}
-
-
-/* Checks if two offsets are described by the same refcount block */
-static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
- uint64_t offset_b)
-{
- uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
- uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
-
- return (block_a == block_b);
-}
-
-/*
- * Loads a refcount block. If it doesn't exist yet, it is allocated first
- * (including growing the refcount table if needed).
- *
- * Returns 0 on success or -errno in error case
- */
-static int alloc_refcount_block(BlockDriverState *bs,
- int64_t cluster_index, uint16_t **refcount_block)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int refcount_table_index;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
-
- /* Find the refcount block for the given cluster */
- refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-
- if (refcount_table_index < s->refcount_table_size) {
-
- uint64_t refcount_block_offset =
- s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
-
- /* If it's already there, we're done */
- if (refcount_block_offset) {
- return load_refcount_block(bs, refcount_block_offset,
- (void**) refcount_block);
- }
- }
-
- /*
- * If we came here, we need to allocate something. Something is at least
- * a cluster for the new refcount block. It may also include a new refcount
- * table if the old refcount table is too small.
- *
- * Note that allocating clusters here needs some special care:
- *
- * - We can't use the normal qcow2_alloc_clusters(), it would try to
- * increase the refcount and very likely we would end up with an endless
- * recursion. Instead we must place the refcount blocks in a way that
- * they can describe them themselves.
- *
- * - We need to consider that at this point we are inside update_refcounts
- * and doing the initial refcount increase. This means that some clusters
- * have already been allocated by the caller, but their refcount isn't
- * accurate yet. free_cluster_index tells us where this allocation ends
- * as long as we don't overwrite it by freeing clusters.
- *
- * - alloc_clusters_noref and qcow2_free_clusters may load a different
- * refcount block into the cache
- */
-
- *refcount_block = NULL;
-
- /* We write to the refcount table, so we might depend on L2 tables */
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- return ret;
- }
-
- /* Allocate the refcount block itself and mark it as used */
- int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
- if (new_block < 0) {
- return new_block;
- }
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
- " at %" PRIx64 "\n",
- refcount_table_index, cluster_index << s->cluster_bits, new_block);
-#endif
-
- if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
- /* Zero the new refcount block before updating it */
- ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- memset(*refcount_block, 0, s->cluster_size);
-
- /* The block describes itself, need to update the cache */
- int block_index = (new_block >> s->cluster_bits) &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
- (*refcount_block)[block_index] = cpu_to_be16(1);
- } else {
- /* Described somewhere else. This can recurse at most twice before we
- * arrive at a block that describes itself. */
- ret = update_refcount(bs, new_block, s->cluster_size, 1,
- QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- goto fail_block;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail_block;
- }
-
- /* Initialize the new refcount block only after updating its refcount,
- * update_refcount uses the refcount cache itself */
- ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- memset(*refcount_block, 0, s->cluster_size);
- }
-
- /* Now the new refcount block needs to be written to disk */
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail_block;
- }
-
- /* If the refcount table is big enough, just hook the block up there */
- if (refcount_table_index < s->refcount_table_size) {
- uint64_t data64 = cpu_to_be64(new_block);
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
- ret = bdrv_pwrite_sync(bs->file,
- s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
- &data64, sizeof(data64));
- if (ret < 0) {
- goto fail_block;
- }
-
- s->refcount_table[refcount_table_index] = new_block;
- return 0;
- }
-
- ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- /*
- * If we come here, we need to grow the refcount table. Again, a new
- * refcount table needs some space and we can't simply allocate to avoid
- * endless recursion.
- *
- * Therefore let's grab new refcount blocks at the end of the image, which
- * will describe themselves and the new refcount table. This way we can
- * reference them only in the new table and do the switch to the new
- * refcount table at once without producing an inconsistent state in
- * between.
- */
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
-
- /* Calculate the number of refcount blocks needed so far */
- uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
- uint64_t blocks_used = (s->free_cluster_index +
- refcount_block_clusters - 1) / refcount_block_clusters;
-
- /* And now we need at least one block more for the new metadata */
- uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
- uint64_t last_table_size;
- uint64_t blocks_clusters;
- do {
- uint64_t table_clusters =
- size_to_clusters(s, table_size * sizeof(uint64_t));
- blocks_clusters = 1 +
- ((table_clusters + refcount_block_clusters - 1)
- / refcount_block_clusters);
- uint64_t meta_clusters = table_clusters + blocks_clusters;
-
- last_table_size = table_size;
- table_size = next_refcount_table_size(s, blocks_used +
- ((meta_clusters + refcount_block_clusters - 1)
- / refcount_block_clusters));
-
- } while (last_table_size != table_size);
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
- s->refcount_table_size, table_size);
-#endif
-
- /* Create the new refcount table and blocks */
- uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
- s->cluster_size;
- uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
- uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
- uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
-
- assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
-
- /* Fill the new refcount table */
- memcpy(new_table, s->refcount_table,
- s->refcount_table_size * sizeof(uint64_t));
- new_table[refcount_table_index] = new_block;
-
- int i;
- for (i = 0; i < blocks_clusters; i++) {
- new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
- }
-
- /* Fill the refcount blocks */
- uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
- int block = 0;
- for (i = 0; i < table_clusters + blocks_clusters; i++) {
- new_blocks[block++] = cpu_to_be16(1);
- }
-
- /* Write refcount blocks to disk */
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
- ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
- blocks_clusters * s->cluster_size);
- g_free(new_blocks);
- if (ret < 0) {
- goto fail_table;
- }
-
- /* Write refcount table to disk */
- for(i = 0; i < table_size; i++) {
- cpu_to_be64s(&new_table[i]);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
- ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
- table_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail_table;
- }
-
- for(i = 0; i < table_size; i++) {
- be64_to_cpus(&new_table[i]);
- }
-
- /* Hook up the new refcount table in the qcow2 header */
- uint8_t data[12];
- cpu_to_be64w((uint64_t*)data, table_offset);
- cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
- data, sizeof(data));
- if (ret < 0) {
- goto fail_table;
- }
-
- /* And switch it in memory */
- uint64_t old_table_offset = s->refcount_table_offset;
- uint64_t old_table_size = s->refcount_table_size;
-
- g_free(s->refcount_table);
- s->refcount_table = new_table;
- s->refcount_table_size = table_size;
- s->refcount_table_offset = table_offset;
-
- /* Free old table. Remember, we must not change free_cluster_index */
- uint64_t old_free_cluster_index = s->free_cluster_index;
- qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- s->free_cluster_index = old_free_cluster_index;
-
- ret = load_refcount_block(bs, new_block, (void**) refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-
-fail_table:
- g_free(new_table);
-fail_block:
- if (*refcount_block != NULL) {
- qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
- }
- return ret;
-}
-
-void qcow2_process_discards(BlockDriverState *bs, int ret)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2DiscardRegion *d, *next;
-
- QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
- QTAILQ_REMOVE(&s->discards, d, next);
-
- /* Discard is optional, ignore the return value */
- if (ret >= 0) {
- bdrv_discard(bs->file,
- d->offset >> BDRV_SECTOR_BITS,
- d->bytes >> BDRV_SECTOR_BITS);
- }
-
- g_free(d);
- }
-}
-
-static void update_refcount_discard(BlockDriverState *bs,
- uint64_t offset, uint64_t length)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2DiscardRegion *d, *p, *next;
-
- QTAILQ_FOREACH(d, &s->discards, next) {
- uint64_t new_start = MIN(offset, d->offset);
- uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
-
- if (new_end - new_start <= length + d->bytes) {
- /* There can't be any overlap, areas ending up here have no
- * references any more and therefore shouldn't get freed another
- * time. */
- assert(d->bytes + length == new_end - new_start);
- d->offset = new_start;
- d->bytes = new_end - new_start;
- goto found;
- }
- }
-
- d = g_malloc(sizeof(*d));
- *d = (Qcow2DiscardRegion) {
- .bs = bs,
- .offset = offset,
- .bytes = length,
- };
- QTAILQ_INSERT_TAIL(&s->discards, d, next);
-
-found:
- /* Merge discard requests if they are adjacent now */
- QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
- if (p == d
- || p->offset > d->offset + d->bytes
- || d->offset > p->offset + p->bytes)
- {
- continue;
- }
-
- /* Still no overlap possible */
- assert(p->offset == d->offset + d->bytes
- || d->offset == p->offset + p->bytes);
-
- QTAILQ_REMOVE(&s->discards, p, next);
- d->offset = MIN(d->offset, p->offset);
- d->bytes += p->bytes;
- }
-}
-
-/* XXX: cache several refcount block clusters ? */
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t start, last, cluster_offset;
- uint16_t *refcount_block = NULL;
- int64_t old_table_index = -1;
- int ret;
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
- offset, length, addend);
-#endif
- if (length < 0) {
- return -EINVAL;
- } else if (length == 0) {
- return 0;
- }
-
- if (addend < 0) {
- qcow2_cache_set_dependency(bs, s->refcount_block_cache,
- s->l2_table_cache);
- }
-
- start = offset & ~(s->cluster_size - 1);
- last = (offset + length - 1) & ~(s->cluster_size - 1);
- for(cluster_offset = start; cluster_offset <= last;
- cluster_offset += s->cluster_size)
- {
- int block_index, refcount;
- int64_t cluster_index = cluster_offset >> s->cluster_bits;
- int64_t table_index =
- cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-
- /* Load the refcount block and allocate it if needed */
- if (table_index != old_table_index) {
- if (refcount_block) {
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
- if (ret < 0) {
- goto fail;
- }
- }
- old_table_index = table_index;
-
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
-
- /* we can update the count and save it */
- block_index = cluster_index &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-
- refcount = be16_to_cpu(refcount_block[block_index]);
- refcount += addend;
- if (refcount < 0 || refcount > 0xffff) {
- ret = -EINVAL;
- goto fail;
- }
- if (refcount == 0 && cluster_index < s->free_cluster_index) {
- s->free_cluster_index = cluster_index;
- }
- refcount_block[block_index] = cpu_to_be16(refcount);
-
- if (refcount == 0 && s->discard_passthrough[type]) {
- update_refcount_discard(bs, cluster_offset, s->cluster_size);
- }
- }
-
- ret = 0;
-fail:
- if (!s->cache_discards) {
- qcow2_process_discards(bs, ret);
- }
-
- /* Write last changed block to disk */
- if (refcount_block) {
- int wret;
- wret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (wret < 0) {
- return ret < 0 ? ret : wret;
- }
- }
-
- /*
- * Try do undo any updates if an error is returned (This may succeed in
- * some cases like ENOSPC for allocating a new refcount block)
- */
- if (ret < 0) {
- int dummy;
- dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
- QCOW2_DISCARD_NEVER);
- (void)dummy;
- }
-
- return ret;
-}
-
-/*
- * Increases or decreases the refcount of a given cluster by one.
- * addend must be 1 or -1.
- *
- * If the return value is non-negative, it is the new refcount of the cluster.
- * If it is negative, it is -errno and indicates an error.
- */
-static int update_cluster_refcount(BlockDriverState *bs,
- int64_t cluster_index,
- int addend,
- enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
- type);
- if (ret < 0) {
- return ret;
- }
-
- return get_refcount(bs, cluster_index);
-}
-
-
-
-/*********************************************************/
-/* cluster allocation functions */
-
-
-
-/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
-{
- BDRVQcowState *s = bs->opaque;
- int i, nb_clusters, refcount;
-
- nb_clusters = size_to_clusters(s, size);
-retry:
- for(i = 0; i < nb_clusters; i++) {
- int64_t next_cluster_index = s->free_cluster_index++;
- refcount = get_refcount(bs, next_cluster_index);
-
- if (refcount < 0) {
- return refcount;
- } else if (refcount != 0) {
- goto retry;
- }
- }
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
- size,
- (s->free_cluster_index - nb_clusters) << s->cluster_bits);
-#endif
- return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
-}
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
-{
- int64_t offset;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
- offset = alloc_clusters_noref(bs, size);
- if (offset < 0) {
- return offset;
- }
-
- ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- return ret;
- }
-
- return offset;
-}
-
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t cluster_index;
- uint64_t old_free_cluster_index;
- int i, refcount, ret;
-
- /* Check how many clusters there are free */
- cluster_index = offset >> s->cluster_bits;
- for(i = 0; i < nb_clusters; i++) {
- refcount = get_refcount(bs, cluster_index++);
-
- if (refcount < 0) {
- return refcount;
- } else if (refcount != 0) {
- break;
- }
- }
-
- /* And then allocate them */
- old_free_cluster_index = s->free_cluster_index;
- s->free_cluster_index = cluster_index + i;
-
- ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- return ret;
- }
-
- s->free_cluster_index = old_free_cluster_index;
-
- return i;
-}
-
-/* only used to allocate compressed sectors. We try to allocate
- contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t offset, cluster_offset;
- int free_in_cluster;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
- assert(size > 0 && size <= s->cluster_size);
- if (s->free_byte_offset == 0) {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
- }
- s->free_byte_offset = offset;
- }
- redo:
- free_in_cluster = s->cluster_size -
- (s->free_byte_offset & (s->cluster_size - 1));
- if (size <= free_in_cluster) {
- /* enough space in current cluster */
- offset = s->free_byte_offset;
- s->free_byte_offset += size;
- free_in_cluster -= size;
- if (free_in_cluster == 0)
- s->free_byte_offset = 0;
- if ((offset & (s->cluster_size - 1)) != 0)
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- } else {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
- }
- cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
- if ((cluster_offset + s->cluster_size) == offset) {
- /* we are lucky: contiguous data */
- offset = s->free_byte_offset;
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- s->free_byte_offset += size;
- } else {
- s->free_byte_offset = offset;
- goto redo;
- }
- }
-
- /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
- * or explicitly by update_cluster_refcount(). Refcount blocks must be
- * flushed before the caller's L2 table updates.
- */
- qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
- return offset;
-}
-
-void qcow2_free_clusters(BlockDriverState *bs,
- int64_t offset, int64_t size,
- enum qcow2_discard_type type)
-{
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
- ret = update_refcount(bs, offset, size, -1, type);
- if (ret < 0) {
- fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
- /* TODO Remember the clusters to free them later and avoid leaking */
- }
-}
-
-/*
- * Free a cluster using its L2 entry (handles clusters of all types, e.g.
- * normal cluster, compressed cluster, etc.)
- */
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
- int nb_clusters, enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
-
- switch (qcow2_get_cluster_type(l2_entry)) {
- case QCOW2_CLUSTER_COMPRESSED:
- {
- int nb_csectors;
- nb_csectors = ((l2_entry >> s->csize_shift) &
- s->csize_mask) + 1;
- qcow2_free_clusters(bs,
- (l2_entry & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, type);
- }
- break;
- case QCOW2_CLUSTER_NORMAL:
- qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
- nb_clusters << s->cluster_bits, type);
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- case QCOW2_CLUSTER_ZERO:
- break;
- default:
- abort();
- }
-}
-
-
-
-/*********************************************************/
-/* snapshots and image creation */
-
-
-
-/* update the refcounts of snapshots and the copied flag */
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
- int64_t l1_table_offset, int l1_size, int addend)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
- int64_t old_offset, old_l2_offset;
- int i, j, l1_modified = 0, nb_csectors, refcount;
- int ret;
-
- l2_table = NULL;
- l1_table = NULL;
- l1_size2 = l1_size * sizeof(uint64_t);
-
- s->cache_discards = true;
-
- /* WARNING: qcow2_snapshot_goto relies on this function not using the
- * l1_table_offset when it is the current s->l1_table_offset! Be careful
- * when changing this! */
- if (l1_table_offset != s->l1_table_offset) {
- l1_table = g_malloc0(align_offset(l1_size2, 512));
- l1_allocated = 1;
-
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
- if (ret < 0) {
- goto fail;
- }
-
- for(i = 0;i < l1_size; i++)
- be64_to_cpus(&l1_table[i]);
- } else {
- assert(l1_size == s->l1_size);
- l1_table = s->l1_table;
- l1_allocated = 0;
- }
-
- for(i = 0; i < l1_size; i++) {
- l2_offset = l1_table[i];
- if (l2_offset) {
- old_l2_offset = l2_offset;
- l2_offset &= L1E_OFFSET_MASK;
-
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
- (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
-
- for(j = 0; j < s->l2_size; j++) {
- offset = be64_to_cpu(l2_table[j]);
- if (offset != 0) {
- old_offset = offset;
- offset &= ~QCOW_OFLAG_COPIED;
- if (offset & QCOW_OFLAG_COMPRESSED) {
- nb_csectors = ((offset >> s->csize_shift) &
- s->csize_mask) + 1;
- if (addend != 0) {
- int ret;
- ret = update_refcount(bs,
- (offset & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, addend,
- QCOW2_DISCARD_SNAPSHOT);
- if (ret < 0) {
- goto fail;
- }
- }
- /* compressed clusters are never modified */
- refcount = 2;
- } else {
- uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
- if (addend != 0) {
- refcount = update_cluster_refcount(bs, cluster_index, addend,
- QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = get_refcount(bs, cluster_index);
- }
-
- if (refcount < 0) {
- ret = refcount;
- goto fail;
- }
- }
-
- if (refcount == 1) {
- offset |= QCOW_OFLAG_COPIED;
- }
- if (offset != old_offset) {
- if (addend > 0) {
- qcow2_cache_set_dependency(bs, s->l2_table_cache,
- s->refcount_block_cache);
- }
- l2_table[j] = cpu_to_be64(offset);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- }
- }
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
-
-
- if (addend != 0) {
- refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
- QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
- }
- if (refcount < 0) {
- ret = refcount;
- goto fail;
- } else if (refcount == 1) {
- l2_offset |= QCOW_OFLAG_COPIED;
- }
- if (l2_offset != old_l2_offset) {
- l1_table[i] = l2_offset;
- l1_modified = 1;
- }
- }
- }
-
- ret = bdrv_flush(bs);
-fail:
- if (l2_table) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- }
-
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- /* Update L1 only if it isn't deleted anyway (addend = -1) */
- if (ret == 0 && addend >= 0 && l1_modified) {
- for (i = 0; i < l1_size; i++) {
- cpu_to_be64s(&l1_table[i]);
- }
-
- ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
-
- for (i = 0; i < l1_size; i++) {
- be64_to_cpus(&l1_table[i]);
- }
- }
- if (l1_allocated)
- g_free(l1_table);
- return ret;
-}
-
-
-
-
-/*********************************************************/
-/* refcount checking functions */
-
-
-
-/*
- * Increases the refcount for a range of clusters in a given refcount table.
- * This is used to construct a temporary refcount table out of L1 and L2 tables
- * which can be compared the the refcount table saved in the image.
- *
- * Modifies the number of errors in res.
- */
-static void inc_refcounts(BlockDriverState *bs,
- BdrvCheckResult *res,
- uint16_t *refcount_table,
- int refcount_table_size,
- int64_t offset, int64_t size)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t start, last, cluster_offset;
- int k;
-
- if (size <= 0)
- return;
-
- start = offset & ~(s->cluster_size - 1);
- last = (offset + size - 1) & ~(s->cluster_size - 1);
- for(cluster_offset = start; cluster_offset <= last;
- cluster_offset += s->cluster_size) {
- k = cluster_offset >> s->cluster_bits;
- if (k < 0) {
- fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
- cluster_offset);
- res->corruptions++;
- } else if (k >= refcount_table_size) {
- fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
- "the end of the image file, can't properly check refcounts.\n",
- cluster_offset);
- res->check_errors++;
- } else {
- if (++refcount_table[k] == 0) {
- fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
- "\n", cluster_offset);
- res->corruptions++;
- }
- }
- }
-}
-
-/* Flags for check_refcounts_l1() and check_refcounts_l2() */
-enum {
- CHECK_OFLAG_COPIED = 0x1, /* check QCOW_OFLAG_COPIED matches refcount */
- CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */
-};
-
-/*
- * Increases the refcount in the given refcount table for the all clusters
- * referenced in the L2 table. While doing so, performs some checks on L2
- * entries.
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
- uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
- int flags)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table, l2_entry;
- uint64_t next_contiguous_offset = 0;
- int i, l2_size, nb_csectors, refcount;
-
- /* Read L2 table from disk */
- l2_size = s->l2_size * sizeof(uint64_t);
- l2_table = g_malloc(l2_size);
-
- if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
- goto fail;
-
- /* Do the actual checks */
- for(i = 0; i < s->l2_size; i++) {
- l2_entry = be64_to_cpu(l2_table[i]);
-
- switch (qcow2_get_cluster_type(l2_entry)) {
- case QCOW2_CLUSTER_COMPRESSED:
- /* Compressed clusters don't have QCOW_OFLAG_COPIED */
- if (l2_entry & QCOW_OFLAG_COPIED) {
- fprintf(stderr, "ERROR: cluster %" PRId64 ": "
- "copied flag must never be set for compressed "
- "clusters\n", l2_entry >> s->cluster_bits);
- l2_entry &= ~QCOW_OFLAG_COPIED;
- res->corruptions++;
- }
-
- /* Mark cluster as used */
- nb_csectors = ((l2_entry >> s->csize_shift) &
- s->csize_mask) + 1;
- l2_entry &= s->cluster_offset_mask;
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l2_entry & ~511, nb_csectors * 512);
-
- if (flags & CHECK_FRAG_INFO) {
- res->bfi.allocated_clusters++;
- res->bfi.compressed_clusters++;
-
- /* Compressed clusters are fragmented by nature. Since they
- * take up sub-sector space but we only have sector granularity
- * I/O we need to re-read the same sectors even for adjacent
- * compressed clusters.
- */
- res->bfi.fragmented_clusters++;
- }
- break;
-
- case QCOW2_CLUSTER_ZERO:
- if ((l2_entry & L2E_OFFSET_MASK) == 0) {
- break;
- }
- /* fall through */
-
- case QCOW2_CLUSTER_NORMAL:
- {
- /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
- uint64_t offset = l2_entry & L2E_OFFSET_MASK;
-
- if (flags & CHECK_OFLAG_COPIED) {
- refcount = get_refcount(bs, offset >> s->cluster_bits);
- if (refcount < 0) {
- fprintf(stderr, "Can't get refcount for offset %"
- PRIx64 ": %s\n", l2_entry, strerror(-refcount));
- goto fail;
- }
- if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
- fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
- PRIx64 " refcount=%d\n", l2_entry, refcount);
- res->corruptions++;
- }
- }
-
- if (flags & CHECK_FRAG_INFO) {
- res->bfi.allocated_clusters++;
- if (next_contiguous_offset &&
- offset != next_contiguous_offset) {
- res->bfi.fragmented_clusters++;
- }
- next_contiguous_offset = offset + s->cluster_size;
- }
-
- /* Mark cluster as used */
- inc_refcounts(bs, res, refcount_table,refcount_table_size,
- offset, s->cluster_size);
-
- /* Correct offsets are cluster aligned */
- if (offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
- "properly aligned; L2 entry corrupted.\n", offset);
- res->corruptions++;
- }
- break;
- }
-
- case QCOW2_CLUSTER_UNALLOCATED:
- break;
-
- default:
- abort();
- }
- }
-
- g_free(l2_table);
- return 0;
-
-fail:
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
- g_free(l2_table);
- return -EIO;
-}
-
-/*
- * Increases the refcount for the L1 table, its L2 tables and all referenced
- * clusters in the given refcount table. While doing so, performs some checks
- * on L1 and L2 entries.
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l1(BlockDriverState *bs,
- BdrvCheckResult *res,
- uint16_t *refcount_table,
- int refcount_table_size,
- int64_t l1_table_offset, int l1_size,
- int flags)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, l2_offset, l1_size2;
- int i, refcount, ret;
-
- l1_size2 = l1_size * sizeof(uint64_t);
-
- /* Mark L1 table as used */
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l1_table_offset, l1_size2);
-
- /* Read L1 table entries from disk */
- if (l1_size2 == 0) {
- l1_table = NULL;
- } else {
- l1_table = g_malloc(l1_size2);
- if (bdrv_pread(bs->file, l1_table_offset,
- l1_table, l1_size2) != l1_size2)
- goto fail;
- for(i = 0;i < l1_size; i++)
- be64_to_cpus(&l1_table[i]);
- }
-
- /* Do the actual checks */
- for(i = 0; i < l1_size; i++) {
- l2_offset = l1_table[i];
- if (l2_offset) {
- /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
- if (flags & CHECK_OFLAG_COPIED) {
- refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
- >> s->cluster_bits);
- if (refcount < 0) {
- fprintf(stderr, "Can't get refcount for l2_offset %"
- PRIx64 ": %s\n", l2_offset, strerror(-refcount));
- goto fail;
- }
- if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
- fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
- " refcount=%d\n", l2_offset, refcount);
- res->corruptions++;
- }
- }
-
- /* Mark L2 table as used */
- l2_offset &= L1E_OFFSET_MASK;
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l2_offset, s->cluster_size);
-
- /* L2 tables are cluster aligned */
- if (l2_offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
- "cluster aligned; L1 entry corrupted\n", l2_offset);
- res->corruptions++;
- }
-
- /* Process and check L2 entries */
- ret = check_refcounts_l2(bs, res, refcount_table,
- refcount_table_size, l2_offset, flags);
- if (ret < 0) {
- goto fail;
- }
- }
- }
- g_free(l1_table);
- return 0;
-
-fail:
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
- res->check_errors++;
- g_free(l1_table);
- return -EIO;
-}
-
-/*
- * Checks an image for refcount consistency.
- *
- * Returns 0 if no errors are found, the number of errors in case the image is
- * detected as corrupted, and -errno when an internal error occurred.
- */
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t size, i, highest_cluster;
- int nb_clusters, refcount1, refcount2;
- QCowSnapshot *sn;
- uint16_t *refcount_table;
- int ret;
-
- size = bdrv_getlength(bs->file);
- nb_clusters = size_to_clusters(s, size);
- refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
-
- res->bfi.total_clusters =
- size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
-
- /* header */
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- 0, s->cluster_size);
-
- /* current L1 table */
- ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
- s->l1_table_offset, s->l1_size,
- CHECK_OFLAG_COPIED | CHECK_FRAG_INFO);
- if (ret < 0) {
- goto fail;
- }
-
- /* snapshots */
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
- sn->l1_table_offset, sn->l1_size, 0);
- if (ret < 0) {
- goto fail;
- }
- }
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- s->snapshots_offset, s->snapshots_size);
-
- /* refcount data */
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- s->refcount_table_offset,
- s->refcount_table_size * sizeof(uint64_t));
-
- for(i = 0; i < s->refcount_table_size; i++) {
- uint64_t offset, cluster;
- offset = s->refcount_table[i];
- cluster = offset >> s->cluster_bits;
-
- /* Refcount blocks are cluster aligned */
- if (offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
- "cluster aligned; refcount table entry corrupted\n", i);
- res->corruptions++;
- continue;
- }
-
- if (cluster >= nb_clusters) {
- fprintf(stderr, "ERROR refcount block %" PRId64
- " is outside image\n", i);
- res->corruptions++;
- continue;
- }
-
- if (offset != 0) {
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- offset, s->cluster_size);
- if (refcount_table[cluster] != 1) {
- fprintf(stderr, "ERROR refcount block %" PRId64
- " refcount=%d\n",
- i, refcount_table[cluster]);
- res->corruptions++;
- }
- }
- }
-
- /* compare ref counts */
- for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
- refcount1 = get_refcount(bs, i);
- if (refcount1 < 0) {
- fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
- i, strerror(-refcount1));
- res->check_errors++;
- continue;
- }
-
- refcount2 = refcount_table[i];
-
- if (refcount1 > 0 || refcount2 > 0) {
- highest_cluster = i;
- }
-
- if (refcount1 != refcount2) {
-
- /* Check if we're allowed to fix the mismatch */
- int *num_fixed = NULL;
- if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
- num_fixed = &res->leaks_fixed;
- } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
- num_fixed = &res->corruptions_fixed;
- }
-
- fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
- num_fixed != NULL ? "Repairing" :
- refcount1 < refcount2 ? "ERROR" :
- "Leaked",
- i, refcount1, refcount2);
-
- if (num_fixed) {
- ret = update_refcount(bs, i << s->cluster_bits, 1,
- refcount2 - refcount1,
- QCOW2_DISCARD_ALWAYS);
- if (ret >= 0) {
- (*num_fixed)++;
- continue;
- }
- }
-
- /* And if we couldn't, print an error */
- if (refcount1 < refcount2) {
- res->corruptions++;
- } else {
- res->leaks++;
- }
- }
- }
-
- res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
- ret = 0;
-
-fail:
- g_free(refcount_table);
-
- return ret;
-}
-
diff --git a/contrib/qemu/block/qcow2-snapshot.c b/contrib/qemu/block/qcow2-snapshot.c
deleted file mode 100644
index 0caac9055f8..00000000000
--- a/contrib/qemu/block/qcow2-snapshot.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-
-typedef struct QEMU_PACKED QCowSnapshotHeader {
- /* header is 8 byte aligned */
- uint64_t l1_table_offset;
-
- uint32_t l1_size;
- uint16_t id_str_size;
- uint16_t name_size;
-
- uint32_t date_sec;
- uint32_t date_nsec;
-
- uint64_t vm_clock_nsec;
-
- uint32_t vm_state_size;
- uint32_t extra_data_size; /* for extension */
- /* extra data follows */
- /* id_str follows */
- /* name follows */
-} QCowSnapshotHeader;
-
-typedef struct QEMU_PACKED QCowSnapshotExtraData {
- uint64_t vm_state_size_large;
- uint64_t disk_size;
-} QCowSnapshotExtraData;
-
-void qcow2_free_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- g_free(s->snapshots[i].name);
- g_free(s->snapshots[i].id_str);
- }
- g_free(s->snapshots);
- s->snapshots = NULL;
- s->nb_snapshots = 0;
-}
-
-int qcow2_read_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshotHeader h;
- QCowSnapshotExtraData extra;
- QCowSnapshot *sn;
- int i, id_str_size, name_size;
- int64_t offset;
- uint32_t extra_data_size;
- int ret;
-
- if (!s->nb_snapshots) {
- s->snapshots = NULL;
- s->snapshots_size = 0;
- return 0;
- }
-
- offset = s->snapshots_offset;
- s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
-
- for(i = 0; i < s->nb_snapshots; i++) {
- /* Read statically sized part of the snapshot header */
- offset = align_offset(offset, 8);
- ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
- if (ret < 0) {
- goto fail;
- }
-
- offset += sizeof(h);
- sn = s->snapshots + i;
- sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
- sn->l1_size = be32_to_cpu(h.l1_size);
- sn->vm_state_size = be32_to_cpu(h.vm_state_size);
- sn->date_sec = be32_to_cpu(h.date_sec);
- sn->date_nsec = be32_to_cpu(h.date_nsec);
- sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
- extra_data_size = be32_to_cpu(h.extra_data_size);
-
- id_str_size = be16_to_cpu(h.id_str_size);
- name_size = be16_to_cpu(h.name_size);
-
- /* Read extra data */
- ret = bdrv_pread(bs->file, offset, &extra,
- MIN(sizeof(extra), extra_data_size));
- if (ret < 0) {
- goto fail;
- }
- offset += extra_data_size;
-
- if (extra_data_size >= 8) {
- sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
- }
-
- if (extra_data_size >= 16) {
- sn->disk_size = be64_to_cpu(extra.disk_size);
- } else {
- sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- }
-
- /* Read snapshot ID */
- sn->id_str = g_malloc(id_str_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
- if (ret < 0) {
- goto fail;
- }
- offset += id_str_size;
- sn->id_str[id_str_size] = '\0';
-
- /* Read snapshot name */
- sn->name = g_malloc(name_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->name, name_size);
- if (ret < 0) {
- goto fail;
- }
- offset += name_size;
- sn->name[name_size] = '\0';
- }
-
- s->snapshots_size = offset - s->snapshots_offset;
- return 0;
-
-fail:
- qcow2_free_snapshots(bs);
- return ret;
-}
-
-/* add at the end of the file a new list of snapshots */
-static int qcow2_write_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- QCowSnapshotHeader h;
- QCowSnapshotExtraData extra;
- int i, name_size, id_str_size, snapshots_size;
- struct {
- uint32_t nb_snapshots;
- uint64_t snapshots_offset;
- } QEMU_PACKED header_data;
- int64_t offset, snapshots_offset;
- int ret;
-
- /* compute the size of the snapshots */
- offset = 0;
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- offset = align_offset(offset, 8);
- offset += sizeof(h);
- offset += sizeof(extra);
- offset += strlen(sn->id_str);
- offset += strlen(sn->name);
- }
- snapshots_size = offset;
-
- /* Allocate space for the new snapshot list */
- snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
- offset = snapshots_offset;
- if (offset < 0) {
- return offset;
- }
- ret = bdrv_flush(bs);
- if (ret < 0) {
- return ret;
- }
-
- /* Write all snapshots to the new list */
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- memset(&h, 0, sizeof(h));
- h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
- h.l1_size = cpu_to_be32(sn->l1_size);
- /* If it doesn't fit in 32 bit, older implementations should treat it
- * as a disk-only snapshot rather than truncate the VM state */
- if (sn->vm_state_size <= 0xffffffff) {
- h.vm_state_size = cpu_to_be32(sn->vm_state_size);
- }
- h.date_sec = cpu_to_be32(sn->date_sec);
- h.date_nsec = cpu_to_be32(sn->date_nsec);
- h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
- h.extra_data_size = cpu_to_be32(sizeof(extra));
-
- memset(&extra, 0, sizeof(extra));
- extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
- extra.disk_size = cpu_to_be64(sn->disk_size);
-
- id_str_size = strlen(sn->id_str);
- name_size = strlen(sn->name);
- h.id_str_size = cpu_to_be16(id_str_size);
- h.name_size = cpu_to_be16(name_size);
- offset = align_offset(offset, 8);
-
- ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
- if (ret < 0) {
- goto fail;
- }
- offset += sizeof(h);
-
- ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
- if (ret < 0) {
- goto fail;
- }
- offset += sizeof(extra);
-
- ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
- if (ret < 0) {
- goto fail;
- }
- offset += id_str_size;
-
- ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
- if (ret < 0) {
- goto fail;
- }
- offset += name_size;
- }
-
- /*
- * Update the header to point to the new snapshot table. This requires the
- * new table and its refcounts to be stable on disk.
- */
- ret = bdrv_flush(bs);
- if (ret < 0) {
- goto fail;
- }
-
- QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
- offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
-
- header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
- header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
-
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
- &header_data, sizeof(header_data));
- if (ret < 0) {
- goto fail;
- }
-
- /* free the old snapshot table */
- qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
- QCOW2_DISCARD_SNAPSHOT);
- s->snapshots_offset = snapshots_offset;
- s->snapshots_size = snapshots_size;
- return 0;
-
-fail:
- return ret;
-}
-
-static void find_new_snapshot_id(BlockDriverState *bs,
- char *id_str, int id_str_size)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- int i, id, id_max = 0;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- id = strtoul(sn->id_str, NULL, 10);
- if (id > id_max)
- id_max = id;
- }
- snprintf(id_str, id_str_size, "%d", id_max + 1);
-}
-
-static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- if (!strcmp(s->snapshots[i].id_str, id_str))
- return i;
- }
- return -1;
-}
-
-static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
-{
- BDRVQcowState *s = bs->opaque;
- int i, ret;
-
- ret = find_snapshot_by_id(bs, name);
- if (ret >= 0)
- return ret;
- for(i = 0; i < s->nb_snapshots; i++) {
- if (!strcmp(s->snapshots[i].name, name))
- return i;
- }
- return -1;
-}
-
-/* if no id is provided, a new one is constructed */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *new_snapshot_list = NULL;
- QCowSnapshot *old_snapshot_list = NULL;
- QCowSnapshot sn1, *sn = &sn1;
- int i, ret;
- uint64_t *l1_table = NULL;
- int64_t l1_table_offset;
-
- memset(sn, 0, sizeof(*sn));
-
- /* Generate an ID if it wasn't passed */
- if (sn_info->id_str[0] == '\0') {
- find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
- }
-
- /* Check that the ID is unique */
- if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
- return -EEXIST;
- }
-
- /* Populate sn with passed data */
- sn->id_str = g_strdup(sn_info->id_str);
- sn->name = g_strdup(sn_info->name);
-
- sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- sn->vm_state_size = sn_info->vm_state_size;
- sn->date_sec = sn_info->date_sec;
- sn->date_nsec = sn_info->date_nsec;
- sn->vm_clock_nsec = sn_info->vm_clock_nsec;
-
- /* Allocate the L1 table of the snapshot and copy the current one there. */
- l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
- if (l1_table_offset < 0) {
- ret = l1_table_offset;
- goto fail;
- }
-
- sn->l1_table_offset = l1_table_offset;
- sn->l1_size = s->l1_size;
-
- l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
- for(i = 0; i < s->l1_size; i++) {
- l1_table[i] = cpu_to_be64(s->l1_table[i]);
- }
-
- ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
-
- g_free(l1_table);
- l1_table = NULL;
-
- /*
- * Increase the refcounts of all clusters and make sure everything is
- * stable on disk before updating the snapshot table to contain a pointer
- * to the new L1 table.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
- if (ret < 0) {
- goto fail;
- }
-
- /* Append the new snapshot to the snapshot list */
- new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
- if (s->snapshots) {
- memcpy(new_snapshot_list, s->snapshots,
- s->nb_snapshots * sizeof(QCowSnapshot));
- old_snapshot_list = s->snapshots;
- }
- s->snapshots = new_snapshot_list;
- s->snapshots[s->nb_snapshots++] = *sn;
-
- ret = qcow2_write_snapshots(bs);
- if (ret < 0) {
- g_free(s->snapshots);
- s->snapshots = old_snapshot_list;
- goto fail;
- }
-
- g_free(old_snapshot_list);
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-
-fail:
- g_free(sn->id_str);
- g_free(sn->name);
- g_free(l1_table);
-
- return ret;
-}
-
-/* copy the snapshot 'snapshot_name' into the current disk image */
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- int i, snapshot_index;
- int cur_l1_bytes, sn_l1_bytes;
- int ret;
- uint64_t *sn_l1_table = NULL;
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = &s->snapshots[snapshot_index];
-
- if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
- error_report("qcow2: Loading snapshots with different disk "
- "size is not implemented");
- ret = -ENOTSUP;
- goto fail;
- }
-
- /*
- * Make sure that the current L1 table is big enough to contain the whole
- * L1 table of the snapshot. If the snapshot L1 table is smaller, the
- * current one must be padded with zeros.
- */
- ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
- if (ret < 0) {
- goto fail;
- }
-
- cur_l1_bytes = s->l1_size * sizeof(uint64_t);
- sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
-
- /*
- * Copy the snapshot L1 table to the current L1 table.
- *
- * Before overwriting the old current L1 table on disk, make sure to
- * increase all refcounts for the clusters referenced by the new one.
- * Decrease the refcount referenced by the old one only when the L1
- * table is overwritten.
- */
- sn_l1_table = g_malloc0(cur_l1_bytes);
-
- ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
- if (ret < 0) {
- goto fail;
- }
-
- ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
- sn->l1_size, 1);
- if (ret < 0) {
- goto fail;
- }
-
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
- cur_l1_bytes);
- if (ret < 0) {
- goto fail;
- }
-
- /*
- * Decrease refcount of clusters of current L1 table.
- *
- * At this point, the in-memory s->l1_table points to the old L1 table,
- * whereas on disk we already have the new one.
- *
- * qcow2_update_snapshot_refcount special cases the current L1 table to use
- * the in-memory data instead of really using the offset to load a new one,
- * which is why this works.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
- s->l1_size, -1);
-
- /*
- * Now update the in-memory L1 table to be in sync with the on-disk one. We
- * need to do this even if updating refcounts failed.
- */
- for(i = 0;i < s->l1_size; i++) {
- s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
- }
-
- if (ret < 0) {
- goto fail;
- }
-
- g_free(sn_l1_table);
- sn_l1_table = NULL;
-
- /*
- * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
- * when we decreased the refcount of the old snapshot.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
- if (ret < 0) {
- goto fail;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-
-fail:
- g_free(sn_l1_table);
- return ret;
-}
-
-int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot sn;
- int snapshot_index, ret;
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = s->snapshots[snapshot_index];
-
- /* Remove it from the snapshot list */
- memmove(s->snapshots + snapshot_index,
- s->snapshots + snapshot_index + 1,
- (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
- s->nb_snapshots--;
- ret = qcow2_write_snapshots(bs);
- if (ret < 0) {
- return ret;
- }
-
- /*
- * The snapshot is now unused, clean up. If we fail after this point, we
- * won't recover but just leak clusters.
- */
- g_free(sn.id_str);
- g_free(sn.name);
-
- /*
- * Now decrease the refcounts of clusters referenced by the snapshot and
- * free the L1 table.
- */
- ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
- sn.l1_size, -1);
- if (ret < 0) {
- return ret;
- }
- qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
- QCOW2_DISCARD_SNAPSHOT);
-
- /* must update the copied flag on the current cluster offsets */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
- if (ret < 0) {
- return ret;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-}
-
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
-{
- BDRVQcowState *s = bs->opaque;
- QEMUSnapshotInfo *sn_tab, *sn_info;
- QCowSnapshot *sn;
- int i;
-
- if (!s->nb_snapshots) {
- *psn_tab = NULL;
- return s->nb_snapshots;
- }
-
- sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
- for(i = 0; i < s->nb_snapshots; i++) {
- sn_info = sn_tab + i;
- sn = s->snapshots + i;
- pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
- sn->id_str);
- pstrcpy(sn_info->name, sizeof(sn_info->name),
- sn->name);
- sn_info->vm_state_size = sn->vm_state_size;
- sn_info->date_sec = sn->date_sec;
- sn_info->date_nsec = sn->date_nsec;
- sn_info->vm_clock_nsec = sn->vm_clock_nsec;
- }
- *psn_tab = sn_tab;
- return s->nb_snapshots;
-}
-
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
-{
- int i, snapshot_index;
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- uint64_t *new_l1_table;
- int new_l1_bytes;
- int ret;
-
- assert(bs->read_only);
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = &s->snapshots[snapshot_index];
-
- /* Allocate and read in the snapshot's L1 table */
- new_l1_bytes = s->l1_size * sizeof(uint64_t);
- new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
-
- ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
- if (ret < 0) {
- g_free(new_l1_table);
- return ret;
- }
-
- /* Switch the L1 table */
- g_free(s->l1_table);
-
- s->l1_size = sn->l1_size;
- s->l1_table_offset = sn->l1_table_offset;
- s->l1_table = new_l1_table;
-
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
-
- return 0;
-}
diff --git a/contrib/qemu/block/qcow2.c b/contrib/qemu/block/qcow2.c
deleted file mode 100644
index 0eceefe2cd9..00000000000
--- a/contrib/qemu/block/qcow2.c
+++ /dev/null
@@ -1,1825 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qemu/aes.h"
-#include "block/qcow2.h"
-#include "qemu/error-report.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qbool.h"
-#include "trace.h"
-
-/*
- Differences with QCOW:
-
- - Support for multiple incremental snapshots.
- - Memory management by reference counts.
- - Clusters which have a reference count of one have the bit
- QCOW_OFLAG_COPIED to optimize write performance.
- - Size of compressed clusters is stored in sectors to reduce bit usage
- in the cluster offsets.
- - Support for storing additional data (such as the VM state) in the
- snapshots.
- - If a backing store is used, the cluster size is not constrained
- (could be backported to QCOW).
- - L2 tables have always a size of one cluster.
-*/
-
-
-typedef struct {
- uint32_t magic;
- uint32_t len;
-} QCowExtension;
-
-#define QCOW2_EXT_MAGIC_END 0
-#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
-#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
-
-static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
- const QCowHeader *cow_header = (const void *)buf;
-
- if (buf_size >= sizeof(QCowHeader) &&
- be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
- be32_to_cpu(cow_header->version) >= 2)
- return 100;
- else
- return 0;
-}
-
-
-/*
- * read qcow2 extension and fill bs
- * start reading from start_offset
- * finish reading upon magic of value 0 or when end_offset reached
- * unknown magic is skipped (future extension this version knows nothing about)
- * return 0 upon success, non-0 otherwise
- */
-static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
- uint64_t end_offset, void **p_feature_table)
-{
- BDRVQcowState *s = bs->opaque;
- QCowExtension ext;
- uint64_t offset;
- int ret;
-
-#ifdef DEBUG_EXT
- printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
-#endif
- offset = start_offset;
- while (offset < end_offset) {
-
-#ifdef DEBUG_EXT
- /* Sanity check */
- if (offset > s->cluster_size)
- printf("qcow2_read_extension: suspicious offset %lu\n", offset);
-
- printf("attempting to read extended header in offset %lu\n", offset);
-#endif
-
- if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
- fprintf(stderr, "qcow2_read_extension: ERROR: "
- "pread fail from offset %" PRIu64 "\n",
- offset);
- return 1;
- }
- be32_to_cpus(&ext.magic);
- be32_to_cpus(&ext.len);
- offset += sizeof(ext);
-#ifdef DEBUG_EXT
- printf("ext.magic = 0x%x\n", ext.magic);
-#endif
- if (ext.len > end_offset - offset) {
- error_report("Header extension too large");
- return -EINVAL;
- }
-
- switch (ext.magic) {
- case QCOW2_EXT_MAGIC_END:
- return 0;
-
- case QCOW2_EXT_MAGIC_BACKING_FORMAT:
- if (ext.len >= sizeof(bs->backing_format)) {
- fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
- " (>=%zu)\n",
- ext.len, sizeof(bs->backing_format));
- return 2;
- }
- if (bdrv_pread(bs->file, offset , bs->backing_format,
- ext.len) != ext.len)
- return 3;
- bs->backing_format[ext.len] = '\0';
-#ifdef DEBUG_EXT
- printf("Qcow2: Got format extension %s\n", bs->backing_format);
-#endif
- break;
-
- case QCOW2_EXT_MAGIC_FEATURE_TABLE:
- if (p_feature_table != NULL) {
- void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
- ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
- if (ret < 0) {
- return ret;
- }
-
- *p_feature_table = feature_table;
- }
- break;
-
- default:
- /* unknown magic - save it in case we need to rewrite the header */
- {
- Qcow2UnknownHeaderExtension *uext;
-
- uext = g_malloc0(sizeof(*uext) + ext.len);
- uext->magic = ext.magic;
- uext->len = ext.len;
- QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
-
- ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
- if (ret < 0) {
- return ret;
- }
- }
- break;
- }
-
- offset += ((ext.len + 7) & ~7);
- }
-
- return 0;
-}
-
-static void cleanup_unknown_header_ext(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2UnknownHeaderExtension *uext, *next;
-
- QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
- QLIST_REMOVE(uext, next);
- g_free(uext);
- }
-}
-
-static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs,
- const char *fmt, ...)
-{
- char msg[64];
- va_list ap;
-
- va_start(ap, fmt);
- vsnprintf(msg, sizeof(msg), fmt, ap);
- va_end(ap);
-
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "qcow2", msg);
-}
-
-static void report_unsupported_feature(BlockDriverState *bs,
- Qcow2Feature *table, uint64_t mask)
-{
- while (table && table->name[0] != '\0') {
- if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
- if (mask & (1 << table->bit)) {
- report_unsupported(bs, "%.46s",table->name);
- mask &= ~(1 << table->bit);
- }
- }
- table++;
- }
-
- if (mask) {
- report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
- }
-}
-
-/*
- * Sets the dirty bit and flushes afterwards if necessary.
- *
- * The incompatible_features bit is only set if the image file header was
- * updated successfully. Therefore it is not required to check the return
- * value of this function.
- */
-int qcow2_mark_dirty(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t val;
- int ret;
-
- assert(s->qcow_version >= 3);
-
- if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
- return 0; /* already dirty */
- }
-
- val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
- ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
- &val, sizeof(val));
- if (ret < 0) {
- return ret;
- }
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- return ret;
- }
-
- /* Only treat image as dirty if the header was updated successfully */
- s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
- return 0;
-}
-
-/*
- * Clears the dirty bit and flushes before if necessary. Only call this
- * function when there are no pending requests, it does not guard against
- * concurrent requests dirtying the image.
- */
-static int qcow2_mark_clean(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
-
- if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
- int ret = bdrv_flush(bs);
- if (ret < 0) {
- return ret;
- }
-
- s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
- return qcow2_update_header(bs);
- }
- return 0;
-}
-
-static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
- BdrvCheckMode fix)
-{
- int ret = qcow2_check_refcounts(bs, result, fix);
- if (ret < 0) {
- return ret;
- }
-
- if (fix && result->check_errors == 0 && result->corruptions == 0) {
- return qcow2_mark_clean(bs);
- }
- return ret;
-}
-
-static QemuOptsList qcow2_runtime_opts = {
- .name = "qcow2",
- .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
- .desc = {
- {
- .name = "lazy_refcounts",
- .type = QEMU_OPT_BOOL,
- .help = "Postpone refcount updates",
- },
- {
- .name = QCOW2_OPT_DISCARD_REQUEST,
- .type = QEMU_OPT_BOOL,
- .help = "Pass guest discard requests to the layer below",
- },
- {
- .name = QCOW2_OPT_DISCARD_SNAPSHOT,
- .type = QEMU_OPT_BOOL,
- .help = "Generate discard requests when snapshot related space "
- "is freed",
- },
- {
- .name = QCOW2_OPT_DISCARD_OTHER,
- .type = QEMU_OPT_BOOL,
- .help = "Generate discard requests when other clusters are freed",
- },
- { /* end of list */ }
- },
-};
-
-static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQcowState *s = bs->opaque;
- int len, i, ret = 0;
- QCowHeader header;
- QemuOpts *opts;
- Error *local_err = NULL;
- uint64_t ext_end;
- uint64_t l1_vm_state_index;
-
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
- if (ret < 0) {
- goto fail;
- }
- be32_to_cpus(&header.magic);
- be32_to_cpus(&header.version);
- be64_to_cpus(&header.backing_file_offset);
- be32_to_cpus(&header.backing_file_size);
- be64_to_cpus(&header.size);
- be32_to_cpus(&header.cluster_bits);
- be32_to_cpus(&header.crypt_method);
- be64_to_cpus(&header.l1_table_offset);
- be32_to_cpus(&header.l1_size);
- be64_to_cpus(&header.refcount_table_offset);
- be32_to_cpus(&header.refcount_table_clusters);
- be64_to_cpus(&header.snapshots_offset);
- be32_to_cpus(&header.nb_snapshots);
-
- if (header.magic != QCOW_MAGIC) {
- ret = -EMEDIUMTYPE;
- goto fail;
- }
- if (header.version < 2 || header.version > 3) {
- report_unsupported(bs, "QCOW version %d", header.version);
- ret = -ENOTSUP;
- goto fail;
- }
-
- s->qcow_version = header.version;
-
- /* Initialise version 3 header fields */
- if (header.version == 2) {
- header.incompatible_features = 0;
- header.compatible_features = 0;
- header.autoclear_features = 0;
- header.refcount_order = 4;
- header.header_length = 72;
- } else {
- be64_to_cpus(&header.incompatible_features);
- be64_to_cpus(&header.compatible_features);
- be64_to_cpus(&header.autoclear_features);
- be32_to_cpus(&header.refcount_order);
- be32_to_cpus(&header.header_length);
- }
-
- if (header.header_length > sizeof(header)) {
- s->unknown_header_fields_size = header.header_length - sizeof(header);
- s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
- ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
- s->unknown_header_fields_size);
- if (ret < 0) {
- goto fail;
- }
- }
-
- if (header.backing_file_offset) {
- ext_end = header.backing_file_offset;
- } else {
- ext_end = 1 << header.cluster_bits;
- }
-
- /* Handle feature bits */
- s->incompatible_features = header.incompatible_features;
- s->compatible_features = header.compatible_features;
- s->autoclear_features = header.autoclear_features;
-
- if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
- void *feature_table = NULL;
- qcow2_read_extensions(bs, header.header_length, ext_end,
- &feature_table);
- report_unsupported_feature(bs, feature_table,
- s->incompatible_features &
- ~QCOW2_INCOMPAT_MASK);
- ret = -ENOTSUP;
- goto fail;
- }
-
- /* Check support for various header values */
- if (header.refcount_order != 4) {
- report_unsupported(bs, "%d bit reference counts",
- 1 << header.refcount_order);
- ret = -ENOTSUP;
- goto fail;
- }
-
- if (header.cluster_bits < MIN_CLUSTER_BITS ||
- header.cluster_bits > MAX_CLUSTER_BITS) {
- ret = -EINVAL;
- goto fail;
- }
- if (header.crypt_method > QCOW_CRYPT_AES) {
- ret = -EINVAL;
- goto fail;
- }
- s->crypt_method_header = header.crypt_method;
- if (s->crypt_method_header) {
- bs->encrypted = 1;
- }
- s->cluster_bits = header.cluster_bits;
- s->cluster_size = 1 << s->cluster_bits;
- s->cluster_sectors = 1 << (s->cluster_bits - 9);
- s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
- s->l2_size = 1 << s->l2_bits;
- bs->total_sectors = header.size / 512;
- s->csize_shift = (62 - (s->cluster_bits - 8));
- s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
- s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
- s->refcount_table_offset = header.refcount_table_offset;
- s->refcount_table_size =
- header.refcount_table_clusters << (s->cluster_bits - 3);
-
- s->snapshots_offset = header.snapshots_offset;
- s->nb_snapshots = header.nb_snapshots;
-
- /* read the level 1 table */
- s->l1_size = header.l1_size;
-
- l1_vm_state_index = size_to_l1(s, header.size);
- if (l1_vm_state_index > INT_MAX) {
- ret = -EFBIG;
- goto fail;
- }
- s->l1_vm_state_index = l1_vm_state_index;
-
- /* the L1 table must contain at least enough entries to put
- header.size bytes */
- if (s->l1_size < s->l1_vm_state_index) {
- ret = -EINVAL;
- goto fail;
- }
- s->l1_table_offset = header.l1_table_offset;
- if (s->l1_size > 0) {
- s->l1_table = g_malloc0(
- align_offset(s->l1_size * sizeof(uint64_t), 512));
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
- }
-
- /* alloc L2 table/refcount block cache */
- s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
- s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
-
- s->cluster_cache = g_malloc(s->cluster_size);
- /* one more sector for decompressed data alignment */
- s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
- + 512);
- s->cluster_cache_offset = -1;
- s->flags = flags;
-
- ret = qcow2_refcount_init(bs);
- if (ret != 0) {
- goto fail;
- }
-
- QLIST_INIT(&s->cluster_allocs);
- QTAILQ_INIT(&s->discards);
-
- /* read qcow2 extensions */
- if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
- ret = -EINVAL;
- goto fail;
- }
-
- /* read the backing file name */
- if (header.backing_file_offset != 0) {
- len = header.backing_file_size;
- if (len > 1023) {
- len = 1023;
- }
- ret = bdrv_pread(bs->file, header.backing_file_offset,
- bs->backing_file, len);
- if (ret < 0) {
- goto fail;
- }
- bs->backing_file[len] = '\0';
- }
-
- ret = qcow2_read_snapshots(bs);
- if (ret < 0) {
- goto fail;
- }
-
- /* Clear unknown autoclear feature bits */
- if (!bs->read_only && s->autoclear_features != 0) {
- s->autoclear_features = 0;
- ret = qcow2_update_header(bs);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* Initialise locks */
- qemu_co_mutex_init(&s->lock);
-
- /* Repair image if dirty */
- if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
- (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
- BdrvCheckResult result = {0};
-
- ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* Enable lazy_refcounts according to image and command line options */
- opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
- qemu_opts_absorb_qdict(opts, options, &local_err);
- if (error_is_set(&local_err)) {
- qerror_report_err(local_err);
- error_free(local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
- (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
-
- s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
- s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
- s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
- flags & BDRV_O_UNMAP);
- s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
- s->discard_passthrough[QCOW2_DISCARD_OTHER] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
- qemu_opts_del(opts);
-
- if (s->use_lazy_refcounts && s->qcow_version < 3) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
- "a qcow2 image with at least qemu 1.1 compatibility level");
- ret = -EINVAL;
- goto fail;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return ret;
-
- fail:
- g_free(s->unknown_header_fields);
- cleanup_unknown_header_ext(bs);
- qcow2_free_snapshots(bs);
- qcow2_refcount_close(bs);
- g_free(s->l1_table);
- if (s->l2_table_cache) {
- qcow2_cache_destroy(bs, s->l2_table_cache);
- }
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
- return ret;
-}
-
-static int qcow2_set_key(BlockDriverState *bs, const char *key)
-{
- BDRVQcowState *s = bs->opaque;
- uint8_t keybuf[16];
- int len, i;
-
- memset(keybuf, 0, 16);
- len = strlen(key);
- if (len > 16)
- len = 16;
- /* XXX: we could compress the chars to 7 bits to increase
- entropy */
- for(i = 0;i < len;i++) {
- keybuf[i] = key[i];
- }
- s->crypt_method = s->crypt_method_header;
-
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
- return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
-#if 0
- /* test */
- {
- uint8_t in[16];
- uint8_t out[16];
- uint8_t tmp[16];
- for(i=0;i<16;i++)
- in[i] = i;
- AES_encrypt(in, tmp, &s->aes_encrypt_key);
- AES_decrypt(tmp, out, &s->aes_decrypt_key);
- for(i = 0; i < 16; i++)
- printf(" %02x", tmp[i]);
- printf("\n");
- for(i = 0; i < 16; i++)
- printf(" %02x", out[i]);
- printf("\n");
- }
-#endif
- return 0;
-}
-
-/* We have nothing to do for QCOW2 reopen, stubs just return
- * success */
-static int qcow2_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t cluster_offset;
- int ret;
-
- *pnum = nb_sectors;
- /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
- * can't pass them on today */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
- qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- *pnum = 0;
- }
-
- return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
-}
-
-/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors)
-{
- int n1;
- if ((sector_num + nb_sectors) <= bs->total_sectors)
- return nb_sectors;
- if (sector_num >= bs->total_sectors)
- n1 = 0;
- else
- n1 = bs->total_sectors - sector_num;
-
- qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
-
- return n1;
-}
-
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n1;
- int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
- uint64_t cluster_offset = 0;
- uint64_t bytes_done = 0;
- QEMUIOVector hd_qiov;
- uint8_t *cluster_data = NULL;
-
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- qemu_co_mutex_lock(&s->lock);
-
- while (remaining_sectors != 0) {
-
- /* prepare next request */
- cur_nr_sectors = remaining_sectors;
- if (s->crypt_method) {
- cur_nr_sectors = MIN(cur_nr_sectors,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- }
-
- ret = qcow2_get_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset);
- if (ret < 0) {
- goto fail;
- }
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
-
- switch (ret) {
- case QCOW2_CLUSTER_UNALLOCATED:
-
- if (bs->backing_hd) {
- /* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
- sector_num, cur_nr_sectors);
- if (n1 > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n1, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- }
- } else {
- /* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
- }
- break;
-
- case QCOW2_CLUSTER_ZERO:
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
- break;
-
- case QCOW2_CLUSTER_COMPRESSED:
- /* add AIO support for compressed blocks ? */
- ret = qcow2_decompress_cluster(bs, cluster_offset);
- if (ret < 0) {
- goto fail;
- }
-
- qemu_iovec_from_buf(&hd_qiov, 0,
- s->cluster_cache + index_in_cluster * 512,
- 512 * cur_nr_sectors);
- break;
-
- case QCOW2_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
- }
-
- if (s->crypt_method) {
- /*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
- if (!cluster_data) {
- cluster_data =
- qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- }
-
- assert(cur_nr_sectors <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- 512 * cur_nr_sectors);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
- qemu_iovec_from_buf(qiov, bytes_done,
- cluster_data, 512 * cur_nr_sectors);
- }
- break;
-
- default:
- g_assert_not_reached();
- ret = -EIO;
- goto fail;
- }
-
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- }
- ret = 0;
-
-fail:
- qemu_co_mutex_unlock(&s->lock);
-
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
-
- return ret;
-}
-
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- int n_end;
- int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
- uint64_t cluster_offset;
- QEMUIOVector hd_qiov;
- uint64_t bytes_done = 0;
- uint8_t *cluster_data = NULL;
- QCowL2Meta *l2meta = NULL;
-
- trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
- remaining_sectors);
-
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
- qemu_co_mutex_lock(&s->lock);
-
- while (remaining_sectors != 0) {
-
- l2meta = NULL;
-
- trace_qcow2_writev_start_part(qemu_coroutine_self());
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n_end = index_in_cluster + remaining_sectors;
- if (s->crypt_method &&
- n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
- n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
- }
-
- ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
- if (ret < 0) {
- goto fail;
- }
-
- assert((cluster_offset & 511) == 0);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
-
- if (s->crypt_method) {
- if (!cluster_data) {
- cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
- s->cluster_size);
- }
-
- assert(hd_qiov.size <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
-
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- cur_nr_sectors * 512);
- }
-
- qemu_co_mutex_unlock(&s->lock);
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- trace_qcow2_writev_data(qemu_coroutine_self(),
- (cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
-
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
- if (ret < 0) {
- goto fail;
- }
-
- /* Take the request off the list of running requests */
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
-
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
- }
-
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
- }
- ret = 0;
-
-fail:
- qemu_co_mutex_unlock(&s->lock);
-
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
- }
-
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
- trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
-
- return ret;
-}
-
-static void qcow2_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- g_free(s->l1_table);
-
- qcow2_cache_flush(bs, s->l2_table_cache);
- qcow2_cache_flush(bs, s->refcount_block_cache);
-
- qcow2_mark_clean(bs);
-
- qcow2_cache_destroy(bs, s->l2_table_cache);
- qcow2_cache_destroy(bs, s->refcount_block_cache);
-
- g_free(s->unknown_header_fields);
- cleanup_unknown_header_ext(bs);
-
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
- qcow2_refcount_close(bs);
- qcow2_free_snapshots(bs);
-}
-
-static void qcow2_invalidate_cache(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int flags = s->flags;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- uint32_t crypt_method = 0;
- QDict *options;
-
- /*
- * Backing files are read-only which makes all of their metadata immutable,
- * that means we don't have to worry about reopening them here.
- */
-
- if (s->crypt_method) {
- crypt_method = s->crypt_method;
- memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
- }
-
- qcow2_close(bs);
-
- options = qdict_new();
- qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
- qbool_from_int(s->use_lazy_refcounts));
-
- memset(s, 0, sizeof(BDRVQcowState));
- qcow2_open(bs, options, flags);
-
- QDECREF(options);
-
- if (crypt_method) {
- s->crypt_method = crypt_method;
- memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
- }
-}
-
-static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
- size_t len, size_t buflen)
-{
- QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
- size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
-
- if (buflen < ext_len) {
- return -ENOSPC;
- }
-
- *ext_backing_fmt = (QCowExtension) {
- .magic = cpu_to_be32(magic),
- .len = cpu_to_be32(len),
- };
- memcpy(buf + sizeof(QCowExtension), s, len);
-
- return ext_len;
-}
-
-/*
- * Updates the qcow2 header, including the variable length parts of it, i.e.
- * the backing file name and all extensions. qcow2 was not designed to allow
- * such changes, so if we run out of space (we can only use the first cluster)
- * this function may fail.
- *
- * Returns 0 on success, -errno in error cases.
- */
-int qcow2_update_header(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowHeader *header;
- char *buf;
- size_t buflen = s->cluster_size;
- int ret;
- uint64_t total_size;
- uint32_t refcount_table_clusters;
- size_t header_length;
- Qcow2UnknownHeaderExtension *uext;
-
- buf = qemu_blockalign(bs, buflen);
-
- /* Header structure */
- header = (QCowHeader*) buf;
-
- if (buflen < sizeof(*header)) {
- ret = -ENOSPC;
- goto fail;
- }
-
- header_length = sizeof(*header) + s->unknown_header_fields_size;
- total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
-
- *header = (QCowHeader) {
- /* Version 2 fields */
- .magic = cpu_to_be32(QCOW_MAGIC),
- .version = cpu_to_be32(s->qcow_version),
- .backing_file_offset = 0,
- .backing_file_size = 0,
- .cluster_bits = cpu_to_be32(s->cluster_bits),
- .size = cpu_to_be64(total_size),
- .crypt_method = cpu_to_be32(s->crypt_method_header),
- .l1_size = cpu_to_be32(s->l1_size),
- .l1_table_offset = cpu_to_be64(s->l1_table_offset),
- .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
- .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
- .nb_snapshots = cpu_to_be32(s->nb_snapshots),
- .snapshots_offset = cpu_to_be64(s->snapshots_offset),
-
- /* Version 3 fields */
- .incompatible_features = cpu_to_be64(s->incompatible_features),
- .compatible_features = cpu_to_be64(s->compatible_features),
- .autoclear_features = cpu_to_be64(s->autoclear_features),
- .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
- .header_length = cpu_to_be32(header_length),
- };
-
- /* For older versions, write a shorter header */
- switch (s->qcow_version) {
- case 2:
- ret = offsetof(QCowHeader, incompatible_features);
- break;
- case 3:
- ret = sizeof(*header);
- break;
- default:
- ret = -EINVAL;
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- memset(buf, 0, buflen);
-
- /* Preserve any unknown field in the header */
- if (s->unknown_header_fields_size) {
- if (buflen < s->unknown_header_fields_size) {
- ret = -ENOSPC;
- goto fail;
- }
-
- memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
- buf += s->unknown_header_fields_size;
- buflen -= s->unknown_header_fields_size;
- }
-
- /* Backing file format header extension */
- if (*bs->backing_format) {
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
- bs->backing_format, strlen(bs->backing_format),
- buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- }
-
- /* Feature table */
- Qcow2Feature features[] = {
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
- .name = "dirty bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_COMPATIBLE,
- .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
- .name = "lazy refcounts",
- },
- };
-
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
- features, sizeof(features), buflen);
- if (ret < 0) {
- goto fail;
- }
- buf += ret;
- buflen -= ret;
-
- /* Keep unknown header extensions */
- QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
- ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- }
-
- /* End of header extensions */
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
-
- /* Backing file name */
- if (*bs->backing_file) {
- size_t backing_file_len = strlen(bs->backing_file);
-
- if (buflen < backing_file_len) {
- ret = -ENOSPC;
- goto fail;
- }
-
- /* Using strncpy is ok here, since buf is not NUL-terminated. */
- strncpy(buf, bs->backing_file, buflen);
-
- header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
- header->backing_file_size = cpu_to_be32(backing_file_len);
- }
-
- /* Write the new header */
- ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
- if (ret < 0) {
- goto fail;
- }
-
- ret = 0;
-fail:
- qemu_vfree(header);
- return ret;
-}
-
-static int qcow2_change_backing_file(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt)
-{
- pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
- pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
-
- return qcow2_update_header(bs);
-}
-
-static int preallocate(BlockDriverState *bs)
-{
- uint64_t nb_sectors;
- uint64_t offset;
- uint64_t host_offset = 0;
- int num;
- int ret;
- QCowL2Meta *meta;
-
- nb_sectors = bdrv_getlength(bs) >> 9;
- offset = 0;
-
- while (nb_sectors) {
- num = MIN(nb_sectors, INT_MAX >> 9);
- ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
- &host_offset, &meta);
- if (ret < 0) {
- return ret;
- }
-
- ret = qcow2_alloc_cluster_link_l2(bs, meta);
- if (ret < 0) {
- qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
- QCOW2_DISCARD_NEVER);
- return ret;
- }
-
- /* There are no dependent requests, but we need to remove our request
- * from the list of in-flight requests */
- if (meta != NULL) {
- QLIST_REMOVE(meta, next_in_flight);
- }
-
- /* TODO Preallocate data if requested */
-
- nb_sectors -= num;
- offset += num << 9;
- }
-
- /*
- * It is expected that the image file is large enough to actually contain
- * all of the allocated clusters (otherwise we get failing reads after
- * EOF). Extend the image to the last allocated sector.
- */
- if (host_offset != 0) {
- uint8_t buf[512];
- memset(buf, 0, 512);
- ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
- if (ret < 0) {
- return ret;
- }
- }
-
- return 0;
-}
-
-static int qcow2_create2(const char *filename, int64_t total_size,
- const char *backing_file, const char *backing_format,
- int flags, size_t cluster_size, int prealloc,
- QEMUOptionParameter *options, int version)
-{
- /* Calculate cluster_bits */
- int cluster_bits;
- cluster_bits = ffs(cluster_size) - 1;
- if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
- (1 << cluster_bits) != cluster_size)
- {
- error_report(
- "Cluster size must be a power of two between %d and %dk",
- 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
- return -EINVAL;
- }
-
- /*
- * Open the image file and write a minimal qcow2 header.
- *
- * We keep things simple and start with a zero-sized image. We also
- * do without refcount blocks or a L1 table for now. We'll fix the
- * inconsistency later.
- *
- * We do need a refcount table because growing the refcount table means
- * allocating two new refcount blocks - the seconds of which would be at
- * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
- * size for any qcow2 image.
- */
- BlockDriverState* bs;
- QCowHeader header;
- uint8_t* refcount_table;
- int ret;
-
- ret = bdrv_create_file(filename, options);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
- if (ret < 0) {
- return ret;
- }
-
- /* Write the header */
- memset(&header, 0, sizeof(header));
- header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(version);
- header.cluster_bits = cpu_to_be32(cluster_bits);
- header.size = cpu_to_be64(0);
- header.l1_table_offset = cpu_to_be64(0);
- header.l1_size = cpu_to_be32(0);
- header.refcount_table_offset = cpu_to_be64(cluster_size);
- header.refcount_table_clusters = cpu_to_be32(1);
- header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
- header.header_length = cpu_to_be32(sizeof(header));
-
- if (flags & BLOCK_FLAG_ENCRYPT) {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
- } else {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
- }
-
- if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
- header.compatible_features |=
- cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
- }
-
- ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
- if (ret < 0) {
- goto out;
- }
-
- /* Write an empty refcount table */
- refcount_table = g_malloc0(cluster_size);
- ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
- g_free(refcount_table);
-
- if (ret < 0) {
- goto out;
- }
-
- bdrv_close(bs);
-
- /*
- * And now open the image and make it consistent first (i.e. increase the
- * refcount of the cluster that is occupied by the header and the refcount
- * table)
- */
- BlockDriver* drv = bdrv_find_format("qcow2");
- assert(drv != NULL);
- ret = bdrv_open(bs, filename, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
- if (ret < 0) {
- goto out;
- }
-
- ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
- if (ret < 0) {
- goto out;
-
- } else if (ret != 0) {
- error_report("Huh, first cluster in empty image is already in use?");
- abort();
- }
-
- /* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
- if (ret < 0) {
- goto out;
- }
-
- /* Want a backing file? There you go.*/
- if (backing_file) {
- ret = bdrv_change_backing_file(bs, backing_file, backing_format);
- if (ret < 0) {
- goto out;
- }
- }
-
- /* And if we're supposed to preallocate metadata, do that now */
- if (prealloc) {
- BDRVQcowState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = preallocate(bs);
- qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- bdrv_delete(bs);
- return ret;
-}
-
-static int qcow2_create(const char *filename, QEMUOptionParameter *options)
-{
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
- uint64_t sectors = 0;
- int flags = 0;
- size_t cluster_size = DEFAULT_CLUSTER_SIZE;
- int prealloc = 0;
- int version = 2;
-
- /* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- sectors = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
- if (!options->value.s || !strcmp(options->value.s, "off")) {
- prealloc = 0;
- } else if (!strcmp(options->value.s, "metadata")) {
- prealloc = 1;
- } else {
- fprintf(stderr, "Invalid preallocation mode: '%s'\n",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
- if (!options->value.s || !strcmp(options->value.s, "0.10")) {
- version = 2;
- } else if (!strcmp(options->value.s, "1.1")) {
- version = 3;
- } else {
- fprintf(stderr, "Invalid compatibility level: '%s'\n",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
- flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
- }
- options++;
- }
-
- if (backing_file && prealloc) {
- fprintf(stderr, "Backing file and preallocation cannot be used at "
- "the same time\n");
- return -EINVAL;
- }
-
- if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
- fprintf(stderr, "Lazy refcounts only supported with compatibility "
- "level 1.1 and above (use compat=1.1 or greater)\n");
- return -EINVAL;
- }
-
- return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
- cluster_size, prealloc, options, version);
-}
-
-static int qcow2_make_empty(BlockDriverState *bs)
-{
-#if 0
- /* XXX: not correct */
- BDRVQcowState *s = bs->opaque;
- uint32_t l1_length = s->l1_size * sizeof(uint64_t);
- int ret;
-
- memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
- return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
- if (ret < 0)
- return ret;
-
- l2_cache_reset(bs);
-#endif
- return 0;
-}
-
-static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- int ret;
- BDRVQcowState *s = bs->opaque;
-
- /* Emulate misaligned zero writes */
- if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
- return -ENOTSUP;
- }
-
- /* Whatever is left can use real zero clusters */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
-
- return ret;
-}
-
-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- int ret;
- BDRVQcowState *s = bs->opaque;
-
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
- return ret;
-}
-
-static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t new_l1_size;
- int ret;
-
- if (offset & 511) {
- error_report("The new size must be a multiple of 512");
- return -EINVAL;
- }
-
- /* cannot proceed if image has snapshots */
- if (s->nb_snapshots) {
- error_report("Can't resize an image which has snapshots");
- return -ENOTSUP;
- }
-
- /* shrinking is currently not supported */
- if (offset < bs->total_sectors * 512) {
- error_report("qcow2 doesn't support shrinking images yet");
- return -ENOTSUP;
- }
-
- new_l1_size = size_to_l1(s, offset);
- ret = qcow2_grow_l1_table(bs, new_l1_size, true);
- if (ret < 0) {
- return ret;
- }
-
- /* write updated header.size */
- offset = cpu_to_be64(offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
- &offset, sizeof(uint64_t));
- if (ret < 0) {
- return ret;
- }
-
- s->l1_vm_state_index = new_l1_size;
- return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
- tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- z_stream strm;
- int ret, out_len;
- uint8_t *out_buf;
- uint64_t cluster_offset;
-
- if (nb_sectors == 0) {
- /* align end of file to a sector boundary to ease reading with
- sector based I/Os */
- cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + 511) & ~511;
- bdrv_truncate(bs->file, cluster_offset);
- return 0;
- }
-
- if (nb_sectors != s->cluster_sectors) {
- ret = -EINVAL;
-
- /* Zero-pad last write if image size is not cluster aligned */
- if (sector_num + nb_sectors == bs->total_sectors &&
- nb_sectors < s->cluster_sectors) {
- uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
- memset(pad_buf, 0, s->cluster_size);
- memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
- ret = qcow2_write_compressed(bs, sector_num,
- pad_buf, s->cluster_sectors);
- qemu_vfree(pad_buf);
- }
- return ret;
- }
-
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, -12,
- 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
- ret = -EINVAL;
- goto fail;
- }
-
- strm.avail_in = s->cluster_size;
- strm.next_in = (uint8_t *)buf;
- strm.avail_out = s->cluster_size;
- strm.next_out = out_buf;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret != Z_STREAM_END && ret != Z_OK) {
- deflateEnd(&strm);
- ret = -EINVAL;
- goto fail;
- }
- out_len = strm.next_out - out_buf;
-
- deflateEnd(&strm);
-
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
- /* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
- if (ret < 0) {
- goto fail;
- }
- } else {
- cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
- sector_num << 9, out_len);
- if (!cluster_offset) {
- ret = -EIO;
- goto fail;
- }
- cluster_offset &= s->cluster_offset_mask;
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = 0;
-fail:
- g_free(out_buf);
- return ret;
-}
-
-static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
- }
-
- if (qcow2_need_accurate_refcounts(s)) {
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
- }
- }
- qemu_co_mutex_unlock(&s->lock);
-
- return 0;
-}
-
-static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
-{
- return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
-}
-
-static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQcowState *s = bs->opaque;
- bdi->cluster_size = s->cluster_size;
- bdi->vm_state_offset = qcow2_vm_state_offset(s);
- return 0;
-}
-
-#if 0
-static void dump_refcounts(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t nb_clusters, k, k1, size;
- int refcount;
-
- size = bdrv_getlength(bs->file);
- nb_clusters = size_to_clusters(s, size);
- for(k = 0; k < nb_clusters;) {
- k1 = k;
- refcount = get_refcount(bs, k);
- k++;
- while (k < nb_clusters && get_refcount(bs, k) == refcount)
- k++;
- printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
- k - k1);
- }
-}
-#endif
-
-static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t pos)
-{
- BDRVQcowState *s = bs->opaque;
- int growable = bs->growable;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- bs->growable = 1;
- ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
- bs->growable = growable;
-
- return ret;
-}
-
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
-{
- BDRVQcowState *s = bs->opaque;
- int growable = bs->growable;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- bs->growable = 1;
- ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
- bs->growable = growable;
-
- return ret;
-}
-
-static QEMUOptionParameter qcow2_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_COMPAT_LEVEL,
- .type = OPT_STRING,
- .help = "Compatibility level (0.10 or 1.1)"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "qcow2 cluster size",
- .value = { .n = DEFAULT_CLUSTER_SIZE },
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = OPT_STRING,
- .help = "Preallocation mode (allowed values: off, metadata)"
- },
- {
- .name = BLOCK_OPT_LAZY_REFCOUNTS,
- .type = OPT_FLAG,
- .help = "Postpone refcount updates",
- },
- { NULL }
-};
-
-static BlockDriver bdrv_qcow2 = {
- .format_name = "qcow2",
- .instance_size = sizeof(BDRVQcowState),
- .bdrv_probe = qcow2_probe,
- .bdrv_open = qcow2_open,
- .bdrv_close = qcow2_close,
- .bdrv_reopen_prepare = qcow2_reopen_prepare,
- .bdrv_create = qcow2_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = qcow2_co_is_allocated,
- .bdrv_set_key = qcow2_set_key,
- .bdrv_make_empty = qcow2_make_empty,
-
- .bdrv_co_readv = qcow2_co_readv,
- .bdrv_co_writev = qcow2_co_writev,
- .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
-
- .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
- .bdrv_co_discard = qcow2_co_discard,
- .bdrv_truncate = qcow2_truncate,
- .bdrv_write_compressed = qcow2_write_compressed,
-
- .bdrv_snapshot_create = qcow2_snapshot_create,
- .bdrv_snapshot_goto = qcow2_snapshot_goto,
- .bdrv_snapshot_delete = qcow2_snapshot_delete,
- .bdrv_snapshot_list = qcow2_snapshot_list,
- .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
- .bdrv_get_info = qcow2_get_info,
-
- .bdrv_save_vmstate = qcow2_save_vmstate,
- .bdrv_load_vmstate = qcow2_load_vmstate,
-
- .bdrv_change_backing_file = qcow2_change_backing_file,
-
- .bdrv_invalidate_cache = qcow2_invalidate_cache,
-
- .create_options = qcow2_create_options,
- .bdrv_check = qcow2_check,
-};
-
-static void bdrv_qcow2_init(void)
-{
- bdrv_register(&bdrv_qcow2);
-}
-
-block_init(bdrv_qcow2_init);
diff --git a/contrib/qemu/block/qcow2.h b/contrib/qemu/block/qcow2.h
deleted file mode 100644
index 3b2d5cda71f..00000000000
--- a/contrib/qemu/block/qcow2.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef BLOCK_QCOW2_H
-#define BLOCK_QCOW2_H
-
-#include "qemu/aes.h"
-#include "block/coroutine.h"
-
-//#define DEBUG_ALLOC
-//#define DEBUG_ALLOC2
-//#define DEBUG_EXT
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES 1
-
-#define QCOW_MAX_CRYPT_CLUSTERS 32
-
-/* indicate that the refcount of the referenced cluster is exactly one. */
-#define QCOW_OFLAG_COPIED (1LL << 63)
-/* indicate that the cluster is compressed (they never have the copied flag) */
-#define QCOW_OFLAG_COMPRESSED (1LL << 62)
-/* The cluster reads as all zeros */
-#define QCOW_OFLAG_ZERO (1LL << 0)
-
-#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
-
-#define MIN_CLUSTER_BITS 9
-#define MAX_CLUSTER_BITS 21
-
-#define L2_CACHE_SIZE 16
-
-/* Must be at least 4 to cover all cases of refcount table growth */
-#define REFCOUNT_CACHE_SIZE 4
-
-#define DEFAULT_CLUSTER_SIZE 65536
-
-
-#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request"
-#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot"
-#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other"
-
-typedef struct QCowHeader {
- uint32_t magic;
- uint32_t version;
- uint64_t backing_file_offset;
- uint32_t backing_file_size;
- uint32_t cluster_bits;
- uint64_t size; /* in bytes */
- uint32_t crypt_method;
- uint32_t l1_size; /* XXX: save number of clusters instead ? */
- uint64_t l1_table_offset;
- uint64_t refcount_table_offset;
- uint32_t refcount_table_clusters;
- uint32_t nb_snapshots;
- uint64_t snapshots_offset;
-
- /* The following fields are only valid for version >= 3 */
- uint64_t incompatible_features;
- uint64_t compatible_features;
- uint64_t autoclear_features;
-
- uint32_t refcount_order;
- uint32_t header_length;
-} QCowHeader;
-
-typedef struct QCowSnapshot {
- uint64_t l1_table_offset;
- uint32_t l1_size;
- char *id_str;
- char *name;
- uint64_t disk_size;
- uint64_t vm_state_size;
- uint32_t date_sec;
- uint32_t date_nsec;
- uint64_t vm_clock_nsec;
-} QCowSnapshot;
-
-struct Qcow2Cache;
-typedef struct Qcow2Cache Qcow2Cache;
-
-typedef struct Qcow2UnknownHeaderExtension {
- uint32_t magic;
- uint32_t len;
- QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
- uint8_t data[];
-} Qcow2UnknownHeaderExtension;
-
-enum {
- QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
- QCOW2_FEAT_TYPE_COMPATIBLE = 1,
- QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
-};
-
-/* Incompatible feature bits */
-enum {
- QCOW2_INCOMPAT_DIRTY_BITNR = 0,
- QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
-
- QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
-};
-
-/* Compatible feature bits */
-enum {
- QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
- QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
-
- QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
-};
-
-enum qcow2_discard_type {
- QCOW2_DISCARD_NEVER = 0,
- QCOW2_DISCARD_ALWAYS,
- QCOW2_DISCARD_REQUEST,
- QCOW2_DISCARD_SNAPSHOT,
- QCOW2_DISCARD_OTHER,
- QCOW2_DISCARD_MAX
-};
-
-typedef struct Qcow2Feature {
- uint8_t type;
- uint8_t bit;
- char name[46];
-} QEMU_PACKED Qcow2Feature;
-
-typedef struct Qcow2DiscardRegion {
- BlockDriverState *bs;
- uint64_t offset;
- uint64_t bytes;
- QTAILQ_ENTRY(Qcow2DiscardRegion) next;
-} Qcow2DiscardRegion;
-
-typedef struct BDRVQcowState {
- int cluster_bits;
- int cluster_size;
- int cluster_sectors;
- int l2_bits;
- int l2_size;
- int l1_size;
- int l1_vm_state_index;
- int csize_shift;
- int csize_mask;
- uint64_t cluster_offset_mask;
- uint64_t l1_table_offset;
- uint64_t *l1_table;
-
- Qcow2Cache* l2_table_cache;
- Qcow2Cache* refcount_block_cache;
-
- uint8_t *cluster_cache;
- uint8_t *cluster_data;
- uint64_t cluster_cache_offset;
- QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
-
- uint64_t *refcount_table;
- uint64_t refcount_table_offset;
- uint32_t refcount_table_size;
- int64_t free_cluster_index;
- int64_t free_byte_offset;
-
- CoMutex lock;
-
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
- uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- uint64_t snapshots_offset;
- int snapshots_size;
- int nb_snapshots;
- QCowSnapshot *snapshots;
-
- int flags;
- int qcow_version;
- bool use_lazy_refcounts;
-
- bool discard_passthrough[QCOW2_DISCARD_MAX];
-
- uint64_t incompatible_features;
- uint64_t compatible_features;
- uint64_t autoclear_features;
-
- size_t unknown_header_fields_size;
- void* unknown_header_fields;
- QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
- QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
- bool cache_discards;
-} BDRVQcowState;
-
-/* XXX: use std qcow open function ? */
-typedef struct QCowCreateState {
- int cluster_size;
- int cluster_bits;
- uint16_t *refcount_block;
- uint64_t *refcount_table;
- int64_t l1_table_offset;
- int64_t refcount_table_offset;
- int64_t refcount_block_offset;
-} QCowCreateState;
-
-struct QCowAIOCB;
-
-typedef struct Qcow2COWRegion {
- /**
- * Offset of the COW region in bytes from the start of the first cluster
- * touched by the request.
- */
- uint64_t offset;
-
- /** Number of sectors to copy */
- int nb_sectors;
-} Qcow2COWRegion;
-
-/**
- * Describes an in-flight (part of a) write request that writes to clusters
- * that are not referenced in their L2 table yet.
- */
-typedef struct QCowL2Meta
-{
- /** Guest offset of the first newly allocated cluster */
- uint64_t offset;
-
- /** Host offset of the first newly allocated cluster */
- uint64_t alloc_offset;
-
- /**
- * Number of sectors from the start of the first allocated cluster to
- * the end of the (possibly shortened) request
- */
- int nb_available;
-
- /** Number of newly allocated clusters */
- int nb_clusters;
-
- /**
- * Requests that overlap with this allocation and wait to be restarted
- * when the allocating request has completed.
- */
- CoQueue dependent_requests;
-
- /**
- * The COW Region between the start of the first allocated cluster and the
- * area the guest actually writes to.
- */
- Qcow2COWRegion cow_start;
-
- /**
- * The COW Region between the area the guest actually writes to and the
- * end of the last allocated cluster.
- */
- Qcow2COWRegion cow_end;
-
- /** Pointer to next L2Meta of the same write request */
- struct QCowL2Meta *next;
-
- QLIST_ENTRY(QCowL2Meta) next_in_flight;
-} QCowL2Meta;
-
-enum {
- QCOW2_CLUSTER_UNALLOCATED,
- QCOW2_CLUSTER_NORMAL,
- QCOW2_CLUSTER_COMPRESSED,
- QCOW2_CLUSTER_ZERO
-};
-
-#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
-
-#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
-
-static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
-{
- return offset & ~(s->cluster_size - 1);
-}
-
-static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
-{
- return offset & (s->cluster_size - 1);
-}
-
-static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
-{
- return (size + (s->cluster_size - 1)) >> s->cluster_bits;
-}
-
-static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
-{
- int shift = s->cluster_bits + s->l2_bits;
- return (size + (1ULL << shift) - 1) >> shift;
-}
-
-static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
-{
- return (offset >> s->cluster_bits) & (s->l2_size - 1);
-}
-
-static inline int64_t align_offset(int64_t offset, int n)
-{
- offset = (offset + n - 1) & ~(n - 1);
- return offset;
-}
-
-static inline int qcow2_get_cluster_type(uint64_t l2_entry)
-{
- if (l2_entry & QCOW_OFLAG_COMPRESSED) {
- return QCOW2_CLUSTER_COMPRESSED;
- } else if (l2_entry & QCOW_OFLAG_ZERO) {
- return QCOW2_CLUSTER_ZERO;
- } else if (!(l2_entry & L2E_OFFSET_MASK)) {
- return QCOW2_CLUSTER_UNALLOCATED;
- } else {
- return QCOW2_CLUSTER_NORMAL;
- }
-}
-
-/* Check whether refcounts are eager or lazy */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
-{
- return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
-}
-
-static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
-{
- return m->offset + m->cow_start.offset;
-}
-
-static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
-{
- return m->offset + m->cow_end.offset
- + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
-}
-
-// FIXME Need qcow2_ prefix to global functions
-
-/* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors);
-
-int qcow2_mark_dirty(BlockDriverState *bs);
-int qcow2_update_header(BlockDriverState *bs);
-
-/* qcow2-refcount.c functions */
-int qcow2_refcount_init(BlockDriverState *bs);
-void qcow2_refcount_close(BlockDriverState *bs);
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
-void qcow2_free_clusters(BlockDriverState *bs,
- int64_t offset, int64_t size,
- enum qcow2_discard_type type);
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
- int nb_clusters, enum qcow2_discard_type type);
-
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
- int64_t l1_table_offset, int l1_size, int addend);
-
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix);
-
-void qcow2_process_discards(BlockDriverState *bs, int ret);
-
-/* qcow2-cluster.c functions */
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
- bool exact_size);
-void qcow2_l2_cache_reset(BlockDriverState *bs);
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key);
-
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset);
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int compressed_size);
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
-
-/* qcow2-snapshot.c functions */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
-
-void qcow2_free_snapshots(BlockDriverState *bs);
-int qcow2_read_snapshots(BlockDriverState *bs);
-
-/* qcow2-cache.c functions */
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
-
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
- Qcow2Cache *dependency);
-void qcow2_cache_depends_on_flush(Qcow2Cache *c);
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
-
-#endif
diff --git a/contrib/qemu/block/qed-check.c b/contrib/qemu/block/qed-check.c
deleted file mode 100644
index b473dcd61f6..00000000000
--- a/contrib/qemu/block/qed-check.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Consistency Check
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-typedef struct {
- BDRVQEDState *s;
- BdrvCheckResult *result;
- bool fix; /* whether to fix invalid offsets */
-
- uint64_t nclusters;
- uint32_t *used_clusters; /* referenced cluster bitmap */
-
- QEDRequest request;
-} QEDCheck;
-
-static bool qed_test_bit(uint32_t *bitmap, uint64_t n) {
- return !!(bitmap[n / 32] & (1 << (n % 32)));
-}
-
-static void qed_set_bit(uint32_t *bitmap, uint64_t n) {
- bitmap[n / 32] |= 1 << (n % 32);
-}
-
-/**
- * Set bitmap bits for clusters
- *
- * @check: Check structure
- * @offset: Starting offset in bytes
- * @n: Number of clusters
- */
-static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
- unsigned int n)
-{
- uint64_t cluster = qed_bytes_to_clusters(check->s, offset);
- unsigned int corruptions = 0;
-
- while (n-- != 0) {
- /* Clusters should only be referenced once */
- if (qed_test_bit(check->used_clusters, cluster)) {
- corruptions++;
- }
-
- qed_set_bit(check->used_clusters, cluster);
- cluster++;
- }
-
- check->result->corruptions += corruptions;
- return corruptions == 0;
-}
-
-/**
- * Check an L2 table
- *
- * @ret: Number of invalid cluster offsets
- */
-static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
-{
- BDRVQEDState *s = check->s;
- unsigned int i, num_invalid = 0;
- uint64_t last_offset = 0;
-
- for (i = 0; i < s->table_nelems; i++) {
- uint64_t offset = table->offsets[i];
-
- if (qed_offset_is_unalloc_cluster(offset) ||
- qed_offset_is_zero_cluster(offset)) {
- continue;
- }
- check->result->bfi.allocated_clusters++;
- if (last_offset && (last_offset + s->header.cluster_size != offset)) {
- check->result->bfi.fragmented_clusters++;
- }
- last_offset = offset;
-
- /* Detect invalid cluster offset */
- if (!qed_check_cluster_offset(s, offset)) {
- if (check->fix) {
- table->offsets[i] = 0;
- check->result->corruptions_fixed++;
- } else {
- check->result->corruptions++;
- }
-
- num_invalid++;
- continue;
- }
-
- qed_set_used_clusters(check, offset, 1);
- }
-
- return num_invalid;
-}
-
-/**
- * Descend tables and check each cluster is referenced once only
- */
-static int qed_check_l1_table(QEDCheck *check, QEDTable *table)
-{
- BDRVQEDState *s = check->s;
- unsigned int i, num_invalid_l1 = 0;
- int ret, last_error = 0;
-
- /* Mark L1 table clusters used */
- qed_set_used_clusters(check, s->header.l1_table_offset,
- s->header.table_size);
-
- for (i = 0; i < s->table_nelems; i++) {
- unsigned int num_invalid_l2;
- uint64_t offset = table->offsets[i];
-
- if (qed_offset_is_unalloc_cluster(offset)) {
- continue;
- }
-
- /* Detect invalid L2 offset */
- if (!qed_check_table_offset(s, offset)) {
- /* Clear invalid offset */
- if (check->fix) {
- table->offsets[i] = 0;
- check->result->corruptions_fixed++;
- } else {
- check->result->corruptions++;
- }
-
- num_invalid_l1++;
- continue;
- }
-
- if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
- continue; /* skip an invalid table */
- }
-
- ret = qed_read_l2_table_sync(s, &check->request, offset);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- continue;
- }
-
- num_invalid_l2 = qed_check_l2_table(check,
- check->request.l2_table->table);
-
- /* Write out fixed L2 table */
- if (num_invalid_l2 > 0 && check->fix) {
- ret = qed_write_l2_table_sync(s, &check->request, 0,
- s->table_nelems, false);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- continue;
- }
- }
- }
-
- /* Drop reference to final table */
- qed_unref_l2_cache_entry(check->request.l2_table);
- check->request.l2_table = NULL;
-
- /* Write out fixed L1 table */
- if (num_invalid_l1 > 0 && check->fix) {
- ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- }
- }
-
- return last_error;
-}
-
-/**
- * Check for unreferenced (leaked) clusters
- */
-static void qed_check_for_leaks(QEDCheck *check)
-{
- BDRVQEDState *s = check->s;
- uint64_t i;
-
- for (i = s->header.header_size; i < check->nclusters; i++) {
- if (!qed_test_bit(check->used_clusters, i)) {
- check->result->leaks++;
- }
- }
-}
-
-/**
- * Mark an image clean once it passes check or has been repaired
- */
-static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
-{
- /* Skip if there were unfixable corruptions or I/O errors */
- if (result->corruptions > 0 || result->check_errors > 0) {
- return;
- }
-
- /* Skip if image is already marked clean */
- if (!(s->header.features & QED_F_NEED_CHECK)) {
- return;
- }
-
- /* Ensure fixes reach storage before clearing check bit */
- bdrv_flush(s->bs);
-
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header_sync(s);
-}
-
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
-{
- QEDCheck check = {
- .s = s,
- .result = result,
- .nclusters = qed_bytes_to_clusters(s, s->file_size),
- .request = { .l2_table = NULL },
- .fix = fix,
- };
- int ret;
-
- check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
- sizeof(check.used_clusters[0]));
-
- check.result->bfi.total_clusters =
- (s->header.image_size + s->header.cluster_size - 1) /
- s->header.cluster_size;
- ret = qed_check_l1_table(&check, s->l1_table);
- if (ret == 0) {
- /* Only check for leaks if entire image was scanned successfully */
- qed_check_for_leaks(&check);
-
- if (fix) {
- qed_check_mark_clean(s, result);
- }
- }
-
- g_free(check.used_clusters);
- return ret;
-}
diff --git a/contrib/qemu/block/qed-cluster.c b/contrib/qemu/block/qed-cluster.c
deleted file mode 100644
index f64b2af8f7e..00000000000
--- a/contrib/qemu/block/qed-cluster.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Cluster functions
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-/**
- * Count the number of contiguous data clusters
- *
- * @s: QED state
- * @table: L2 table
- * @index: First cluster index
- * @n: Maximum number of clusters
- * @offset: Set to first cluster offset
- *
- * This function scans tables for contiguous clusters. A contiguous run of
- * clusters may be allocated, unallocated, or zero.
- */
-static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
- QEDTable *table,
- unsigned int index,
- unsigned int n,
- uint64_t *offset)
-{
- unsigned int end = MIN(index + n, s->table_nelems);
- uint64_t last = table->offsets[index];
- unsigned int i;
-
- *offset = last;
-
- for (i = index + 1; i < end; i++) {
- if (qed_offset_is_unalloc_cluster(last)) {
- /* Counting unallocated clusters */
- if (!qed_offset_is_unalloc_cluster(table->offsets[i])) {
- break;
- }
- } else if (qed_offset_is_zero_cluster(last)) {
- /* Counting zero clusters */
- if (!qed_offset_is_zero_cluster(table->offsets[i])) {
- break;
- }
- } else {
- /* Counting allocated clusters */
- if (table->offsets[i] != last + s->header.cluster_size) {
- break;
- }
- last = table->offsets[i];
- }
- }
- return i - index;
-}
-
-typedef struct {
- BDRVQEDState *s;
- uint64_t pos;
- size_t len;
-
- QEDRequest *request;
-
- /* User callback */
- QEDFindClusterFunc *cb;
- void *opaque;
-} QEDFindClusterCB;
-
-static void qed_find_cluster_cb(void *opaque, int ret)
-{
- QEDFindClusterCB *find_cluster_cb = opaque;
- BDRVQEDState *s = find_cluster_cb->s;
- QEDRequest *request = find_cluster_cb->request;
- uint64_t offset = 0;
- size_t len = 0;
- unsigned int index;
- unsigned int n;
-
- if (ret) {
- goto out;
- }
-
- index = qed_l2_index(s, find_cluster_cb->pos);
- n = qed_bytes_to_clusters(s,
- qed_offset_into_cluster(s, find_cluster_cb->pos) +
- find_cluster_cb->len);
- n = qed_count_contiguous_clusters(s, request->l2_table->table,
- index, n, &offset);
-
- if (qed_offset_is_unalloc_cluster(offset)) {
- ret = QED_CLUSTER_L2;
- } else if (qed_offset_is_zero_cluster(offset)) {
- ret = QED_CLUSTER_ZERO;
- } else if (qed_check_cluster_offset(s, offset)) {
- ret = QED_CLUSTER_FOUND;
- } else {
- ret = -EINVAL;
- }
-
- len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
- qed_offset_into_cluster(s, find_cluster_cb->pos));
-
-out:
- find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
- g_free(find_cluster_cb);
-}
-
-/**
- * Find the offset of a data cluster
- *
- * @s: QED state
- * @request: L2 cache entry
- * @pos: Byte position in device
- * @len: Number of bytes
- * @cb: Completion function
- * @opaque: User data for completion function
- *
- * This function translates a position in the block device to an offset in the
- * image file. It invokes the cb completion callback to report back the
- * translated offset or unallocated range in the image file.
- *
- * If the L2 table exists, request->l2_table points to the L2 table cache entry
- * and the caller must free the reference when they are finished. The cache
- * entry is exposed in this way to avoid callers having to read the L2 table
- * again later during request processing. If request->l2_table is non-NULL it
- * will be unreferenced before taking on the new cache entry.
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
- size_t len, QEDFindClusterFunc *cb, void *opaque)
-{
- QEDFindClusterCB *find_cluster_cb;
- uint64_t l2_offset;
-
- /* Limit length to L2 boundary. Requests are broken up at the L2 boundary
- * so that a request acts on one L2 table at a time.
- */
- len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
-
- l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
- if (qed_offset_is_unalloc_cluster(l2_offset)) {
- cb(opaque, QED_CLUSTER_L1, 0, len);
- return;
- }
- if (!qed_check_table_offset(s, l2_offset)) {
- cb(opaque, -EINVAL, 0, 0);
- return;
- }
-
- find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
- find_cluster_cb->s = s;
- find_cluster_cb->pos = pos;
- find_cluster_cb->len = len;
- find_cluster_cb->cb = cb;
- find_cluster_cb->opaque = opaque;
- find_cluster_cb->request = request;
-
- qed_read_l2_table(s, request, l2_offset,
- qed_find_cluster_cb, find_cluster_cb);
-}
diff --git a/contrib/qemu/block/qed-gencb.c b/contrib/qemu/block/qed-gencb.c
deleted file mode 100644
index 7d7ac1ffc8e..00000000000
--- a/contrib/qemu/block/qed-gencb.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
-{
- GenericCB *gencb = g_malloc(len);
- gencb->cb = cb;
- gencb->opaque = opaque;
- return gencb;
-}
-
-void gencb_complete(void *opaque, int ret)
-{
- GenericCB *gencb = opaque;
- BlockDriverCompletionFunc *cb = gencb->cb;
- void *user_opaque = gencb->opaque;
-
- g_free(gencb);
- cb(user_opaque, ret);
-}
diff --git a/contrib/qemu/block/qed-l2-cache.c b/contrib/qemu/block/qed-l2-cache.c
deleted file mode 100644
index e9b2aae44d9..00000000000
--- a/contrib/qemu/block/qed-l2-cache.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * QEMU Enhanced Disk Format L2 Cache
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-/*
- * L2 table cache usage is as follows:
- *
- * An open image has one L2 table cache that is used to avoid accessing the
- * image file for recently referenced L2 tables.
- *
- * Cluster offset lookup translates the logical offset within the block device
- * to a cluster offset within the image file. This is done by indexing into
- * the L1 and L2 tables which store cluster offsets. It is here where the L2
- * table cache serves up recently referenced L2 tables.
- *
- * If there is a cache miss, that L2 table is read from the image file and
- * committed to the cache. Subsequent accesses to that L2 table will be served
- * from the cache until the table is evicted from the cache.
- *
- * L2 tables are also committed to the cache when new L2 tables are allocated
- * in the image file. Since the L2 table cache is write-through, the new L2
- * table is first written out to the image file and then committed to the
- * cache.
- *
- * Multiple I/O requests may be using an L2 table cache entry at any given
- * time. That means an entry may be in use across several requests and
- * reference counting is needed to free the entry at the correct time. In
- * particular, an entry evicted from the cache will only be freed once all
- * references are dropped.
- *
- * An in-flight I/O request will hold a reference to a L2 table cache entry for
- * the period during which it needs to access the L2 table. This includes
- * cluster offset lookup, L2 table allocation, and L2 table update when a new
- * data cluster has been allocated.
- *
- * An interesting case occurs when two requests need to access an L2 table that
- * is not in the cache. Since the operation to read the table from the image
- * file takes some time to complete, both requests may see a cache miss and
- * start reading the L2 table from the image file. The first to finish will
- * commit its L2 table into the cache. When the second tries to commit its
- * table will be deleted in favor of the existing cache entry.
- */
-
-#include "trace.h"
-#include "qed.h"
-
-/* Each L2 holds 2GB so this let's us fully cache a 100GB disk */
-#define MAX_L2_CACHE_SIZE 50
-
-/**
- * Initialize the L2 cache
- */
-void qed_init_l2_cache(L2TableCache *l2_cache)
-{
- QTAILQ_INIT(&l2_cache->entries);
- l2_cache->n_entries = 0;
-}
-
-/**
- * Free the L2 cache
- */
-void qed_free_l2_cache(L2TableCache *l2_cache)
-{
- CachedL2Table *entry, *next_entry;
-
- QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) {
- qemu_vfree(entry->table);
- g_free(entry);
- }
-}
-
-/**
- * Allocate an uninitialized entry from the cache
- *
- * The returned entry has a reference count of 1 and is owned by the caller.
- * The caller must allocate the actual table field for this entry and it must
- * be freeable using qemu_vfree().
- */
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
-{
- CachedL2Table *entry;
-
- entry = g_malloc0(sizeof(*entry));
- entry->ref++;
-
- trace_qed_alloc_l2_cache_entry(l2_cache, entry);
-
- return entry;
-}
-
-/**
- * Decrease an entry's reference count and free if necessary when the reference
- * count drops to zero.
- */
-void qed_unref_l2_cache_entry(CachedL2Table *entry)
-{
- if (!entry) {
- return;
- }
-
- entry->ref--;
- trace_qed_unref_l2_cache_entry(entry, entry->ref);
- if (entry->ref == 0) {
- qemu_vfree(entry->table);
- g_free(entry);
- }
-}
-
-/**
- * Find an entry in the L2 cache. This may return NULL and it's up to the
- * caller to satisfy the cache miss.
- *
- * For a cached entry, this function increases the reference count and returns
- * the entry.
- */
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
-{
- CachedL2Table *entry;
-
- QTAILQ_FOREACH(entry, &l2_cache->entries, node) {
- if (entry->offset == offset) {
- trace_qed_find_l2_cache_entry(l2_cache, entry, offset, entry->ref);
- entry->ref++;
- return entry;
- }
- }
- return NULL;
-}
-
-/**
- * Commit an L2 cache entry into the cache. This is meant to be used as part of
- * the process to satisfy a cache miss. A caller would allocate an entry which
- * is not actually in the L2 cache and then once the entry was valid and
- * present on disk, the entry can be committed into the cache.
- *
- * Since the cache is write-through, it's important that this function is not
- * called until the entry is present on disk and the L1 has been updated to
- * point to the entry.
- *
- * N.B. This function steals a reference to the l2_table from the caller so the
- * caller must obtain a new reference by issuing a call to
- * qed_find_l2_cache_entry().
- */
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
-{
- CachedL2Table *entry;
-
- entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset);
- if (entry) {
- qed_unref_l2_cache_entry(entry);
- qed_unref_l2_cache_entry(l2_table);
- return;
- }
-
- /* Evict an unused cache entry so we have space. If all entries are in use
- * we can grow the cache temporarily and we try to shrink back down later.
- */
- if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
- CachedL2Table *next;
- QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
- if (entry->ref > 1) {
- continue;
- }
-
- QTAILQ_REMOVE(&l2_cache->entries, entry, node);
- l2_cache->n_entries--;
- qed_unref_l2_cache_entry(entry);
-
- /* Stop evicting when we've shrunk back to max size */
- if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
- break;
- }
- }
- }
-
- l2_cache->n_entries++;
- QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node);
-}
diff --git a/contrib/qemu/block/qed-table.c b/contrib/qemu/block/qed-table.c
deleted file mode 100644
index 76d2dcccf81..00000000000
--- a/contrib/qemu/block/qed-table.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Table I/O
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "trace.h"
-#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
-#include "qed.h"
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEDTable *table;
-
- struct iovec iov;
- QEMUIOVector qiov;
-} QEDReadTableCB;
-
-static void qed_read_table_cb(void *opaque, int ret)
-{
- QEDReadTableCB *read_table_cb = opaque;
- QEDTable *table = read_table_cb->table;
- int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
- int i;
-
- /* Handle I/O error */
- if (ret) {
- goto out;
- }
-
- /* Byteswap offsets */
- for (i = 0; i < noffsets; i++) {
- table->offsets[i] = le64_to_cpu(table->offsets[i]);
- }
-
-out:
- /* Completion */
- trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
- gencb_complete(&read_table_cb->gencb, ret);
-}
-
-static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
- cb, opaque);
- QEMUIOVector *qiov = &read_table_cb->qiov;
-
- trace_qed_read_table(s, offset, table);
-
- read_table_cb->s = s;
- read_table_cb->table = table;
- read_table_cb->iov.iov_base = table->offsets,
- read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
-
- qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
- bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
- qiov->size / BDRV_SECTOR_SIZE,
- qed_read_table_cb, read_table_cb);
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEDTable *orig_table;
- QEDTable *table;
- bool flush; /* flush after write? */
-
- struct iovec iov;
- QEMUIOVector qiov;
-} QEDWriteTableCB;
-
-static void qed_write_table_cb(void *opaque, int ret)
-{
- QEDWriteTableCB *write_table_cb = opaque;
-
- trace_qed_write_table_cb(write_table_cb->s,
- write_table_cb->orig_table,
- write_table_cb->flush,
- ret);
-
- if (ret) {
- goto out;
- }
-
- if (write_table_cb->flush) {
- /* We still need to flush first */
- write_table_cb->flush = false;
- bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
- write_table_cb);
- return;
- }
-
-out:
- qemu_vfree(write_table_cb->table);
- gencb_complete(&write_table_cb->gencb, ret);
-}
-
-/**
- * Write out an updated part or all of a table
- *
- * @s: QED state
- * @offset: Offset of table in image file, in bytes
- * @table: Table
- * @index: Index of first element
- * @n: Number of elements
- * @flush: Whether or not to sync to disk
- * @cb: Completion function
- * @opaque: Argument for completion function
- */
-static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
- unsigned int index, unsigned int n, bool flush,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- QEDWriteTableCB *write_table_cb;
- unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
- unsigned int start, end, i;
- size_t len_bytes;
-
- trace_qed_write_table(s, offset, table, index, n);
-
- /* Calculate indices of the first and one after last elements */
- start = index & ~sector_mask;
- end = (index + n + sector_mask) & ~sector_mask;
-
- len_bytes = (end - start) * sizeof(uint64_t);
-
- write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
- write_table_cb->s = s;
- write_table_cb->orig_table = table;
- write_table_cb->flush = flush;
- write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
- write_table_cb->iov.iov_base = write_table_cb->table->offsets;
- write_table_cb->iov.iov_len = len_bytes;
- qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
-
- /* Byteswap table */
- for (i = start; i < end; i++) {
- uint64_t le_offset = cpu_to_le64(table->offsets[i]);
- write_table_cb->table->offsets[i - start] = le_offset;
- }
-
- /* Adjust for offset into table */
- offset += start * sizeof(uint64_t);
-
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
- &write_table_cb->qiov,
- write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
- qed_write_table_cb, write_table_cb);
-}
-
-/**
- * Propagate return value from async callback
- */
-static void qed_sync_cb(void *opaque, int ret)
-{
- *(int *)opaque = ret;
-}
-
-int qed_read_l1_table_sync(BDRVQEDState *s)
-{
- int ret = -EINPROGRESS;
-
- qed_read_table(s, s->header.l1_table_offset,
- s->l1_table, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
-
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
- qed_write_table(s, s->header.l1_table_offset,
- s->l1_table, index, n, false, cb, opaque);
-}
-
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
- unsigned int n)
-{
- int ret = -EINPROGRESS;
-
- qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- uint64_t l2_offset;
- QEDRequest *request;
-} QEDReadL2TableCB;
-
-static void qed_read_l2_table_cb(void *opaque, int ret)
-{
- QEDReadL2TableCB *read_l2_table_cb = opaque;
- QEDRequest *request = read_l2_table_cb->request;
- BDRVQEDState *s = read_l2_table_cb->s;
- CachedL2Table *l2_table = request->l2_table;
- uint64_t l2_offset = read_l2_table_cb->l2_offset;
-
- if (ret) {
- /* can't trust loaded L2 table anymore */
- qed_unref_l2_cache_entry(l2_table);
- request->l2_table = NULL;
- } else {
- l2_table->offset = l2_offset;
-
- qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
- /* This is guaranteed to succeed because we just committed the entry
- * to the cache.
- */
- request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
- assert(request->l2_table != NULL);
- }
-
- gencb_complete(&read_l2_table_cb->gencb, ret);
-}
-
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- QEDReadL2TableCB *read_l2_table_cb;
-
- qed_unref_l2_cache_entry(request->l2_table);
-
- /* Check for cached L2 entry */
- request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
- if (request->l2_table) {
- cb(opaque, 0);
- return;
- }
-
- request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
- request->l2_table->table = qed_alloc_table(s);
-
- read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
- read_l2_table_cb->s = s;
- read_l2_table_cb->l2_offset = offset;
- read_l2_table_cb->request = request;
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
- qed_read_table(s, offset, request->l2_table->table,
- qed_read_l2_table_cb, read_l2_table_cb);
-}
-
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
-{
- int ret = -EINPROGRESS;
-
- qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
-
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
- qed_write_table(s, request->l2_table->offset,
- request->l2_table->table, index, n, flush, cb, opaque);
-}
-
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush)
-{
- int ret = -EINPROGRESS;
-
- qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
diff --git a/contrib/qemu/block/qed.c b/contrib/qemu/block/qed.c
deleted file mode 100644
index f767b0528ce..00000000000
--- a/contrib/qemu/block/qed.c
+++ /dev/null
@@ -1,1596 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/timer.h"
-#include "trace.h"
-#include "qed.h"
-#include "qapi/qmp/qerror.h"
-#include "migration/migration.h"
-
-static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- QEDAIOCB *acb = (QEDAIOCB *)blockacb;
- bool finished = false;
-
- /* Wait for the request to finish */
- acb->finished = &finished;
- while (!finished) {
- qemu_aio_wait();
- }
-}
-
-static const AIOCBInfo qed_aiocb_info = {
- .aiocb_size = sizeof(QEDAIOCB),
- .cancel = qed_aio_cancel,
-};
-
-static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
- const char *filename)
-{
- const QEDHeader *header = (const QEDHeader *)buf;
-
- if (buf_size < sizeof(*header)) {
- return 0;
- }
- if (le32_to_cpu(header->magic) != QED_MAGIC) {
- return 0;
- }
- return 100;
-}
-
-/**
- * Check whether an image format is raw
- *
- * @fmt: Backing file format, may be NULL
- */
-static bool qed_fmt_is_raw(const char *fmt)
-{
- return fmt && strcmp(fmt, "raw") == 0;
-}
-
-static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
-{
- cpu->magic = le32_to_cpu(le->magic);
- cpu->cluster_size = le32_to_cpu(le->cluster_size);
- cpu->table_size = le32_to_cpu(le->table_size);
- cpu->header_size = le32_to_cpu(le->header_size);
- cpu->features = le64_to_cpu(le->features);
- cpu->compat_features = le64_to_cpu(le->compat_features);
- cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
- cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
- cpu->image_size = le64_to_cpu(le->image_size);
- cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
- cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
-}
-
-static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
-{
- le->magic = cpu_to_le32(cpu->magic);
- le->cluster_size = cpu_to_le32(cpu->cluster_size);
- le->table_size = cpu_to_le32(cpu->table_size);
- le->header_size = cpu_to_le32(cpu->header_size);
- le->features = cpu_to_le64(cpu->features);
- le->compat_features = cpu_to_le64(cpu->compat_features);
- le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
- le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
- le->image_size = cpu_to_le64(cpu->image_size);
- le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
- le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
-}
-
-int qed_write_header_sync(BDRVQEDState *s)
-{
- QEDHeader le;
- int ret;
-
- qed_header_cpu_to_le(&s->header, &le);
- ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
- if (ret != sizeof(le)) {
- return ret;
- }
- return 0;
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- struct iovec iov;
- QEMUIOVector qiov;
- int nsectors;
- uint8_t *buf;
-} QEDWriteHeaderCB;
-
-static void qed_write_header_cb(void *opaque, int ret)
-{
- QEDWriteHeaderCB *write_header_cb = opaque;
-
- qemu_vfree(write_header_cb->buf);
- gencb_complete(write_header_cb, ret);
-}
-
-static void qed_write_header_read_cb(void *opaque, int ret)
-{
- QEDWriteHeaderCB *write_header_cb = opaque;
- BDRVQEDState *s = write_header_cb->s;
-
- if (ret) {
- qed_write_header_cb(write_header_cb, ret);
- return;
- }
-
- /* Update header */
- qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
-
- bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
- write_header_cb->nsectors, qed_write_header_cb,
- write_header_cb);
-}
-
-/**
- * Update header in-place (does not rewrite backing filename or other strings)
- *
- * This function only updates known header fields in-place and does not affect
- * extra data after the QED header.
- */
-static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
- void *opaque)
-{
- /* We must write full sectors for O_DIRECT but cannot necessarily generate
- * the data following the header if an unrecognized compat feature is
- * active. Therefore, first read the sectors containing the header, update
- * them, and write back.
- */
-
- int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
- BDRV_SECTOR_SIZE;
- size_t len = nsectors * BDRV_SECTOR_SIZE;
- QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
- cb, opaque);
-
- write_header_cb->s = s;
- write_header_cb->nsectors = nsectors;
- write_header_cb->buf = qemu_blockalign(s->bs, len);
- write_header_cb->iov.iov_base = write_header_cb->buf;
- write_header_cb->iov.iov_len = len;
- qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
-
- bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
- qed_write_header_read_cb, write_header_cb);
-}
-
-static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
-{
- uint64_t table_entries;
- uint64_t l2_size;
-
- table_entries = (table_size * cluster_size) / sizeof(uint64_t);
- l2_size = table_entries * cluster_size;
-
- return l2_size * table_entries;
-}
-
-static bool qed_is_cluster_size_valid(uint32_t cluster_size)
-{
- if (cluster_size < QED_MIN_CLUSTER_SIZE ||
- cluster_size > QED_MAX_CLUSTER_SIZE) {
- return false;
- }
- if (cluster_size & (cluster_size - 1)) {
- return false; /* not power of 2 */
- }
- return true;
-}
-
-static bool qed_is_table_size_valid(uint32_t table_size)
-{
- if (table_size < QED_MIN_TABLE_SIZE ||
- table_size > QED_MAX_TABLE_SIZE) {
- return false;
- }
- if (table_size & (table_size - 1)) {
- return false; /* not power of 2 */
- }
- return true;
-}
-
-static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
- uint32_t table_size)
-{
- if (image_size % BDRV_SECTOR_SIZE != 0) {
- return false; /* not multiple of sector size */
- }
- if (image_size > qed_max_image_size(cluster_size, table_size)) {
- return false; /* image is too large */
- }
- return true;
-}
-
-/**
- * Read a string of known length from the image file
- *
- * @file: Image file
- * @offset: File offset to start of string, in bytes
- * @n: String length in bytes
- * @buf: Destination buffer
- * @buflen: Destination buffer length in bytes
- * @ret: 0 on success, -errno on failure
- *
- * The string is NUL-terminated.
- */
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
- char *buf, size_t buflen)
-{
- int ret;
- if (n >= buflen) {
- return -EINVAL;
- }
- ret = bdrv_pread(file, offset, buf, n);
- if (ret < 0) {
- return ret;
- }
- buf[n] = '\0';
- return 0;
-}
-
-/**
- * Allocate new clusters
- *
- * @s: QED state
- * @n: Number of contiguous clusters to allocate
- * @ret: Offset of first allocated cluster
- *
- * This function only produces the offset where the new clusters should be
- * written. It updates BDRVQEDState but does not make any changes to the image
- * file.
- */
-static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
-{
- uint64_t offset = s->file_size;
- s->file_size += n * s->header.cluster_size;
- return offset;
-}
-
-QEDTable *qed_alloc_table(BDRVQEDState *s)
-{
- /* Honor O_DIRECT memory alignment requirements */
- return qemu_blockalign(s->bs,
- s->header.cluster_size * s->header.table_size);
-}
-
-/**
- * Allocate a new zeroed L2 table
- */
-static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
-{
- CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
-
- l2_table->table = qed_alloc_table(s);
- l2_table->offset = qed_alloc_clusters(s, s->header.table_size);
-
- memset(l2_table->table->offsets, 0,
- s->header.cluster_size * s->header.table_size);
- return l2_table;
-}
-
-static void qed_aio_next_io(void *opaque, int ret);
-
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
-{
- assert(!s->allocating_write_reqs_plugged);
-
- s->allocating_write_reqs_plugged = true;
-}
-
-static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
-{
- QEDAIOCB *acb;
-
- assert(s->allocating_write_reqs_plugged);
-
- s->allocating_write_reqs_plugged = false;
-
- acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
- if (acb) {
- qed_aio_next_io(acb, 0);
- }
-}
-
-static void qed_finish_clear_need_check(void *opaque, int ret)
-{
- /* Do nothing */
-}
-
-static void qed_flush_after_clear_need_check(void *opaque, int ret)
-{
- BDRVQEDState *s = opaque;
-
- bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
-
- /* No need to wait until flush completes */
- qed_unplug_allocating_write_reqs(s);
-}
-
-static void qed_clear_need_check(void *opaque, int ret)
-{
- BDRVQEDState *s = opaque;
-
- if (ret) {
- qed_unplug_allocating_write_reqs(s);
- return;
- }
-
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header(s, qed_flush_after_clear_need_check, s);
-}
-
-static void qed_need_check_timer_cb(void *opaque)
-{
- BDRVQEDState *s = opaque;
-
- /* The timer should only fire when allocating writes have drained */
- assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
-
- trace_qed_need_check_timer_cb(s);
-
- qed_plug_allocating_write_reqs(s);
-
- /* Ensure writes are on disk before clearing flag */
- bdrv_aio_flush(s->bs, qed_clear_need_check, s);
-}
-
-static void qed_start_need_check_timer(BDRVQEDState *s)
-{
- trace_qed_start_need_check_timer(s);
-
- /* Use vm_clock so we don't alter the image file while suspended for
- * migration.
- */
- qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
- get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
-}
-
-/* It's okay to call this multiple times or when no timer is started */
-static void qed_cancel_need_check_timer(BDRVQEDState *s)
-{
- trace_qed_cancel_need_check_timer(s);
- qemu_del_timer(s->need_check_timer);
-}
-
-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- s->bs = bs;
-}
-
-static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQEDState *s = bs->opaque;
- QEDHeader le_header;
- int64_t file_size;
- int ret;
-
- s->bs = bs;
- QSIMPLEQ_INIT(&s->allocating_write_reqs);
-
- ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
- if (ret < 0) {
- return ret;
- }
- qed_header_le_to_cpu(&le_header, &s->header);
-
- if (s->header.magic != QED_MAGIC) {
- return -EMEDIUMTYPE;
- }
- if (s->header.features & ~QED_FEATURE_MASK) {
- /* image uses unsupported feature bits */
- char buf[64];
- snprintf(buf, sizeof(buf), "%" PRIx64,
- s->header.features & ~QED_FEATURE_MASK);
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "QED", buf);
- return -ENOTSUP;
- }
- if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
- return -EINVAL;
- }
-
- /* Round down file size to the last cluster */
- file_size = bdrv_getlength(bs->file);
- if (file_size < 0) {
- return file_size;
- }
- s->file_size = qed_start_of_cluster(s, file_size);
-
- if (!qed_is_table_size_valid(s->header.table_size)) {
- return -EINVAL;
- }
- if (!qed_is_image_size_valid(s->header.image_size,
- s->header.cluster_size,
- s->header.table_size)) {
- return -EINVAL;
- }
- if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
- return -EINVAL;
- }
-
- s->table_nelems = (s->header.cluster_size * s->header.table_size) /
- sizeof(uint64_t);
- s->l2_shift = ffs(s->header.cluster_size) - 1;
- s->l2_mask = s->table_nelems - 1;
- s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
-
- if ((s->header.features & QED_F_BACKING_FILE)) {
- if ((uint64_t)s->header.backing_filename_offset +
- s->header.backing_filename_size >
- s->header.cluster_size * s->header.header_size) {
- return -EINVAL;
- }
-
- ret = qed_read_string(bs->file, s->header.backing_filename_offset,
- s->header.backing_filename_size, bs->backing_file,
- sizeof(bs->backing_file));
- if (ret < 0) {
- return ret;
- }
-
- if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
- pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
- }
- }
-
- /* Reset unknown autoclear feature bits. This is a backwards
- * compatibility mechanism that allows images to be opened by older
- * programs, which "knock out" unknown feature bits. When an image is
- * opened by a newer program again it can detect that the autoclear
- * feature is no longer valid.
- */
- if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
- s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
-
- ret = qed_write_header_sync(s);
- if (ret) {
- return ret;
- }
-
- /* From here on only known autoclear feature bits are valid */
- bdrv_flush(bs->file);
- }
-
- s->l1_table = qed_alloc_table(s);
- qed_init_l2_cache(&s->l2_cache);
-
- ret = qed_read_l1_table_sync(s);
- if (ret) {
- goto out;
- }
-
- /* If image was not closed cleanly, check consistency */
- if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
- /* Read-only images cannot be fixed. There is no risk of corruption
- * since write operations are not possible. Therefore, allow
- * potentially inconsistent images to be opened read-only. This can
- * aid data recovery from an otherwise inconsistent image.
- */
- if (!bdrv_is_read_only(bs->file) &&
- !(flags & BDRV_O_INCOMING)) {
- BdrvCheckResult result = {0};
-
- ret = qed_check(s, &result, true);
- if (ret) {
- goto out;
- }
- }
- }
-
- s->need_check_timer = qemu_new_timer_ns(vm_clock,
- qed_need_check_timer_cb, s);
-
-out:
- if (ret) {
- qed_free_l2_cache(&s->l2_cache);
- qemu_vfree(s->l1_table);
- }
- return ret;
-}
-
-/* We have nothing to do for QED reopen, stubs just return
- * success */
-static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static void bdrv_qed_close(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
-
- qed_cancel_need_check_timer(s);
- qemu_free_timer(s->need_check_timer);
-
- /* Ensure writes reach stable storage */
- bdrv_flush(bs->file);
-
- /* Clean shutdown, no check required on next open */
- if (s->header.features & QED_F_NEED_CHECK) {
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header_sync(s);
- }
-
- qed_free_l2_cache(&s->l2_cache);
- qemu_vfree(s->l1_table);
-}
-
-static int qed_create(const char *filename, uint32_t cluster_size,
- uint64_t image_size, uint32_t table_size,
- const char *backing_file, const char *backing_fmt)
-{
- QEDHeader header = {
- .magic = QED_MAGIC,
- .cluster_size = cluster_size,
- .table_size = table_size,
- .header_size = 1,
- .features = 0,
- .compat_features = 0,
- .l1_table_offset = cluster_size,
- .image_size = image_size,
- };
- QEDHeader le_header;
- uint8_t *l1_table = NULL;
- size_t l1_size = header.cluster_size * header.table_size;
- int ret = 0;
- BlockDriverState *bs = NULL;
-
- ret = bdrv_create_file(filename, NULL);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
- if (ret < 0) {
- return ret;
- }
-
- /* File must start empty and grow, check truncate is supported */
- ret = bdrv_truncate(bs, 0);
- if (ret < 0) {
- goto out;
- }
-
- if (backing_file) {
- header.features |= QED_F_BACKING_FILE;
- header.backing_filename_offset = sizeof(le_header);
- header.backing_filename_size = strlen(backing_file);
-
- if (qed_fmt_is_raw(backing_fmt)) {
- header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
- }
- }
-
- qed_header_cpu_to_le(&header, &le_header);
- ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
- if (ret < 0) {
- goto out;
- }
- ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
- header.backing_filename_size);
- if (ret < 0) {
- goto out;
- }
-
- l1_table = g_malloc0(l1_size);
- ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
- if (ret < 0) {
- goto out;
- }
-
- ret = 0; /* success */
-out:
- g_free(l1_table);
- bdrv_delete(bs);
- return ret;
-}
-
-static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
-{
- uint64_t image_size = 0;
- uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
- uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
-
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- image_size = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
- if (options->value.n) {
- table_size = options->value.n;
- }
- }
- options++;
- }
-
- if (!qed_is_cluster_size_valid(cluster_size)) {
- fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
- QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
- return -EINVAL;
- }
- if (!qed_is_table_size_valid(table_size)) {
- fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
- QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
- return -EINVAL;
- }
- if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
- fprintf(stderr, "QED image size must be a non-zero multiple of "
- "cluster size and less than %" PRIu64 " bytes\n",
- qed_max_image_size(cluster_size, table_size));
- return -EINVAL;
- }
-
- return qed_create(filename, cluster_size, image_size, table_size,
- backing_file, backing_fmt);
-}
-
-typedef struct {
- Coroutine *co;
- int is_allocated;
- int *pnum;
-} QEDIsAllocatedCB;
-
-static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
-{
- QEDIsAllocatedCB *cb = opaque;
- *cb->pnum = len / BDRV_SECTOR_SIZE;
- cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
- if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
- }
-}
-
-static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- BDRVQEDState *s = bs->opaque;
- uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
- size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
- QEDIsAllocatedCB cb = {
- .is_allocated = -1,
- .pnum = pnum,
- };
- QEDRequest request = { .l2_table = NULL };
-
- qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
-
- /* Now sleep if the callback wasn't invoked immediately */
- while (cb.is_allocated == -1) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
-
- qed_unref_l2_cache_entry(request.l2_table);
-
- return cb.is_allocated;
-}
-
-static int bdrv_qed_make_empty(BlockDriverState *bs)
-{
- return -ENOTSUP;
-}
-
-static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
-{
- return acb->common.bs->opaque;
-}
-
-/**
- * Read from the backing file or zero-fill if no backing file
- *
- * @s: QED state
- * @pos: Byte position in device
- * @qiov: Destination I/O vector
- * @cb: Completion function
- * @opaque: User data for completion function
- *
- * This function reads qiov->size bytes starting at pos from the backing file.
- * If there is no backing file then zeroes are read.
- */
-static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
- QEMUIOVector *qiov,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- uint64_t backing_length = 0;
- size_t size;
-
- /* If there is a backing file, get its length. Treat the absence of a
- * backing file like a zero length backing file.
- */
- if (s->bs->backing_hd) {
- int64_t l = bdrv_getlength(s->bs->backing_hd);
- if (l < 0) {
- cb(opaque, l);
- return;
- }
- backing_length = l;
- }
-
- /* Zero all sectors if reading beyond the end of the backing file */
- if (pos >= backing_length ||
- pos + qiov->size > backing_length) {
- qemu_iovec_memset(qiov, 0, 0, qiov->size);
- }
-
- /* Complete now if there are no backing file sectors to read */
- if (pos >= backing_length) {
- cb(opaque, 0);
- return;
- }
-
- /* If the read straddles the end of the backing file, shorten it */
- size = MIN((uint64_t)backing_length - pos, qiov->size);
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
- qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEMUIOVector qiov;
- struct iovec iov;
- uint64_t offset;
-} CopyFromBackingFileCB;
-
-static void qed_copy_from_backing_file_cb(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- qemu_vfree(copy_cb->iov.iov_base);
- gencb_complete(&copy_cb->gencb, ret);
-}
-
-static void qed_copy_from_backing_file_write(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- BDRVQEDState *s = copy_cb->s;
-
- if (ret) {
- qed_copy_from_backing_file_cb(copy_cb, ret);
- return;
- }
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
- bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
- &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
- qed_copy_from_backing_file_cb, copy_cb);
-}
-
-/**
- * Copy data from backing file into the image
- *
- * @s: QED state
- * @pos: Byte position in device
- * @len: Number of bytes
- * @offset: Byte offset in image file
- * @cb: Completion function
- * @opaque: User data for completion function
- */
-static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
- uint64_t len, uint64_t offset,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- CopyFromBackingFileCB *copy_cb;
-
- /* Skip copy entirely if there is no work to do */
- if (len == 0) {
- cb(opaque, 0);
- return;
- }
-
- copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
- copy_cb->s = s;
- copy_cb->offset = offset;
- copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
- copy_cb->iov.iov_len = len;
- qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
-
- qed_read_backing_file(s, pos, &copy_cb->qiov,
- qed_copy_from_backing_file_write, copy_cb);
-}
-
-/**
- * Link one or more contiguous clusters into a table
- *
- * @s: QED state
- * @table: L2 table
- * @index: First cluster index
- * @n: Number of contiguous clusters
- * @cluster: First cluster offset
- *
- * The cluster offset may be an allocated byte offset in the image file, the
- * zero cluster marker, or the unallocated cluster marker.
- */
-static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
- unsigned int n, uint64_t cluster)
-{
- int i;
- for (i = index; i < index + n; i++) {
- table->offsets[i] = cluster;
- if (!qed_offset_is_unalloc_cluster(cluster) &&
- !qed_offset_is_zero_cluster(cluster)) {
- cluster += s->header.cluster_size;
- }
- }
-}
-
-static void qed_aio_complete_bh(void *opaque)
-{
- QEDAIOCB *acb = opaque;
- BlockDriverCompletionFunc *cb = acb->common.cb;
- void *user_opaque = acb->common.opaque;
- int ret = acb->bh_ret;
- bool *finished = acb->finished;
-
- qemu_bh_delete(acb->bh);
- qemu_aio_release(acb);
-
- /* Invoke callback */
- cb(user_opaque, ret);
-
- /* Signal cancel completion */
- if (finished) {
- *finished = true;
- }
-}
-
-static void qed_aio_complete(QEDAIOCB *acb, int ret)
-{
- BDRVQEDState *s = acb_to_s(acb);
-
- trace_qed_aio_complete(s, acb, ret);
-
- /* Free resources */
- qemu_iovec_destroy(&acb->cur_qiov);
- qed_unref_l2_cache_entry(acb->request.l2_table);
-
- /* Free the buffer we may have allocated for zero writes */
- if (acb->flags & QED_AIOCB_ZERO) {
- qemu_vfree(acb->qiov->iov[0].iov_base);
- acb->qiov->iov[0].iov_base = NULL;
- }
-
- /* Arrange for a bh to invoke the completion function */
- acb->bh_ret = ret;
- acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
- qemu_bh_schedule(acb->bh);
-
- /* Start next allocating write request waiting behind this one. Note that
- * requests enqueue themselves when they first hit an unallocated cluster
- * but they wait until the entire request is finished before waking up the
- * next request in the queue. This ensures that we don't cycle through
- * requests multiple times but rather finish one at a time completely.
- */
- if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
- acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
- if (acb) {
- qed_aio_next_io(acb, 0);
- } else if (s->header.features & QED_F_NEED_CHECK) {
- qed_start_need_check_timer(s);
- }
- }
-}
-
-/**
- * Commit the current L2 table to the cache
- */
-static void qed_commit_l2_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- CachedL2Table *l2_table = acb->request.l2_table;
- uint64_t l2_offset = l2_table->offset;
-
- qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
- /* This is guaranteed to succeed because we just committed the entry to the
- * cache.
- */
- acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
- assert(acb->request.l2_table != NULL);
-
- qed_aio_next_io(opaque, ret);
-}
-
-/**
- * Update L1 table with new L2 table offset and write it out
- */
-static void qed_aio_write_l1_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- int index;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- index = qed_l1_index(s, acb->cur_pos);
- s->l1_table->offsets[index] = acb->request.l2_table->offset;
-
- qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
-}
-
-/**
- * Update L2 table with new cluster offsets and write them out
- */
-static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
-{
- BDRVQEDState *s = acb_to_s(acb);
- bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
- int index;
-
- if (ret) {
- goto err;
- }
-
- if (need_alloc) {
- qed_unref_l2_cache_entry(acb->request.l2_table);
- acb->request.l2_table = qed_new_l2_table(s);
- }
-
- index = qed_l2_index(s, acb->cur_pos);
- qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
- offset);
-
- if (need_alloc) {
- /* Write out the whole new L2 table */
- qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
- qed_aio_write_l1_update, acb);
- } else {
- /* Write out only the updated part of the L2 table */
- qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
- qed_aio_next_io, acb);
- }
- return;
-
-err:
- qed_aio_complete(acb, ret);
-}
-
-static void qed_aio_write_l2_update_cb(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
-}
-
-/**
- * Flush new data clusters before updating the L2 table
- *
- * This flush is necessary when a backing file is in use. A crash during an
- * allocating write could result in empty clusters in the image. If the write
- * only touched a subregion of the cluster, then backing image sectors have
- * been lost in the untouched region. The solution is to flush after writing a
- * new data cluster and before updating the L2 table.
- */
-static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
-
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
- qed_aio_complete(acb, -EIO);
- }
-}
-
-/**
- * Write data to the image file
- */
-static void qed_aio_write_main(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t offset = acb->cur_cluster +
- qed_offset_into_cluster(s, acb->cur_pos);
- BlockDriverCompletionFunc *next_fn;
-
- trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
- next_fn = qed_aio_next_io;
- } else {
- if (s->bs->backing_hd) {
- next_fn = qed_aio_write_flush_before_l2_update;
- } else {
- next_fn = qed_aio_write_l2_update_cb;
- }
- }
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- next_fn, acb);
-}
-
-/**
- * Populate back untouched region of new data cluster
- */
-static void qed_aio_write_postfill(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = acb->cur_pos + acb->cur_qiov.size;
- uint64_t len =
- qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
- uint64_t offset = acb->cur_cluster +
- qed_offset_into_cluster(s, acb->cur_pos) +
- acb->cur_qiov.size;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- trace_qed_aio_write_postfill(s, acb, start, len, offset);
- qed_copy_from_backing_file(s, start, len, offset,
- qed_aio_write_main, acb);
-}
-
-/**
- * Populate front untouched region of new data cluster
- */
-static void qed_aio_write_prefill(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
- uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
-
- trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
- qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
- qed_aio_write_postfill, acb);
-}
-
-/**
- * Check if the QED_F_NEED_CHECK bit should be set during allocating write
- */
-static bool qed_should_set_need_check(BDRVQEDState *s)
-{
- /* The flush before L2 update path ensures consistency */
- if (s->bs->backing_hd) {
- return false;
- }
-
- return !(s->header.features & QED_F_NEED_CHECK);
-}
-
-static void qed_aio_write_zero_cluster(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- qed_aio_write_l2_update(acb, 0, 1);
-}
-
-/**
- * Write new data cluster
- *
- * @acb: Write request
- * @len: Length in bytes
- *
- * This path is taken when writing to previously unallocated clusters.
- */
-static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
-{
- BDRVQEDState *s = acb_to_s(acb);
- BlockDriverCompletionFunc *cb;
-
- /* Cancel timer when the first allocating request comes in */
- if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
- qed_cancel_need_check_timer(s);
- }
-
- /* Freeze this request if another allocating write is in progress */
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
- }
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
- s->allocating_write_reqs_plugged) {
- return; /* wait for existing request to finish */
- }
-
- acb->cur_nclusters = qed_bytes_to_clusters(s,
- qed_offset_into_cluster(s, acb->cur_pos) + len);
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- if (acb->flags & QED_AIOCB_ZERO) {
- /* Skip ahead if the clusters are already zero */
- if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
- qed_aio_next_io(acb, 0);
- return;
- }
-
- cb = qed_aio_write_zero_cluster;
- } else {
- cb = qed_aio_write_prefill;
- acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
- }
-
- if (qed_should_set_need_check(s)) {
- s->header.features |= QED_F_NEED_CHECK;
- qed_write_header(s, cb, acb);
- } else {
- cb(acb, 0);
- }
-}
-
-/**
- * Write data cluster in place
- *
- * @acb: Write request
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * This path is taken when writing to already allocated clusters.
- */
-static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
-{
- /* Allocate buffer for zero writes */
- if (acb->flags & QED_AIOCB_ZERO) {
- struct iovec *iov = acb->qiov->iov;
-
- if (!iov->iov_base) {
- iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
- memset(iov->iov_base, 0, iov->iov_len);
- }
- }
-
- /* Calculate the I/O vector */
- acb->cur_cluster = offset;
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- /* Do the actual write */
- qed_aio_write_main(acb, 0);
-}
-
-/**
- * Write data cluster
- *
- * @opaque: Write request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_write_data(void *opaque, int ret,
- uint64_t offset, size_t len)
-{
- QEDAIOCB *acb = opaque;
-
- trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
-
- acb->find_cluster_ret = ret;
-
- switch (ret) {
- case QED_CLUSTER_FOUND:
- qed_aio_write_inplace(acb, offset, len);
- break;
-
- case QED_CLUSTER_L2:
- case QED_CLUSTER_L1:
- case QED_CLUSTER_ZERO:
- qed_aio_write_alloc(acb, len);
- break;
-
- default:
- qed_aio_complete(acb, ret);
- break;
- }
-}
-
-/**
- * Read data cluster
- *
- * @opaque: Read request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_read_data(void *opaque, int ret,
- uint64_t offset, size_t len)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- BlockDriverState *bs = acb->common.bs;
-
- /* Adjust offset into cluster */
- offset += qed_offset_into_cluster(s, acb->cur_pos);
-
- trace_qed_aio_read_data(s, acb, ret, offset, len);
-
- if (ret < 0) {
- goto err;
- }
-
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- /* Handle zero cluster and backing file reads */
- if (ret == QED_CLUSTER_ZERO) {
- qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
- qed_aio_next_io(acb, 0);
- return;
- } else if (ret != QED_CLUSTER_FOUND) {
- qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
- qed_aio_next_io, acb);
- return;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- qed_aio_next_io, acb);
- return;
-
-err:
- qed_aio_complete(acb, ret);
-}
-
-/**
- * Begin next I/O or complete the request
- */
-static void qed_aio_next_io(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
- qed_aio_write_data : qed_aio_read_data;
-
- trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
-
- /* Handle I/O error */
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- acb->qiov_offset += acb->cur_qiov.size;
- acb->cur_pos += acb->cur_qiov.size;
- qemu_iovec_reset(&acb->cur_qiov);
-
- /* Complete request */
- if (acb->cur_pos >= acb->end_pos) {
- qed_aio_complete(acb, 0);
- return;
- }
-
- /* Find next cluster and start I/O */
- qed_find_cluster(s, &acb->request,
- acb->cur_pos, acb->end_pos - acb->cur_pos,
- io_fn, acb);
-}
-
-static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque, int flags)
-{
- QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
-
- trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
- opaque, flags);
-
- acb->flags = flags;
- acb->finished = NULL;
- acb->qiov = qiov;
- acb->qiov_offset = 0;
- acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
- acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
- acb->request.l2_table = NULL;
- qemu_iovec_init(&acb->cur_qiov, qiov->niov);
-
- /* Start request */
- qed_aio_next_io(acb, 0);
- return &acb->common;
-}
-
-static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
- opaque, QED_AIOCB_WRITE);
-}
-
-typedef struct {
- Coroutine *co;
- int ret;
- bool done;
-} QEDWriteZeroesCB;
-
-static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
-{
- QEDWriteZeroesCB *cb = opaque;
-
- cb->done = true;
- cb->ret = ret;
- if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
- }
-}
-
-static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors)
-{
- BlockDriverAIOCB *blockacb;
- BDRVQEDState *s = bs->opaque;
- QEDWriteZeroesCB cb = { .done = false };
- QEMUIOVector qiov;
- struct iovec iov;
-
- /* Refuse if there are untouched backing file sectors */
- if (bs->backing_hd) {
- if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- }
-
- /* Zero writes start without an I/O buffer. If a buffer becomes necessary
- * then it will be allocated during request processing.
- */
- iov.iov_base = NULL,
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
- qed_co_write_zeroes_cb, &cb,
- QED_AIOCB_WRITE | QED_AIOCB_ZERO);
- if (!blockacb) {
- return -EIO;
- }
- if (!cb.done) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
- assert(cb.done);
- return cb.ret;
-}
-
-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
-{
- BDRVQEDState *s = bs->opaque;
- uint64_t old_image_size;
- int ret;
-
- if (!qed_is_image_size_valid(offset, s->header.cluster_size,
- s->header.table_size)) {
- return -EINVAL;
- }
-
- /* Shrinking is currently not supported */
- if ((uint64_t)offset < s->header.image_size) {
- return -ENOTSUP;
- }
-
- old_image_size = s->header.image_size;
- s->header.image_size = offset;
- ret = qed_write_header_sync(s);
- if (ret < 0) {
- s->header.image_size = old_image_size;
- }
- return ret;
-}
-
-static int64_t bdrv_qed_getlength(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- return s->header.image_size;
-}
-
-static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQEDState *s = bs->opaque;
-
- memset(bdi, 0, sizeof(*bdi));
- bdi->cluster_size = s->header.cluster_size;
- bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
- return 0;
-}
-
-static int bdrv_qed_change_backing_file(BlockDriverState *bs,
- const char *backing_file,
- const char *backing_fmt)
-{
- BDRVQEDState *s = bs->opaque;
- QEDHeader new_header, le_header;
- void *buffer;
- size_t buffer_len, backing_file_len;
- int ret;
-
- /* Refuse to set backing filename if unknown compat feature bits are
- * active. If the image uses an unknown compat feature then we may not
- * know the layout of data following the header structure and cannot safely
- * add a new string.
- */
- if (backing_file && (s->header.compat_features &
- ~QED_COMPAT_FEATURE_MASK)) {
- return -ENOTSUP;
- }
-
- memcpy(&new_header, &s->header, sizeof(new_header));
-
- new_header.features &= ~(QED_F_BACKING_FILE |
- QED_F_BACKING_FORMAT_NO_PROBE);
-
- /* Adjust feature flags */
- if (backing_file) {
- new_header.features |= QED_F_BACKING_FILE;
-
- if (qed_fmt_is_raw(backing_fmt)) {
- new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
- }
- }
-
- /* Calculate new header size */
- backing_file_len = 0;
-
- if (backing_file) {
- backing_file_len = strlen(backing_file);
- }
-
- buffer_len = sizeof(new_header);
- new_header.backing_filename_offset = buffer_len;
- new_header.backing_filename_size = backing_file_len;
- buffer_len += backing_file_len;
-
- /* Make sure we can rewrite header without failing */
- if (buffer_len > new_header.header_size * new_header.cluster_size) {
- return -ENOSPC;
- }
-
- /* Prepare new header */
- buffer = g_malloc(buffer_len);
-
- qed_header_cpu_to_le(&new_header, &le_header);
- memcpy(buffer, &le_header, sizeof(le_header));
- buffer_len = sizeof(le_header);
-
- if (backing_file) {
- memcpy(buffer + buffer_len, backing_file, backing_file_len);
- buffer_len += backing_file_len;
- }
-
- /* Write new header */
- ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
- g_free(buffer);
- if (ret == 0) {
- memcpy(&s->header, &new_header, sizeof(new_header));
- }
- return ret;
-}
-
-static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
-
- bdrv_qed_close(bs);
- memset(s, 0, sizeof(BDRVQEDState));
- bdrv_qed_open(bs, NULL, bs->open_flags);
-}
-
-static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
- BdrvCheckMode fix)
-{
- BDRVQEDState *s = bs->opaque;
-
- return qed_check(s, result, !!fix);
-}
-
-static QEMUOptionParameter qed_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size (in bytes)"
- }, {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- }, {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- }, {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "Cluster size (in bytes)",
- .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
- }, {
- .name = BLOCK_OPT_TABLE_SIZE,
- .type = OPT_SIZE,
- .help = "L1/L2 table size (in clusters)"
- },
- { /* end of list */ }
-};
-
-static BlockDriver bdrv_qed = {
- .format_name = "qed",
- .instance_size = sizeof(BDRVQEDState),
- .create_options = qed_create_options,
-
- .bdrv_probe = bdrv_qed_probe,
- .bdrv_rebind = bdrv_qed_rebind,
- .bdrv_open = bdrv_qed_open,
- .bdrv_close = bdrv_qed_close,
- .bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
- .bdrv_create = bdrv_qed_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
- .bdrv_make_empty = bdrv_qed_make_empty,
- .bdrv_aio_readv = bdrv_qed_aio_readv,
- .bdrv_aio_writev = bdrv_qed_aio_writev,
- .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
- .bdrv_truncate = bdrv_qed_truncate,
- .bdrv_getlength = bdrv_qed_getlength,
- .bdrv_get_info = bdrv_qed_get_info,
- .bdrv_change_backing_file = bdrv_qed_change_backing_file,
- .bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
- .bdrv_check = bdrv_qed_check,
-};
-
-static void bdrv_qed_init(void)
-{
- bdrv_register(&bdrv_qed);
-}
-
-block_init(bdrv_qed_init);
diff --git a/contrib/qemu/block/qed.h b/contrib/qemu/block/qed.h
deleted file mode 100644
index 2b4ddedf313..00000000000
--- a/contrib/qemu/block/qed.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef BLOCK_QED_H
-#define BLOCK_QED_H
-
-#include "block/block_int.h"
-
-/* The layout of a QED file is as follows:
- *
- * +--------+----------+----------+----------+-----+
- * | header | L1 table | cluster0 | cluster1 | ... |
- * +--------+----------+----------+----------+-----+
- *
- * There is a 2-level pagetable for cluster allocation:
- *
- * +----------+
- * | L1 table |
- * +----------+
- * ,------' | '------.
- * +----------+ | +----------+
- * | L2 table | ... | L2 table |
- * +----------+ +----------+
- * ,------' | '------.
- * +----------+ | +----------+
- * | Data | ... | Data |
- * +----------+ +----------+
- *
- * The L1 table is fixed size and always present. L2 tables are allocated on
- * demand. The L1 table size determines the maximum possible image size; it
- * can be influenced using the cluster_size and table_size values.
- *
- * All fields are little-endian on disk.
- */
-
-enum {
- QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
-
- /* The image supports a backing file */
- QED_F_BACKING_FILE = 0x01,
-
- /* The image needs a consistency check before use */
- QED_F_NEED_CHECK = 0x02,
-
- /* The backing file format must not be probed, treat as raw image */
- QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
-
- /* Feature bits must be used when the on-disk format changes */
- QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
- QED_F_NEED_CHECK |
- QED_F_BACKING_FORMAT_NO_PROBE,
- QED_COMPAT_FEATURE_MASK = 0, /* supported compat feature bits */
- QED_AUTOCLEAR_FEATURE_MASK = 0, /* supported autoclear feature bits */
-
- /* Data is stored in groups of sectors called clusters. Cluster size must
- * be large to avoid keeping too much metadata. I/O requests that have
- * sub-cluster size will require read-modify-write.
- */
- QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
- QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
- QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
-
- /* Allocated clusters are tracked using a 2-level pagetable. Table size is
- * a multiple of clusters so large maximum image sizes can be supported
- * without jacking up the cluster size too much.
- */
- QED_MIN_TABLE_SIZE = 1, /* in clusters */
- QED_MAX_TABLE_SIZE = 16,
- QED_DEFAULT_TABLE_SIZE = 4,
-
- /* Delay to flush and clean image after last allocating write completes */
- QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */
-};
-
-typedef struct {
- uint32_t magic; /* QED\0 */
-
- uint32_t cluster_size; /* in bytes */
- uint32_t table_size; /* for L1 and L2 tables, in clusters */
- uint32_t header_size; /* in clusters */
-
- uint64_t features; /* format feature bits */
- uint64_t compat_features; /* compatible feature bits */
- uint64_t autoclear_features; /* self-resetting feature bits */
-
- uint64_t l1_table_offset; /* in bytes */
- uint64_t image_size; /* total logical image size, in bytes */
-
- /* if (features & QED_F_BACKING_FILE) */
- uint32_t backing_filename_offset; /* in bytes from start of header */
- uint32_t backing_filename_size; /* in bytes */
-} QEDHeader;
-
-typedef struct {
- uint64_t offsets[0]; /* in bytes */
-} QEDTable;
-
-/* The L2 cache is a simple write-through cache for L2 structures */
-typedef struct CachedL2Table {
- QEDTable *table;
- uint64_t offset; /* offset=0 indicates an invalidate entry */
- QTAILQ_ENTRY(CachedL2Table) node;
- int ref;
-} CachedL2Table;
-
-typedef struct {
- QTAILQ_HEAD(, CachedL2Table) entries;
- unsigned int n_entries;
-} L2TableCache;
-
-typedef struct QEDRequest {
- CachedL2Table *l2_table;
-} QEDRequest;
-
-enum {
- QED_AIOCB_WRITE = 0x0001, /* read or write? */
- QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */
-};
-
-typedef struct QEDAIOCB {
- BlockDriverAIOCB common;
- QEMUBH *bh;
- int bh_ret; /* final return status for completion bh */
- QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
- int flags; /* QED_AIOCB_* bits ORed together */
- bool *finished; /* signal for cancel completion */
- uint64_t end_pos; /* request end on block device, in bytes */
-
- /* User scatter-gather list */
- QEMUIOVector *qiov;
- size_t qiov_offset; /* byte count already processed */
-
- /* Current cluster scatter-gather list */
- QEMUIOVector cur_qiov;
- uint64_t cur_pos; /* position on block device, in bytes */
- uint64_t cur_cluster; /* cluster offset in image file */
- unsigned int cur_nclusters; /* number of clusters being accessed */
- int find_cluster_ret; /* used for L1/L2 update */
-
- QEDRequest request;
-} QEDAIOCB;
-
-typedef struct {
- BlockDriverState *bs; /* device */
- uint64_t file_size; /* length of image file, in bytes */
-
- QEDHeader header; /* always cpu-endian */
- QEDTable *l1_table;
- L2TableCache l2_cache; /* l2 table cache */
- uint32_t table_nelems;
- uint32_t l1_shift;
- uint32_t l2_shift;
- uint32_t l2_mask;
-
- /* Allocating write request queue */
- QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
- bool allocating_write_reqs_plugged;
-
- /* Periodic flush and clear need check flag */
- QEMUTimer *need_check_timer;
-} BDRVQEDState;
-
-enum {
- QED_CLUSTER_FOUND, /* cluster found */
- QED_CLUSTER_ZERO, /* zero cluster found */
- QED_CLUSTER_L2, /* cluster missing in L2 */
- QED_CLUSTER_L1, /* cluster missing in L1 */
-};
-
-/**
- * qed_find_cluster() completion callback
- *
- * @opaque: User data for completion callback
- * @ret: QED_CLUSTER_FOUND Success
- * QED_CLUSTER_L2 Data cluster unallocated in L2
- * QED_CLUSTER_L1 L2 unallocated in L1
- * -errno POSIX error occurred
- * @offset: Data cluster offset
- * @len: Contiguous bytes starting from cluster offset
- *
- * This function is invoked when qed_find_cluster() completes.
- *
- * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
- * in the image file.
- *
- * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
- * table offset, respectively. len is number of contiguous unallocated bytes.
- */
-typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
-
-/**
- * Generic callback for chaining async callbacks
- */
-typedef struct {
- BlockDriverCompletionFunc *cb;
- void *opaque;
-} GenericCB;
-
-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
-void gencb_complete(void *opaque, int ret);
-
-/**
- * Header functions
- */
-int qed_write_header_sync(BDRVQEDState *s);
-
-/**
- * L2 cache functions
- */
-void qed_init_l2_cache(L2TableCache *l2_cache);
-void qed_free_l2_cache(L2TableCache *l2_cache);
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
-void qed_unref_l2_cache_entry(CachedL2Table *entry);
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
-
-/**
- * Table I/O functions
- */
-int qed_read_l1_table_sync(BDRVQEDState *s);
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
- BlockDriverCompletionFunc *cb, void *opaque);
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
- unsigned int n);
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- uint64_t offset);
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
- BlockDriverCompletionFunc *cb, void *opaque);
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush,
- BlockDriverCompletionFunc *cb, void *opaque);
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush);
-
-/**
- * Cluster functions
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
- size_t len, QEDFindClusterFunc *cb, void *opaque);
-
-/**
- * Consistency check
- */
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
-
-QEDTable *qed_alloc_table(BDRVQEDState *s);
-
-/**
- * Round down to the start of a cluster
- */
-static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
-{
- return offset & ~(uint64_t)(s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
-{
- return offset & (s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
-{
- return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
- (s->header.cluster_size - 1);
-}
-
-static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
-{
- return pos >> s->l1_shift;
-}
-
-static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
-{
- return (pos >> s->l2_shift) & s->l2_mask;
-}
-
-/**
- * Test if a cluster offset is valid
- */
-static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
-{
- uint64_t header_size = (uint64_t)s->header.header_size *
- s->header.cluster_size;
-
- if (offset & (s->header.cluster_size - 1)) {
- return false;
- }
- return offset >= header_size && offset < s->file_size;
-}
-
-/**
- * Test if a table offset is valid
- */
-static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
-{
- uint64_t end_offset = offset + (s->header.table_size - 1) *
- s->header.cluster_size;
-
- /* Overflow check */
- if (end_offset <= offset) {
- return false;
- }
-
- return qed_check_cluster_offset(s, offset) &&
- qed_check_cluster_offset(s, end_offset);
-}
-
-static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
- uint64_t offset)
-{
- if (qed_offset_into_cluster(s, offset)) {
- return false;
- }
- return true;
-}
-
-static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
-{
- if (offset == 0) {
- return true;
- }
- return false;
-}
-
-static inline bool qed_offset_is_zero_cluster(uint64_t offset)
-{
- if (offset == 1) {
- return true;
- }
- return false;
-}
-
-#endif /* BLOCK_QED_H */
diff --git a/contrib/qemu/block/snapshot.c b/contrib/qemu/block/snapshot.c
deleted file mode 100644
index 6c6d9deea1f..00000000000
--- a/contrib/qemu/block/snapshot.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Block layer snapshot related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/snapshot.h"
-#include "block/block_int.h"
-
-int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
- const char *name)
-{
- QEMUSnapshotInfo *sn_tab, *sn;
- int nb_sns, i, ret;
-
- ret = -ENOENT;
- nb_sns = bdrv_snapshot_list(bs, &sn_tab);
- if (nb_sns < 0) {
- return ret;
- }
- for (i = 0; i < nb_sns; i++) {
- sn = &sn_tab[i];
- if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
- *sn_info = *sn;
- ret = 0;
- break;
- }
- }
- g_free(sn_tab);
- return ret;
-}
-
-int bdrv_can_snapshot(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
- return 0;
- }
-
- if (!drv->bdrv_snapshot_create) {
- if (bs->file != NULL) {
- return bdrv_can_snapshot(bs->file);
- }
- return 0;
- }
-
- return 1;
-}
-
-int bdrv_snapshot_create(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_create) {
- return drv->bdrv_snapshot_create(bs, sn_info);
- }
- if (bs->file) {
- return bdrv_snapshot_create(bs->file, sn_info);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_goto(BlockDriverState *bs,
- const char *snapshot_id)
-{
- BlockDriver *drv = bs->drv;
- int ret, open_ret;
-
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_goto) {
- return drv->bdrv_snapshot_goto(bs, snapshot_id);
- }
-
- if (bs->file) {
- drv->bdrv_close(bs);
- ret = bdrv_snapshot_goto(bs->file, snapshot_id);
- open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
- if (open_ret < 0) {
- bdrv_delete(bs->file);
- bs->drv = NULL;
- return open_ret;
- }
- return ret;
- }
-
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_delete) {
- return drv->bdrv_snapshot_delete(bs, snapshot_id);
- }
- if (bs->file) {
- return bdrv_snapshot_delete(bs->file, snapshot_id);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_list(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_list) {
- return drv->bdrv_snapshot_list(bs, psn_info);
- }
- if (bs->file) {
- return bdrv_snapshot_list(bs->file, psn_info);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_load_tmp(BlockDriverState *bs,
- const char *snapshot_name)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (!bs->read_only) {
- return -EINVAL;
- }
- if (drv->bdrv_snapshot_load_tmp) {
- return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
- }
- return -ENOTSUP;
-}
diff --git a/contrib/qemu/config-host.h b/contrib/qemu/config-host.h
deleted file mode 100644
index 6b5c8da1243..00000000000
--- a/contrib/qemu/config-host.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* Automatically generated by create_config - do not modify */
-#define CONFIG_QEMU_CONFDIR "/usr/local/etc/qemu"
-#define CONFIG_QEMU_DATADIR "/usr/local/share/qemu"
-#define CONFIG_QEMU_DOCDIR "/usr/local/share/doc/qemu"
-#define CONFIG_QEMU_LOCALSTATEDIR "/usr/local/var"
-#define CONFIG_QEMU_HELPERDIR "/usr/local/libexec"
-#define CONFIG_QEMU_LOCALEDIR "/usr/local/share/locale"
-#define HOST_X86_64 1
-#define CONFIG_QEMU_LDST_OPTIMIZATION 1
-#define CONFIG_POSIX 1
-#define CONFIG_LINUX 1
-#define CONFIG_SLIRP 1
-#define CONFIG_SMBD_COMMAND "/usr/sbin/smbd"
-#define CONFIG_AUDIO_DRIVERS \
- &oss_audio_driver,\
-
-#define CONFIG_OSS 1
-#define CONFIG_BDRV_RW_WHITELIST\
- NULL
-#define CONFIG_BDRV_RO_WHITELIST\
- NULL
-#define CONFIG_VNC 1
-#define CONFIG_VNC_TLS 1
-#define CONFIG_VNC_SASL 1
-#define CONFIG_VNC_WS 1
-#define CONFIG_FNMATCH 1
-#define CONFIG_UUID 1
-#define CONFIG_XFS 1
-#define QEMU_VERSION "1.5.50"
-#define QEMU_PKGVERSION ""
-#define CONFIG_CURSES 1
-#define CONFIG_UTIMENSAT 1
-#define CONFIG_PIPE2 1
-#define CONFIG_ACCEPT4 1
-#define CONFIG_SPLICE 1
-#define CONFIG_EVENTFD 1
-#define CONFIG_FALLOCATE 1
-#define CONFIG_FALLOCATE_PUNCH_HOLE 1
-#define CONFIG_SYNC_FILE_RANGE 1
-#define CONFIG_FIEMAP 1
-#define CONFIG_DUP3 1
-#define CONFIG_EPOLL 1
-#define CONFIG_EPOLL_CREATE1 1
-#define CONFIG_EPOLL_PWAIT 1
-#define CONFIG_SENDFILE 1
-#define CONFIG_INOTIFY 1
-#define CONFIG_INOTIFY1 1
-#define CONFIG_BYTESWAP_H 1
-#define CONFIG_CURL 1
-#define CONFIG_LINUX_AIO 1
-#define CONFIG_ATTR 1
-#define CONFIG_VHOST_SCSI 1
-#define CONFIG_IOVEC 1
-#define CONFIG_PREADV 1
-#define CONFIG_FDT 1
-#define CONFIG_SIGNALFD 1
-#define CONFIG_FDATASYNC 1
-#define CONFIG_MADVISE 1
-#define CONFIG_POSIX_MADVISE 1
-#define CONFIG_SIGEV_THREAD_ID 1
-#define CONFIG_UNAME_RELEASE ""
-#define CONFIG_QOM_CAST_DEBUG 1
-#define CONFIG_COROUTINE_BACKEND ucontext
-#define CONFIG_OPEN_BY_HANDLE 1
-#define CONFIG_LINUX_MAGIC_H 1
-#define CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE 1
-#define CONFIG_HAS_ENVIRON 1
-#define CONFIG_CPUID_H 1
-#define CONFIG_VIRTIO_BLK_DATA_PLANE $(CONFIG_VIRTIO)
-#define CONFIG_TRACE_NOP 1
-#define CONFIG_TRACE_FILE trace
-#define CONFIG_TRACE_DEFAULT 1
diff --git a/contrib/qemu/coroutine-ucontext.c b/contrib/qemu/coroutine-ucontext.c
deleted file mode 100644
index 4bf2cde279b..00000000000
--- a/contrib/qemu/coroutine-ucontext.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * ucontext coroutine initialization code
- *
- * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
- * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.0 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
-#ifdef _FORTIFY_SOURCE
-#undef _FORTIFY_SOURCE
-#endif
-#include <stdlib.h>
-#include <setjmp.h>
-#include <stdint.h>
-#include <pthread.h>
-#include <ucontext.h>
-#include "qemu-common.h"
-#include "block/coroutine_int.h"
-
-#ifdef CONFIG_VALGRIND_H
-#include <valgrind/valgrind.h>
-#endif
-
-typedef struct {
- Coroutine base;
- void *stack;
- sigjmp_buf env;
-
-#ifdef CONFIG_VALGRIND_H
- unsigned int valgrind_stack_id;
-#endif
-
-} CoroutineUContext;
-
-/**
- * Per-thread coroutine bookkeeping
- */
-typedef struct {
- /** Currently executing coroutine */
- Coroutine *current;
-
- /** The default coroutine */
- CoroutineUContext leader;
-} CoroutineThreadState;
-
-static pthread_key_t thread_state_key;
-
-/*
- * va_args to makecontext() must be type 'int', so passing
- * the pointer we need may require several int args. This
- * union is a quick hack to let us do that
- */
-union cc_arg {
- void *p;
- int i[2];
-};
-
-static CoroutineThreadState *coroutine_get_thread_state(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- if (!s) {
- s = g_malloc0(sizeof(*s));
- s->current = &s->leader.base;
- pthread_setspecific(thread_state_key, s);
- }
- return s;
-}
-
-static void qemu_coroutine_thread_cleanup(void *opaque)
-{
- CoroutineThreadState *s = opaque;
-
- g_free(s);
-}
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
- int ret;
-
- ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
- if (ret != 0) {
- fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
- abort();
- }
-}
-
-static void coroutine_trampoline(int i0, int i1)
-{
- union cc_arg arg;
- CoroutineUContext *self;
- Coroutine *co;
-
- arg.i[0] = i0;
- arg.i[1] = i1;
- self = arg.p;
- co = &self->base;
-
- /* Initialize longjmp environment and switch back the caller */
- if (!sigsetjmp(self->env, 0)) {
- siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
- }
-
- while (true) {
- co->entry(co->entry_arg);
- qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
- }
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
- const size_t stack_size = 1 << 20;
- CoroutineUContext *co;
- ucontext_t old_uc, uc;
- sigjmp_buf old_env;
- union cc_arg arg = {0};
-
- /* The ucontext functions preserve signal masks which incurs a
- * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
- * preserve signal masks but only works on the current stack.
- * Since we need a way to create and switch to a new stack, use
- * the ucontext functions for that but sigsetjmp()/siglongjmp() for
- * everything else.
- */
-
- if (getcontext(&uc) == -1) {
- abort();
- }
-
- co = g_malloc0(sizeof(*co));
- co->stack = g_malloc(stack_size);
- co->base.entry_arg = &old_env; /* stash away our jmp_buf */
-
- uc.uc_link = &old_uc;
- uc.uc_stack.ss_sp = co->stack;
- uc.uc_stack.ss_size = stack_size;
- uc.uc_stack.ss_flags = 0;
-
-#ifdef CONFIG_VALGRIND_H
- co->valgrind_stack_id =
- VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
-#endif
-
- arg.p = co;
-
- makecontext(&uc, (void (*)(void))coroutine_trampoline,
- 2, arg.i[0], arg.i[1]);
-
- /* swapcontext() in, siglongjmp() back out */
- if (!sigsetjmp(old_env, 0)) {
- swapcontext(&old_uc, &uc);
- }
- return &co->base;
-}
-
-#ifdef CONFIG_VALGRIND_H
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-/* Work around an unused variable in the valgrind.h macro... */
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-static inline void valgrind_stack_deregister(CoroutineUContext *co)
-{
- VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
-}
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-#pragma GCC diagnostic pop
-#endif
-#endif
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
- CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
-
-#ifdef CONFIG_VALGRIND_H
- valgrind_stack_deregister(co);
-#endif
-
- g_free(co->stack);
- g_free(co);
-}
-
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
- CoroutineAction action)
-{
- CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
- CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
- CoroutineThreadState *s = coroutine_get_thread_state();
- int ret;
-
- s->current = to_;
-
- ret = sigsetjmp(from->env, 0);
- if (ret == 0) {
- siglongjmp(to->env, action);
- }
- return ret;
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
- CoroutineThreadState *s = coroutine_get_thread_state();
-
- return s->current;
-}
-
-bool qemu_in_coroutine(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- return s && s->current->caller;
-}
diff --git a/contrib/qemu/include/block/aio.h b/contrib/qemu/include/block/aio.h
deleted file mode 100644
index 183679374fa..00000000000
--- a/contrib/qemu/include/block/aio.h
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * QEMU aio implementation
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_AIO_H
-#define QEMU_AIO_H
-
-#include "qemu-common.h"
-#include "qemu/queue.h"
-#include "qemu/event_notifier.h"
-
-typedef struct BlockDriverAIOCB BlockDriverAIOCB;
-typedef void BlockDriverCompletionFunc(void *opaque, int ret);
-
-typedef struct AIOCBInfo {
- void (*cancel)(BlockDriverAIOCB *acb);
- size_t aiocb_size;
-} AIOCBInfo;
-
-struct BlockDriverAIOCB {
- const AIOCBInfo *aiocb_info;
- BlockDriverState *bs;
- BlockDriverCompletionFunc *cb;
- void *opaque;
-};
-
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
-void qemu_aio_release(void *p);
-
-typedef struct AioHandler AioHandler;
-typedef void QEMUBHFunc(void *opaque);
-typedef void IOHandler(void *opaque);
-
-typedef struct AioContext {
- GSource source;
-
- /* The list of registered AIO handlers */
- QLIST_HEAD(, AioHandler) aio_handlers;
-
- /* This is a simple lock used to protect the aio_handlers list.
- * Specifically, it's used to ensure that no callbacks are removed while
- * we're walking and dispatching callbacks.
- */
- int walking_handlers;
-
- /* Anchor of the list of Bottom Halves belonging to the context */
- struct QEMUBH *first_bh;
-
- /* A simple lock used to protect the first_bh list, and ensure that
- * no callbacks are removed while we're walking and dispatching callbacks.
- */
- int walking_bh;
-
- /* Used for aio_notify. */
- EventNotifier notifier;
-
- /* GPollFDs for aio_poll() */
- GArray *pollfds;
-
- /* Thread pool for performing work and receiving completion callbacks */
- struct ThreadPool *thread_pool;
-} AioContext;
-
-/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
-typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
-
-/**
- * aio_context_new: Allocate a new AioContext.
- *
- * AioContext provide a mini event-loop that can be waited on synchronously.
- * They also provide bottom halves, a service to execute a piece of code
- * as soon as possible.
- */
-AioContext *aio_context_new(void);
-
-/**
- * aio_context_ref:
- * @ctx: The AioContext to operate on.
- *
- * Add a reference to an AioContext.
- */
-void aio_context_ref(AioContext *ctx);
-
-/**
- * aio_context_unref:
- * @ctx: The AioContext to operate on.
- *
- * Drop a reference to an AioContext.
- */
-void aio_context_unref(AioContext *ctx);
-
-/**
- * aio_bh_new: Allocate a new bottom half structure.
- *
- * Bottom halves are lightweight callbacks whose invocation is guaranteed
- * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
- * is opaque and must be allocated prior to its use.
- */
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
-
-/**
- * aio_notify: Force processing of pending events.
- *
- * Similar to signaling a condition variable, aio_notify forces
- * aio_wait to exit, so that the next call will re-examine pending events.
- * The caller of aio_notify will usually call aio_wait again very soon,
- * or go through another iteration of the GLib main loop. Hence, aio_notify
- * also has the side effect of recalculating the sets of file descriptors
- * that the main loop waits for.
- *
- * Calling aio_notify is rarely necessary, because for example scheduling
- * a bottom half calls it already.
- */
-void aio_notify(AioContext *ctx);
-
-/**
- * aio_bh_poll: Poll bottom halves for an AioContext.
- *
- * These are internal functions used by the QEMU main loop.
- */
-int aio_bh_poll(AioContext *ctx);
-
-/**
- * qemu_bh_schedule: Schedule a bottom half.
- *
- * Scheduling a bottom half interrupts the main loop and causes the
- * execution of the callback that was passed to qemu_bh_new.
- *
- * Bottom halves that are scheduled from a bottom half handler are instantly
- * invoked. This can create an infinite loop if a bottom half handler
- * schedules itself.
- *
- * @bh: The bottom half to be scheduled.
- */
-void qemu_bh_schedule(QEMUBH *bh);
-
-/**
- * qemu_bh_cancel: Cancel execution of a bottom half.
- *
- * Canceling execution of a bottom half undoes the effect of calls to
- * qemu_bh_schedule without freeing its resources yet. While cancellation
- * itself is also wait-free and thread-safe, it can of course race with the
- * loop that executes bottom halves unless you are holding the iothread
- * mutex. This makes it mostly useless if you are not holding the mutex.
- *
- * @bh: The bottom half to be canceled.
- */
-void qemu_bh_cancel(QEMUBH *bh);
-
-/**
- *qemu_bh_delete: Cancel execution of a bottom half and free its resources.
- *
- * Deleting a bottom half frees the memory that was allocated for it by
- * qemu_bh_new. It also implies canceling the bottom half if it was
- * scheduled.
- *
- * @bh: The bottom half to be deleted.
- */
-void qemu_bh_delete(QEMUBH *bh);
-
-/* Return whether there are any pending callbacks from the GSource
- * attached to the AioContext.
- *
- * This is used internally in the implementation of the GSource.
- */
-bool aio_pending(AioContext *ctx);
-
-/* Progress in completing AIO work to occur. This can issue new pending
- * aio as a result of executing I/O completion or bh callbacks.
- *
- * If there is no pending AIO operation or completion (bottom half),
- * return false. If there are pending AIO operations of bottom halves,
- * return true.
- *
- * If there are no pending bottom halves, but there are pending AIO
- * operations, it may not be possible to make any progress without
- * blocking. If @blocking is true, this function will wait until one
- * or more AIO events have completed, to ensure something has moved
- * before returning.
- */
-bool aio_poll(AioContext *ctx, bool blocking);
-
-#ifdef CONFIG_POSIX
-/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
-typedef int (AioFlushHandler)(void *opaque);
-
-/* Register a file descriptor and associated callbacks. Behaves very similarly
- * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
- * be invoked when using qemu_aio_wait().
- *
- * Code that invokes AIO completion functions should rely on this function
- * instead of qemu_set_fd_handler[2].
- */
-void aio_set_fd_handler(AioContext *ctx,
- int fd,
- IOHandler *io_read,
- IOHandler *io_write,
- AioFlushHandler *io_flush,
- void *opaque);
-#endif
-
-/* Register an event notifier and associated callbacks. Behaves very similarly
- * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
- * will be invoked when using qemu_aio_wait().
- *
- * Code that invokes AIO completion functions should rely on this function
- * instead of event_notifier_set_handler.
- */
-void aio_set_event_notifier(AioContext *ctx,
- EventNotifier *notifier,
- EventNotifierHandler *io_read,
- AioFlushEventNotifierHandler *io_flush);
-
-/* Return a GSource that lets the main loop poll the file descriptors attached
- * to this AioContext.
- */
-GSource *aio_get_g_source(AioContext *ctx);
-
-/* Return the ThreadPool bound to this AioContext */
-struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
-
-/* Functions to operate on the main QEMU AioContext. */
-
-bool qemu_aio_wait(void);
-void qemu_aio_set_event_notifier(EventNotifier *notifier,
- EventNotifierHandler *io_read,
- AioFlushEventNotifierHandler *io_flush);
-
-#ifdef CONFIG_POSIX
-void qemu_aio_set_fd_handler(int fd,
- IOHandler *io_read,
- IOHandler *io_write,
- AioFlushHandler *io_flush,
- void *opaque);
-#endif
-
-#endif
diff --git a/contrib/qemu/include/block/block.h b/contrib/qemu/include/block/block.h
deleted file mode 100644
index b6b9014a9ce..00000000000
--- a/contrib/qemu/include/block/block.h
+++ /dev/null
@@ -1,443 +0,0 @@
-#ifndef BLOCK_H
-#define BLOCK_H
-
-#include "block/aio.h"
-#include "qemu-common.h"
-#include "qemu/option.h"
-#include "block/coroutine.h"
-#include "qapi/qmp/qobject.h"
-#include "qapi-types.h"
-
-/* block.c */
-typedef struct BlockDriver BlockDriver;
-typedef struct BlockJob BlockJob;
-
-typedef struct BlockDriverInfo {
- /* in bytes, 0 if irrelevant */
- int cluster_size;
- /* offset at which the VM state can be saved (0 if not possible) */
- int64_t vm_state_offset;
- bool is_dirty;
-} BlockDriverInfo;
-
-typedef struct BlockFragInfo {
- uint64_t allocated_clusters;
- uint64_t total_clusters;
- uint64_t fragmented_clusters;
- uint64_t compressed_clusters;
-} BlockFragInfo;
-
-/* Callbacks for block device models */
-typedef struct BlockDevOps {
- /*
- * Runs when virtual media changed (monitor commands eject, change)
- * Argument load is true on load and false on eject.
- * Beware: doesn't run when a host device's physical media
- * changes. Sure would be useful if it did.
- * Device models with removable media must implement this callback.
- */
- void (*change_media_cb)(void *opaque, bool load);
- /*
- * Runs when an eject request is issued from the monitor, the tray
- * is closed, and the medium is locked.
- * Device models that do not implement is_medium_locked will not need
- * this callback. Device models that can lock the medium or tray might
- * want to implement the callback and unlock the tray when "force" is
- * true, even if they do not support eject requests.
- */
- void (*eject_request_cb)(void *opaque, bool force);
- /*
- * Is the virtual tray open?
- * Device models implement this only when the device has a tray.
- */
- bool (*is_tray_open)(void *opaque);
- /*
- * Is the virtual medium locked into the device?
- * Device models implement this only when device has such a lock.
- */
- bool (*is_medium_locked)(void *opaque);
- /*
- * Runs when the size changed (e.g. monitor command block_resize)
- */
- void (*resize_cb)(void *opaque);
-} BlockDevOps;
-
-#define BDRV_O_RDWR 0x0002
-#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
-#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
-#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
-#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
-#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
-#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
-#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
-#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
-#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
-#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
-
-#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
-
-#define BDRV_SECTOR_BITS 9
-#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
-#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
-
-typedef enum {
- BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
-} BlockErrorAction;
-
-typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
-
-typedef struct BDRVReopenState {
- BlockDriverState *bs;
- int flags;
- void *opaque;
-} BDRVReopenState;
-
-
-void bdrv_iostatus_enable(BlockDriverState *bs);
-void bdrv_iostatus_reset(BlockDriverState *bs);
-void bdrv_iostatus_disable(BlockDriverState *bs);
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
-void bdrv_info_print(Monitor *mon, const QObject *data);
-void bdrv_info(Monitor *mon, QObject **ret_data);
-void bdrv_stats_print(Monitor *mon, const QObject *data);
-void bdrv_info_stats(Monitor *mon, QObject **ret_data);
-
-/* disk I/O throttling */
-void bdrv_io_limits_enable(BlockDriverState *bs);
-void bdrv_io_limits_disable(BlockDriverState *bs);
-bool bdrv_io_limits_enabled(BlockDriverState *bs);
-
-void bdrv_init(void);
-void bdrv_init_with_whitelist(void);
-BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix);
-BlockDriver *bdrv_find_format(const char *format_name);
-BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
- bool readonly);
-int bdrv_create(BlockDriver *drv, const char* filename,
- QEMUOptionParameter *options);
-int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
-BlockDriverState *bdrv_new(const char *device_name);
-void bdrv_make_anon(BlockDriverState *bs);
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
-void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
-void bdrv_delete(BlockDriverState *bs);
-int bdrv_parse_cache_flags(const char *mode, int *flags);
-int bdrv_parse_discard_flags(const char *mode, int *flags);
-int bdrv_file_open(BlockDriverState **pbs, const char *filename,
- QDict *options, int flags);
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *options);
-int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
- int flags, BlockDriver *drv);
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, int flags);
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
-int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
-void bdrv_reopen_commit(BDRVReopenState *reopen_state);
-void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-void bdrv_close(BlockDriverState *bs);
-void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
-int bdrv_attach_dev(BlockDriverState *bs, void *dev);
-void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
-void bdrv_detach_dev(BlockDriverState *bs, void *dev);
-void *bdrv_get_attached_dev(BlockDriverState *bs);
-void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
- void *opaque);
-void bdrv_dev_eject_request(BlockDriverState *bs, bool force);
-bool bdrv_dev_has_removable_media(BlockDriverState *bs);
-bool bdrv_dev_is_tray_open(BlockDriverState *bs);
-bool bdrv_dev_is_medium_locked(BlockDriverState *bs);
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
-int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
-int bdrv_pread(BlockDriverState *bs, int64_t offset,
- void *buf, int count);
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int count);
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
- const void *buf, int count);
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
-/*
- * Efficiently zero a region of the disk image. Note that this is a regular
- * I/O request like read or write and should have a reasonable size. This
- * function is not suitable for zeroing the entire image in a single request
- * because it may allocate memory for the entire region.
- */
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors);
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum);
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- int64_t sector_num,
- int nb_sectors, int *pnum);
-BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
- const char *backing_file);
-int bdrv_get_backing_file_depth(BlockDriverState *bs);
-int bdrv_truncate(BlockDriverState *bs, int64_t offset);
-int64_t bdrv_getlength(BlockDriverState *bs);
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-int bdrv_commit(BlockDriverState *bs);
-int bdrv_commit_all(void);
-int bdrv_change_backing_file(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt);
-void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
- BlockDriverState *base);
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
- BlockDriverState *bs);
-BlockDriverState *bdrv_find_base(BlockDriverState *bs);
-
-
-typedef struct BdrvCheckResult {
- int corruptions;
- int leaks;
- int check_errors;
- int corruptions_fixed;
- int leaks_fixed;
- int64_t image_end_offset;
- BlockFragInfo bfi;
-} BdrvCheckResult;
-
-typedef enum {
- BDRV_FIX_LEAKS = 1,
- BDRV_FIX_ERRORS = 2,
-} BdrvCheckMode;
-
-int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
-
-/* async block I/O */
-typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
- int sector_num);
-BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
-BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-void bdrv_aio_cancel(BlockDriverAIOCB *acb);
-
-typedef struct BlockRequest {
- /* Fields to be filled by multiwrite caller */
- int64_t sector;
- int nb_sectors;
- QEMUIOVector *qiov;
- BlockDriverCompletionFunc *cb;
- void *opaque;
-
- /* Filled by multiwrite implementation */
- int error;
-} BlockRequest;
-
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs);
-
-/* sg packet commands */
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
-BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockDriverCompletionFunc *cb, void *opaque);
-
-/* Invalidate any cached metadata used by image formats */
-void bdrv_invalidate_cache(BlockDriverState *bs);
-void bdrv_invalidate_cache_all(void);
-
-void bdrv_clear_incoming_migration_all(void);
-
-/* Ensure contents are flushed to disk. */
-int bdrv_flush(BlockDriverState *bs);
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
-int bdrv_flush_all(void);
-void bdrv_close_all(void);
-void bdrv_drain_all(void);
-
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
-int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
-int bdrv_has_zero_init_1(BlockDriverState *bs);
-int bdrv_has_zero_init(BlockDriverState *bs);
-int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum);
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- int64_t sector_num, int nb_sectors, int *pnum);
-
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error);
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
- bool is_read, int error);
-int bdrv_is_read_only(BlockDriverState *bs);
-int bdrv_is_sg(BlockDriverState *bs);
-int bdrv_enable_write_cache(BlockDriverState *bs);
-void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
-int bdrv_is_inserted(BlockDriverState *bs);
-int bdrv_media_changed(BlockDriverState *bs);
-void bdrv_lock_medium(BlockDriverState *bs, bool locked);
-void bdrv_eject(BlockDriverState *bs, bool eject_flag);
-const char *bdrv_get_format_name(BlockDriverState *bs);
-BlockDriverState *bdrv_find(const char *name);
-BlockDriverState *bdrv_next(BlockDriverState *bs);
-void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
- void *opaque);
-int bdrv_is_encrypted(BlockDriverState *bs);
-int bdrv_key_required(BlockDriverState *bs);
-int bdrv_set_key(BlockDriverState *bs, const char *key);
-int bdrv_query_missing_keys(void);
-void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
- void *opaque);
-const char *bdrv_get_device_name(BlockDriverState *bs);
-int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors);
-
-const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
-void bdrv_get_backing_filename(BlockDriverState *bs,
- char *filename, int filename_size);
-void bdrv_get_full_backing_filename(BlockDriverState *bs,
- char *dest, size_t sz);
-int bdrv_is_snapshot(BlockDriverState *bs);
-
-int path_is_absolute(const char *path);
-void path_combine(char *dest, int dest_size,
- const char *base_path,
- const char *filename);
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size);
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
-
-void bdrv_img_create(const char *filename, const char *fmt,
- const char *base_filename, const char *base_fmt,
- char *options, uint64_t img_size, int flags,
- Error **errp, bool quiet);
-
-void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
-void *qemu_blockalign(BlockDriverState *bs, size_t size);
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-
-struct HBitmapIter;
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
-int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
-int64_t bdrv_get_dirty_count(BlockDriverState *bs);
-
-void bdrv_enable_copy_on_read(BlockDriverState *bs);
-void bdrv_disable_copy_on_read(BlockDriverState *bs);
-
-void bdrv_set_in_use(BlockDriverState *bs, int in_use);
-int bdrv_in_use(BlockDriverState *bs);
-
-#ifdef CONFIG_LINUX_AIO
-int raw_get_aio_fd(BlockDriverState *bs);
-#else
-static inline int raw_get_aio_fd(BlockDriverState *bs)
-{
- return -ENOTSUP;
-}
-#endif
-
-enum BlockAcctType {
- BDRV_ACCT_READ,
- BDRV_ACCT_WRITE,
- BDRV_ACCT_FLUSH,
- BDRV_MAX_IOTYPE,
-};
-
-typedef struct BlockAcctCookie {
- int64_t bytes;
- int64_t start_time_ns;
- enum BlockAcctType type;
-} BlockAcctCookie;
-
-void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
- int64_t bytes, enum BlockAcctType type);
-void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie);
-
-typedef enum {
- BLKDBG_L1_UPDATE,
-
- BLKDBG_L1_GROW_ALLOC_TABLE,
- BLKDBG_L1_GROW_WRITE_TABLE,
- BLKDBG_L1_GROW_ACTIVATE_TABLE,
-
- BLKDBG_L2_LOAD,
- BLKDBG_L2_UPDATE,
- BLKDBG_L2_UPDATE_COMPRESSED,
- BLKDBG_L2_ALLOC_COW_READ,
- BLKDBG_L2_ALLOC_WRITE,
-
- BLKDBG_READ_AIO,
- BLKDBG_READ_BACKING_AIO,
- BLKDBG_READ_COMPRESSED,
-
- BLKDBG_WRITE_AIO,
- BLKDBG_WRITE_COMPRESSED,
-
- BLKDBG_VMSTATE_LOAD,
- BLKDBG_VMSTATE_SAVE,
-
- BLKDBG_COW_READ,
- BLKDBG_COW_WRITE,
-
- BLKDBG_REFTABLE_LOAD,
- BLKDBG_REFTABLE_GROW,
-
- BLKDBG_REFBLOCK_LOAD,
- BLKDBG_REFBLOCK_UPDATE,
- BLKDBG_REFBLOCK_UPDATE_PART,
- BLKDBG_REFBLOCK_ALLOC,
- BLKDBG_REFBLOCK_ALLOC_HOOKUP,
- BLKDBG_REFBLOCK_ALLOC_WRITE,
- BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
- BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
- BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
-
- BLKDBG_CLUSTER_ALLOC,
- BLKDBG_CLUSTER_ALLOC_BYTES,
- BLKDBG_CLUSTER_FREE,
-
- BLKDBG_FLUSH_TO_OS,
- BLKDBG_FLUSH_TO_DISK,
-
- BLKDBG_EVENT_MAX,
-} BlkDebugEvent;
-
-#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
-void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
-
-int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
- const char *tag);
-int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
-bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-
-#endif
diff --git a/contrib/qemu/include/block/block_int.h b/contrib/qemu/include/block/block_int.h
deleted file mode 100644
index c6ac871e210..00000000000
--- a/contrib/qemu/include/block/block_int.h
+++ /dev/null
@@ -1,421 +0,0 @@
-/*
- * QEMU System Emulator block driver
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef BLOCK_INT_H
-#define BLOCK_INT_H
-
-#include "block/block.h"
-#include "qemu/option.h"
-#include "qemu/queue.h"
-#include "block/coroutine.h"
-#include "qemu/timer.h"
-#include "qapi-types.h"
-#include "qapi/qmp/qerror.h"
-#include "monitor/monitor.h"
-#include "qemu/hbitmap.h"
-#include "block/snapshot.h"
-
-#define BLOCK_FLAG_ENCRYPT 1
-#define BLOCK_FLAG_COMPAT6 4
-#define BLOCK_FLAG_LAZY_REFCOUNTS 8
-
-#define BLOCK_IO_LIMIT_READ 0
-#define BLOCK_IO_LIMIT_WRITE 1
-#define BLOCK_IO_LIMIT_TOTAL 2
-
-#define BLOCK_IO_SLICE_TIME 100000000
-#define NANOSECONDS_PER_SECOND 1000000000.0
-
-#define BLOCK_OPT_SIZE "size"
-#define BLOCK_OPT_ENCRYPT "encryption"
-#define BLOCK_OPT_COMPAT6 "compat6"
-#define BLOCK_OPT_BACKING_FILE "backing_file"
-#define BLOCK_OPT_BACKING_FMT "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE "table_size"
-#define BLOCK_OPT_PREALLOC "preallocation"
-#define BLOCK_OPT_SUBFMT "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL "compat"
-#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
-
-typedef struct BdrvTrackedRequest {
- BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
- bool is_write;
- QLIST_ENTRY(BdrvTrackedRequest) list;
- Coroutine *co; /* owner, used for deadlock detection */
- CoQueue wait_queue; /* coroutines blocked on this request */
-} BdrvTrackedRequest;
-
-
-typedef struct BlockIOLimit {
- int64_t bps[3];
- int64_t iops[3];
-} BlockIOLimit;
-
-typedef struct BlockIOBaseValue {
- uint64_t bytes[2];
- uint64_t ios[2];
-} BlockIOBaseValue;
-
-struct BlockDriver {
- const char *format_name;
- int instance_size;
- int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
- int (*bdrv_probe_device)(const char *filename);
-
- /* Any driver implementing this callback is expected to be able to handle
- * NULL file names in its .bdrv_open() implementation */
- void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
-
- /* For handling image reopen for split or non-split files */
- int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
- void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
-
- int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags);
- int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags);
- int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
- int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
- void (*bdrv_close)(BlockDriverState *bs);
- void (*bdrv_rebind)(BlockDriverState *bs);
- int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
- int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
- int (*bdrv_make_empty)(BlockDriverState *bs);
- /* aio */
- BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-
- int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
- int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
- /*
- * Efficiently zero a region of the disk image. Typically an image format
- * would use a compact metadata representation to implement this. This
- * function pointer may be NULL and .bdrv_co_writev() will be called
- * instead.
- */
- int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
- int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
- int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum);
-
- /*
- * Invalidate any cached meta-data.
- */
- void (*bdrv_invalidate_cache)(BlockDriverState *bs);
-
- /*
- * Flushes all data that was already written to the OS all the way down to
- * the disk (for example raw-posix calls fsync()).
- */
- int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
-
- /*
- * Flushes all internal caches to the OS. The data may still sit in a
- * writeback cache of the host OS, but it will survive a crash of the qemu
- * process.
- */
- int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
-
- const char *protocol_name;
- int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
- int64_t (*bdrv_getlength)(BlockDriverState *bs);
- int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
- int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
-
- int (*bdrv_snapshot_create)(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
- int (*bdrv_snapshot_goto)(BlockDriverState *bs,
- const char *snapshot_id);
- int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id);
- int (*bdrv_snapshot_list)(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info);
- int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
- const char *snapshot_name);
- int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
-
- int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t pos);
- int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
-
- int (*bdrv_change_backing_file)(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt);
-
- /* removable device specific */
- int (*bdrv_is_inserted)(BlockDriverState *bs);
- int (*bdrv_media_changed)(BlockDriverState *bs);
- void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
- void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
-
- /* to control generic scsi devices */
- int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
- BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockDriverCompletionFunc *cb, void *opaque);
-
- /* List of options for creating images, terminated by name == NULL */
- QEMUOptionParameter *create_options;
-
-
- /*
- * Returns 0 for completed check, -errno for internal errors.
- * The check results are stored in result.
- */
- int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
- BdrvCheckMode fix);
-
- void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
-
- /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
- int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
- const char *tag);
- int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
- bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
-
- /*
- * Returns 1 if newly created images are guaranteed to contain only
- * zeros, 0 otherwise.
- */
- int (*bdrv_has_zero_init)(BlockDriverState *bs);
-
- QLIST_ENTRY(BlockDriver) list;
-};
-
-/*
- * Note: the function bdrv_append() copies and swaps contents of
- * BlockDriverStates, so if you add new fields to this struct, please
- * inspect bdrv_append() to determine if the new fields need to be
- * copied as well.
- */
-struct BlockDriverState {
- int64_t total_sectors; /* if we are reading a disk image, give its
- size in sectors */
- int read_only; /* if true, the media is read only */
- int open_flags; /* flags used to open the file, re-used for re-open */
- int encrypted; /* if true, the media is encrypted */
- int valid_key; /* if true, a valid encryption key has been set */
- int sg; /* if true, the device is a /dev/sg* */
- int copy_on_read; /* if true, copy read backing sectors into image
- note this is a reference count */
-
- BlockDriver *drv; /* NULL means no media */
- void *opaque;
-
- void *dev; /* attached device model, if any */
- /* TODO change to DeviceState when all users are qdevified */
- const BlockDevOps *dev_ops;
- void *dev_opaque;
-
- char filename[1024];
- char backing_file[1024]; /* if non zero, the image is a diff of
- this file image */
- char backing_format[16]; /* if non-zero and backing_file exists */
- int is_temporary;
-
- BlockDriverState *backing_hd;
- BlockDriverState *file;
-
- NotifierList close_notifiers;
-
- /* Callback before write request is processed */
- NotifierWithReturnList before_write_notifiers;
-
- /* number of in-flight copy-on-read requests */
- unsigned int copy_on_read_in_flight;
-
- /* the time for latest disk I/O */
- int64_t slice_start;
- int64_t slice_end;
- BlockIOLimit io_limits;
- BlockIOBaseValue slice_submitted;
- CoQueue throttled_reqs;
- QEMUTimer *block_timer;
- bool io_limits_enabled;
-
- /* I/O stats (display with "info blockstats"). */
- uint64_t nr_bytes[BDRV_MAX_IOTYPE];
- uint64_t nr_ops[BDRV_MAX_IOTYPE];
- uint64_t total_time_ns[BDRV_MAX_IOTYPE];
- uint64_t wr_highest_sector;
-
- /* Whether the disk can expand beyond total_sectors */
- int growable;
-
- /* the memory alignment required for the buffers handled by this driver */
- int buffer_alignment;
-
- /* do we need to tell the quest if we have a volatile write cache? */
- int enable_write_cache;
-
- /* NOTE: the following infos are only hints for real hardware
- drivers. They are not used by the block driver */
- BlockdevOnError on_read_error, on_write_error;
- bool iostatus_enabled;
- BlockDeviceIoStatus iostatus;
- char device_name[32];
- HBitmap *dirty_bitmap;
- int in_use; /* users other than guest access, eg. block migration */
- QTAILQ_ENTRY(BlockDriverState) list;
-
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
-
- /* long-running background operation */
- BlockJob *job;
-
- QDict *options;
-};
-
-int get_tmp_filename(char *filename, int size);
-
-void bdrv_set_io_limits(BlockDriverState *bs,
- BlockIOLimit *io_limits);
-
-/**
- * bdrv_add_before_write_notifier:
- *
- * Register a callback that is invoked before write requests are processed but
- * after any throttling or waiting for overlapping requests.
- */
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
- NotifierWithReturn *notifier);
-
-/**
- * bdrv_get_aio_context:
- *
- * Returns: the currently bound #AioContext
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
-
-#ifdef _WIN32
-int is_windows_drive(const char *filename);
-#endif
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
- enum MonitorEvent ev,
- BlockErrorAction action, bool is_read);
-
-/**
- * stream_start:
- * @bs: Block device to operate on.
- * @base: Block device that will become the new base, or %NULL to
- * flatten the whole backing file chain onto @bs.
- * @base_id: The file name that will be written to @bs as the new
- * backing file if the job completes. Ignored if @base is %NULL.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Start a streaming operation on @bs. Clusters that are unallocated
- * in @bs, but allocated in any image between @base and @bs (both
- * exclusive) will be written to @bs. At the end of a successful
- * streaming job, the backing file of @bs will be changed to
- * @base_id in the written image and to @base in the live BlockDriverState.
- */
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
- const char *base_id, int64_t speed, BlockdevOnError on_error,
- BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/**
- * commit_start:
- * @bs: Top Block device
- * @base: Block device that will be written into, and become the new top
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- */
-void commit_start(BlockDriverState *bs, BlockDriverState *base,
- BlockDriverState *top, int64_t speed,
- BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/*
- * mirror_start:
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @granularity: The chosen granularity for the dirty bitmap.
- * @buf_size: The amount of data that can be in flight at one time.
- * @mode: Whether to collapse all images in the chain to the target.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Start a mirroring operation on @bs. Clusters that are allocated
- * in @bs will be written to @bs until the job is cancelled or
- * manually completed. At the end of a successful mirroring job,
- * @bs will be switched to read from @target.
- */
-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, int64_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/*
- * backup_start:
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- *
- * Start a backup operation on @bs. Clusters in @bs are written to @target
- * until the job is cancelled or manually completed.
- */
-void backup_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- BlockDriverCompletionFunc *cb, void *opaque,
- Error **errp);
-
-#endif /* BLOCK_INT_H */
diff --git a/contrib/qemu/include/block/blockjob.h b/contrib/qemu/include/block/blockjob.h
deleted file mode 100644
index c290d07bba0..00000000000
--- a/contrib/qemu/include/block/blockjob.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Declarations for long-running block device operations
- *
- * Copyright (c) 2011 IBM Corp.
- * Copyright (c) 2012 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef BLOCKJOB_H
-#define BLOCKJOB_H 1
-
-#include "block/block.h"
-
-/**
- * BlockJobType:
- *
- * A class type for block job objects.
- */
-typedef struct BlockJobType {
- /** Derived BlockJob struct size */
- size_t instance_size;
-
- /** String describing the operation, part of query-block-jobs QMP API */
- const char *job_type;
-
- /** Optional callback for job types that support setting a speed limit */
- void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
-
- /** Optional callback for job types that need to forward I/O status reset */
- void (*iostatus_reset)(BlockJob *job);
-
- /**
- * Optional callback for job types whose completion must be triggered
- * manually.
- */
- void (*complete)(BlockJob *job, Error **errp);
-} BlockJobType;
-
-/**
- * BlockJob:
- *
- * Long-running operation on a BlockDriverState.
- */
-struct BlockJob {
- /** The job type, including the job vtable. */
- const BlockJobType *job_type;
-
- /** The block device on which the job is operating. */
- BlockDriverState *bs;
-
- /**
- * The coroutine that executes the job. If not NULL, it is
- * reentered when busy is false and the job is cancelled.
- */
- Coroutine *co;
-
- /**
- * Set to true if the job should cancel itself. The flag must
- * always be tested just before toggling the busy flag from false
- * to true. After a job has been cancelled, it should only yield
- * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
- */
- bool cancelled;
-
- /**
- * Set to true if the job is either paused, or will pause itself
- * as soon as possible (if busy == true).
- */
- bool paused;
-
- /**
- * Set to false by the job while it is in a quiescent state, where
- * no I/O is pending and the job has yielded on any condition
- * that is not detected by #qemu_aio_wait, such as a timer.
- */
- bool busy;
-
- /** Status that is published by the query-block-jobs QMP API */
- BlockDeviceIoStatus iostatus;
-
- /** Offset that is published by the query-block-jobs QMP API */
- int64_t offset;
-
- /** Length that is published by the query-block-jobs QMP API */
- int64_t len;
-
- /** Speed that was set with @block_job_set_speed. */
- int64_t speed;
-
- /** The completion function that will be called when the job completes. */
- BlockDriverCompletionFunc *cb;
-
- /** The opaque value that is passed to the completion function. */
- void *opaque;
-};
-
-/**
- * block_job_create:
- * @job_type: The class object for the newly-created job.
- * @bs: The block
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Create a new long-running block device job and return it. The job
- * will call @cb asynchronously when the job completes. Note that
- * @bs may have been closed at the time the @cb it is called. If
- * this is the case, the job may be reported as either cancelled or
- * completed.
- *
- * This function is not part of the public job interface; it should be
- * called from a wrapper that is specific to the job type.
- */
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
- int64_t speed, BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp);
-
-/**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @clock: The clock to sleep on.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds. Canceling the job will interrupt the wait immediately.
- */
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
-
-/**
- * block_job_completed:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_completed(BlockJob *job, int ret);
-
-/**
- * block_job_set_speed:
- * @job: The job to set the speed for.
- * @speed: The new value
- * @errp: Error object.
- *
- * Set a rate-limiting parameter for the job; the actual meaning may
- * vary depending on the job type.
- */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job);
-
-/**
- * block_job_complete:
- * @job: The job to be completed.
- * @errp: Error object.
- *
- * Asynchronously complete the specified job.
- */
-void block_job_complete(BlockJob *job, Error **errp);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_query:
- * @job: The job to get information about.
- *
- * Return information about a job.
- */
-BlockJobInfo *block_job_query(BlockJob *job);
-
-/**
- * block_job_pause:
- * @job: The job to be paused.
- *
- * Asynchronously pause the specified job.
- */
-void block_job_pause(BlockJob *job);
-
-/**
- * block_job_resume:
- * @job: The job to be resumed.
- *
- * Resume the specified job.
- */
-void block_job_resume(BlockJob *job);
-
-/**
- * qobject_from_block_job:
- * @job: The job whose information is requested.
- *
- * Return a QDict corresponding to @job's query-block-jobs entry.
- */
-QObject *qobject_from_block_job(BlockJob *job);
-
-/**
- * block_job_ready:
- * @job: The job which is now ready to complete.
- *
- * Send a BLOCK_JOB_READY event for the specified job.
- */
-void block_job_ready(BlockJob *job);
-
-/**
- * block_job_is_paused:
- * @job: The job being queried.
- *
- * Returns whether the job is currently paused, or will pause
- * as soon as it reaches a sleeping point.
- */
-bool block_job_is_paused(BlockJob *job);
-
-/**
- * block_job_cancel_sync:
- * @job: The job to be canceled.
- *
- * Synchronously cancel the job. The completion callback is called
- * before the function returns. The job may actually complete
- * instead of canceling itself; the circumstances under which this
- * happens depend on the kind of job that is active.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
-
-/**
- * block_job_iostatus_reset:
- * @job: The job whose I/O status should be reset.
- *
- * Reset I/O status on @job and on BlockDriverState objects it uses,
- * other than job->bs.
- */
-void block_job_iostatus_reset(BlockJob *job);
-
-/**
- * block_job_error_action:
- * @job: The job to signal an error for.
- * @bs: The block device on which to set an I/O error.
- * @on_err: The error action setting.
- * @is_read: Whether the operation was a read.
- * @error: The error that was reported.
- *
- * Report an I/O error for a block job and possibly stop the VM. Return the
- * action that was selected based on @on_err and @error.
- */
-BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
- BlockdevOnError on_err,
- int is_read, int error);
-#endif
diff --git a/contrib/qemu/include/block/coroutine.h b/contrib/qemu/include/block/coroutine.h
deleted file mode 100644
index 377805a3b08..00000000000
--- a/contrib/qemu/include/block/coroutine.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * QEMU coroutine implementation
- *
- * Copyright IBM, Corp. 2011
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Kevin Wolf <kwolf@redhat.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef QEMU_COROUTINE_H
-#define QEMU_COROUTINE_H
-
-#include <stdbool.h>
-#include "qemu/queue.h"
-#include "qemu/timer.h"
-
-/**
- * Coroutines are a mechanism for stack switching and can be used for
- * cooperative userspace threading. These functions provide a simple but
- * useful flavor of coroutines that is suitable for writing sequential code,
- * rather than callbacks, for operations that need to give up control while
- * waiting for events to complete.
- *
- * These functions are re-entrant and may be used outside the global mutex.
- */
-
-/**
- * Mark a function that executes in coroutine context
- *
- * Functions that execute in coroutine context cannot be called directly from
- * normal functions. In the future it would be nice to enable compiler or
- * static checker support for catching such errors. This annotation might make
- * it possible and in the meantime it serves as documentation.
- *
- * For example:
- *
- * static void coroutine_fn foo(void) {
- * ....
- * }
- */
-#define coroutine_fn
-
-typedef struct Coroutine Coroutine;
-
-/**
- * Coroutine entry point
- *
- * When the coroutine is entered for the first time, opaque is passed in as an
- * argument.
- *
- * When this function returns, the coroutine is destroyed automatically and
- * execution continues in the caller who last entered the coroutine.
- */
-typedef void coroutine_fn CoroutineEntry(void *opaque);
-
-/**
- * Create a new coroutine
- *
- * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
- */
-Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
-
-/**
- * Transfer control to a coroutine
- *
- * The opaque argument is passed as the argument to the entry point when
- * entering the coroutine for the first time. It is subsequently ignored.
- */
-void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
-
-/**
- * Transfer control back to a coroutine's caller
- *
- * This function does not return until the coroutine is re-entered using
- * qemu_coroutine_enter().
- */
-void coroutine_fn qemu_coroutine_yield(void);
-
-/**
- * Get the currently executing coroutine
- */
-Coroutine *coroutine_fn qemu_coroutine_self(void);
-
-/**
- * Return whether or not currently inside a coroutine
- *
- * This can be used to write functions that work both when in coroutine context
- * and when not in coroutine context. Note that such functions cannot use the
- * coroutine_fn annotation since they work outside coroutine context.
- */
-bool qemu_in_coroutine(void);
-
-
-
-/**
- * CoQueues are a mechanism to queue coroutines in order to continue executing
- * them later. They provide the fundamental primitives on which coroutine locks
- * are built.
- */
-typedef struct CoQueue {
- QTAILQ_HEAD(, Coroutine) entries;
- AioContext *ctx;
-} CoQueue;
-
-/**
- * Initialise a CoQueue. This must be called before any other operation is used
- * on the CoQueue.
- */
-void qemu_co_queue_init(CoQueue *queue);
-
-/**
- * Adds the current coroutine to the CoQueue and transfers control to the
- * caller of the coroutine.
- */
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
-
-/**
- * Adds the current coroutine to the head of the CoQueue and transfers control to the
- * caller of the coroutine.
- */
-void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue);
-
-/**
- * Restarts the next coroutine in the CoQueue and removes it from the queue.
- *
- * Returns true if a coroutine was restarted, false if the queue is empty.
- */
-bool qemu_co_queue_next(CoQueue *queue);
-
-/**
- * Restarts all coroutines in the CoQueue and leaves the queue empty.
- */
-void qemu_co_queue_restart_all(CoQueue *queue);
-
-/**
- * Checks if the CoQueue is empty.
- */
-bool qemu_co_queue_empty(CoQueue *queue);
-
-
-/**
- * Provides a mutex that can be used to synchronise coroutines
- */
-typedef struct CoMutex {
- bool locked;
- CoQueue queue;
-} CoMutex;
-
-/**
- * Initialises a CoMutex. This must be called before any other operation is used
- * on the CoMutex.
- */
-void qemu_co_mutex_init(CoMutex *mutex);
-
-/**
- * Locks the mutex. If the lock cannot be taken immediately, control is
- * transferred to the caller of the current coroutine.
- */
-void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
-
-/**
- * Unlocks the mutex and schedules the next coroutine that was waiting for this
- * lock to be run.
- */
-void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
-
-typedef struct CoRwlock {
- bool writer;
- int reader;
- CoQueue queue;
-} CoRwlock;
-
-/**
- * Initialises a CoRwlock. This must be called before any other operation
- * is used on the CoRwlock
- */
-void qemu_co_rwlock_init(CoRwlock *lock);
-
-/**
- * Read locks the CoRwlock. If the lock cannot be taken immediately because
- * of a parallel writer, control is transferred to the caller of the current
- * coroutine.
- */
-void qemu_co_rwlock_rdlock(CoRwlock *lock);
-
-/**
- * Write Locks the mutex. If the lock cannot be taken immediately because
- * of a parallel reader, control is transferred to the caller of the current
- * coroutine.
- */
-void qemu_co_rwlock_wrlock(CoRwlock *lock);
-
-/**
- * Unlocks the read/write lock and schedules the next coroutine that was
- * waiting for this lock to be run.
- */
-void qemu_co_rwlock_unlock(CoRwlock *lock);
-
-/**
- * Yield the coroutine for a given duration
- *
- * Note this function uses timers and hence only works when a main loop is in
- * use. See main-loop.h and do not use from qemu-tool programs.
- */
-void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns);
-
-/**
- * Yield until a file descriptor becomes readable
- *
- * Note that this function clobbers the handlers for the file descriptor.
- */
-void coroutine_fn yield_until_fd_readable(int fd);
-#endif /* QEMU_COROUTINE_H */
diff --git a/contrib/qemu/include/block/coroutine_int.h b/contrib/qemu/include/block/coroutine_int.h
deleted file mode 100644
index f133d65af86..00000000000
--- a/contrib/qemu/include/block/coroutine_int.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Coroutine internals
- *
- * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef QEMU_COROUTINE_INT_H
-#define QEMU_COROUTINE_INT_H
-
-#include "qemu/queue.h"
-#include "block/coroutine.h"
-
-typedef enum {
- COROUTINE_YIELD = 1,
- COROUTINE_TERMINATE = 2,
-} CoroutineAction;
-
-struct Coroutine {
- CoroutineEntry *entry;
- void *entry_arg;
- Coroutine *caller;
- QSLIST_ENTRY(Coroutine) pool_next;
-
- /* Coroutines that should be woken up when we yield or terminate */
- QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
- QTAILQ_ENTRY(Coroutine) co_queue_next;
-};
-
-Coroutine *qemu_coroutine_new(void);
-void qemu_coroutine_delete(Coroutine *co);
-CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
- CoroutineAction action);
-void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
-
-#endif
diff --git a/contrib/qemu/include/block/snapshot.h b/contrib/qemu/include/block/snapshot.h
deleted file mode 100644
index eaf61f0326e..00000000000
--- a/contrib/qemu/include/block/snapshot.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Block layer snapshot related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef SNAPSHOT_H
-#define SNAPSHOT_H
-
-#include "qemu-common.h"
-
-typedef struct QEMUSnapshotInfo {
- char id_str[128]; /* unique snapshot id */
- /* the following fields are informative. They are not needed for
- the consistency of the snapshot */
- char name[256]; /* user chosen name */
- uint64_t vm_state_size; /* VM state info size */
- uint32_t date_sec; /* UTC date of the snapshot */
- uint32_t date_nsec;
- uint64_t vm_clock_nsec; /* VM clock relative to boot */
-} QEMUSnapshotInfo;
-
-int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
- const char *name);
-int bdrv_can_snapshot(BlockDriverState *bs);
-int bdrv_snapshot_create(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
-int bdrv_snapshot_goto(BlockDriverState *bs,
- const char *snapshot_id);
-int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
-int bdrv_snapshot_list(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info);
-int bdrv_snapshot_load_tmp(BlockDriverState *bs,
- const char *snapshot_name);
-#endif
diff --git a/contrib/qemu/include/config.h b/contrib/qemu/include/config.h
deleted file mode 100644
index e20f78696a1..00000000000
--- a/contrib/qemu/include/config.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include "config-host.h"
-#include "config-target.h"
diff --git a/contrib/qemu/include/exec/cpu-common.h b/contrib/qemu/include/exec/cpu-common.h
deleted file mode 100644
index e4996e19c32..00000000000
--- a/contrib/qemu/include/exec/cpu-common.h
+++ /dev/null
@@ -1,124 +0,0 @@
-#ifndef CPU_COMMON_H
-#define CPU_COMMON_H 1
-
-/* CPU interfaces that are target independent. */
-
-#ifndef CONFIG_USER_ONLY
-#include "exec/hwaddr.h"
-#endif
-
-#ifndef NEED_CPU_H
-#include "exec/poison.h"
-#endif
-
-#include "qemu/bswap.h"
-#include "qemu/queue.h"
-
-/**
- * CPUListState:
- * @cpu_fprintf: Print function.
- * @file: File to print to using @cpu_fprint.
- *
- * State commonly used for iterating over CPU models.
- */
-typedef struct CPUListState {
- fprintf_function cpu_fprintf;
- FILE *file;
-} CPUListState;
-
-#if !defined(CONFIG_USER_ONLY)
-
-enum device_endian {
- DEVICE_NATIVE_ENDIAN,
- DEVICE_BIG_ENDIAN,
- DEVICE_LITTLE_ENDIAN,
-};
-
-/* address in the RAM (different from a physical address) */
-#if defined(CONFIG_XEN_BACKEND)
-typedef uint64_t ram_addr_t;
-# define RAM_ADDR_MAX UINT64_MAX
-# define RAM_ADDR_FMT "%" PRIx64
-#else
-typedef uintptr_t ram_addr_t;
-# define RAM_ADDR_MAX UINTPTR_MAX
-# define RAM_ADDR_FMT "%" PRIxPTR
-#endif
-
-/* memory API */
-
-typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
-typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
-
-void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
-/* This should not be used by devices. */
-MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
-void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
-
-void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
- int len, int is_write);
-static inline void cpu_physical_memory_read(hwaddr addr,
- void *buf, int len)
-{
- cpu_physical_memory_rw(addr, buf, len, 0);
-}
-static inline void cpu_physical_memory_write(hwaddr addr,
- const void *buf, int len)
-{
- cpu_physical_memory_rw(addr, (void *)buf, len, 1);
-}
-void *cpu_physical_memory_map(hwaddr addr,
- hwaddr *plen,
- int is_write);
-void cpu_physical_memory_unmap(void *buffer, hwaddr len,
- int is_write, hwaddr access_len);
-void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
-
-bool cpu_physical_memory_is_io(hwaddr phys_addr);
-
-/* Coalesced MMIO regions are areas where write operations can be reordered.
- * This usually implies that write operations are side-effect free. This allows
- * batching which can make a major impact on performance when using
- * virtualization.
- */
-void qemu_flush_coalesced_mmio_buffer(void);
-
-uint32_t ldub_phys(hwaddr addr);
-uint32_t lduw_le_phys(hwaddr addr);
-uint32_t lduw_be_phys(hwaddr addr);
-uint32_t ldl_le_phys(hwaddr addr);
-uint32_t ldl_be_phys(hwaddr addr);
-uint64_t ldq_le_phys(hwaddr addr);
-uint64_t ldq_be_phys(hwaddr addr);
-void stb_phys(hwaddr addr, uint32_t val);
-void stw_le_phys(hwaddr addr, uint32_t val);
-void stw_be_phys(hwaddr addr, uint32_t val);
-void stl_le_phys(hwaddr addr, uint32_t val);
-void stl_be_phys(hwaddr addr, uint32_t val);
-void stq_le_phys(hwaddr addr, uint64_t val);
-void stq_be_phys(hwaddr addr, uint64_t val);
-
-#ifdef NEED_CPU_H
-uint32_t lduw_phys(hwaddr addr);
-uint32_t ldl_phys(hwaddr addr);
-uint64_t ldq_phys(hwaddr addr);
-void stl_phys_notdirty(hwaddr addr, uint32_t val);
-void stw_phys(hwaddr addr, uint32_t val);
-void stl_phys(hwaddr addr, uint32_t val);
-void stq_phys(hwaddr addr, uint64_t val);
-#endif
-
-void cpu_physical_memory_write_rom(hwaddr addr,
- const uint8_t *buf, int len);
-
-extern struct MemoryRegion io_mem_rom;
-extern struct MemoryRegion io_mem_notdirty;
-
-typedef void (RAMBlockIterFunc)(void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque);
-
-void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
-
-#endif
-
-#endif /* !CPU_COMMON_H */
diff --git a/contrib/qemu/include/exec/hwaddr.h b/contrib/qemu/include/exec/hwaddr.h
deleted file mode 100644
index c9eb78fba18..00000000000
--- a/contrib/qemu/include/exec/hwaddr.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Define hwaddr if it exists. */
-
-#ifndef HWADDR_H
-#define HWADDR_H
-
-#define HWADDR_BITS 64
-/* hwaddr is the type of a physical address (its size can
- be different from 'target_ulong'). */
-
-typedef uint64_t hwaddr;
-#define HWADDR_MAX UINT64_MAX
-#define TARGET_FMT_plx "%016" PRIx64
-#define HWADDR_PRId PRId64
-#define HWADDR_PRIi PRIi64
-#define HWADDR_PRIo PRIo64
-#define HWADDR_PRIu PRIu64
-#define HWADDR_PRIx PRIx64
-#define HWADDR_PRIX PRIX64
-
-#endif
diff --git a/contrib/qemu/include/exec/poison.h b/contrib/qemu/include/exec/poison.h
deleted file mode 100644
index 2341a750413..00000000000
--- a/contrib/qemu/include/exec/poison.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* Poison identifiers that should not be used when building
- target independent device code. */
-
-#ifndef HW_POISON_H
-#define HW_POISON_H
-#ifdef __GNUC__
-
-#pragma GCC poison TARGET_I386
-#pragma GCC poison TARGET_X86_64
-#pragma GCC poison TARGET_ALPHA
-#pragma GCC poison TARGET_ARM
-#pragma GCC poison TARGET_CRIS
-#pragma GCC poison TARGET_LM32
-#pragma GCC poison TARGET_M68K
-#pragma GCC poison TARGET_MIPS
-#pragma GCC poison TARGET_MIPS64
-#pragma GCC poison TARGET_OPENRISC
-#pragma GCC poison TARGET_PPC
-#pragma GCC poison TARGET_PPCEMB
-#pragma GCC poison TARGET_PPC64
-#pragma GCC poison TARGET_ABI32
-#pragma GCC poison TARGET_SH4
-#pragma GCC poison TARGET_SPARC
-#pragma GCC poison TARGET_SPARC64
-
-#pragma GCC poison TARGET_WORDS_BIGENDIAN
-#pragma GCC poison BSWAP_NEEDED
-
-#pragma GCC poison TARGET_LONG_BITS
-#pragma GCC poison TARGET_FMT_lx
-#pragma GCC poison TARGET_FMT_ld
-
-#pragma GCC poison TARGET_PAGE_SIZE
-#pragma GCC poison TARGET_PAGE_MASK
-#pragma GCC poison TARGET_PAGE_BITS
-#pragma GCC poison TARGET_PAGE_ALIGN
-
-#pragma GCC poison CPUArchState
-#pragma GCC poison env
-
-#pragma GCC poison lduw_phys
-#pragma GCC poison ldl_phys
-#pragma GCC poison ldq_phys
-#pragma GCC poison stl_phys_notdirty
-#pragma GCC poison stw_phys
-#pragma GCC poison stl_phys
-#pragma GCC poison stq_phys
-
-#pragma GCC poison CPU_INTERRUPT_HARD
-#pragma GCC poison CPU_INTERRUPT_EXITTB
-#pragma GCC poison CPU_INTERRUPT_HALT
-#pragma GCC poison CPU_INTERRUPT_DEBUG
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3
-#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_0
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
-#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
-
-#endif
-#endif
diff --git a/contrib/qemu/include/fpu/softfloat.h b/contrib/qemu/include/fpu/softfloat.h
deleted file mode 100644
index f3927e2419f..00000000000
--- a/contrib/qemu/include/fpu/softfloat.h
+++ /dev/null
@@ -1,641 +0,0 @@
-/*
- * QEMU float support
- *
- * Derived from SoftFloat.
- */
-
-/*============================================================================
-
-This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
-Package, Release 2b.
-
-Written by John R. Hauser. This work was made possible in part by the
-International Computer Science Institute, located at Suite 600, 1947 Center
-Street, Berkeley, California 94704. Funding was partially provided by the
-National Science Foundation under grant MIP-9311980. The original version
-of this code was written as part of a project to build a fixed-point vector
-processor in collaboration with the University of California at Berkeley,
-overseen by Profs. Nelson Morgan and John Wawrzynek. More information
-is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
-arithmetic/SoftFloat.html'.
-
-THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
-been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
-RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
-AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
-COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
-EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
-INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
-OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
-
-Derivative works are acceptable, even for commercial purposes, so long as
-(1) the source code for the derivative work includes prominent notice that
-the work is derivative, and (2) the source code includes prominent notice with
-these four paragraphs for those parts of this code that are retained.
-
-=============================================================================*/
-
-#ifndef SOFTFLOAT_H
-#define SOFTFLOAT_H
-
-#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
-#include <sunmath.h>
-#endif
-
-#include <inttypes.h>
-#include "config-host.h"
-#include "qemu/osdep.h"
-
-/*----------------------------------------------------------------------------
-| Each of the following `typedef's defines the most convenient type that holds
-| integers of at least as many bits as specified. For example, `uint8' should
-| be the most convenient type that can hold unsigned integers of as many as
-| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
-| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
-| to the same as `int'.
-*----------------------------------------------------------------------------*/
-typedef uint8_t flag;
-typedef uint8_t uint8;
-typedef int8_t int8;
-typedef unsigned int uint32;
-typedef signed int int32;
-typedef uint64_t uint64;
-typedef int64_t int64;
-
-#define LIT64( a ) a##LL
-#define INLINE static inline
-
-#define STATUS_PARAM , float_status *status
-#define STATUS(field) status->field
-#define STATUS_VAR , status
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point ordering relations
-*----------------------------------------------------------------------------*/
-enum {
- float_relation_less = -1,
- float_relation_equal = 0,
- float_relation_greater = 1,
- float_relation_unordered = 2
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point types.
-*----------------------------------------------------------------------------*/
-/* Use structures for soft-float types. This prevents accidentally mixing
- them with native int/float types. A sufficiently clever compiler and
- sane ABI should be able to see though these structs. However
- x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
-//#define USE_SOFTFLOAT_STRUCT_TYPES
-#ifdef USE_SOFTFLOAT_STRUCT_TYPES
-typedef struct {
- uint16_t v;
-} float16;
-#define float16_val(x) (((float16)(x)).v)
-#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
-#define const_float16(x) { x }
-typedef struct {
- uint32_t v;
-} float32;
-/* The cast ensures an error if the wrong type is passed. */
-#define float32_val(x) (((float32)(x)).v)
-#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
-#define const_float32(x) { x }
-typedef struct {
- uint64_t v;
-} float64;
-#define float64_val(x) (((float64)(x)).v)
-#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
-#define const_float64(x) { x }
-#else
-typedef uint16_t float16;
-typedef uint32_t float32;
-typedef uint64_t float64;
-#define float16_val(x) (x)
-#define float32_val(x) (x)
-#define float64_val(x) (x)
-#define make_float16(x) (x)
-#define make_float32(x) (x)
-#define make_float64(x) (x)
-#define const_float16(x) (x)
-#define const_float32(x) (x)
-#define const_float64(x) (x)
-#endif
-typedef struct {
- uint64_t low;
- uint16_t high;
-} floatx80;
-#define make_floatx80(exp, mant) ((floatx80) { mant, exp })
-#define make_floatx80_init(exp, mant) { .low = mant, .high = exp }
-typedef struct {
-#ifdef HOST_WORDS_BIGENDIAN
- uint64_t high, low;
-#else
- uint64_t low, high;
-#endif
-} float128;
-#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
-#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point underflow tininess-detection mode.
-*----------------------------------------------------------------------------*/
-enum {
- float_tininess_after_rounding = 0,
- float_tininess_before_rounding = 1
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point rounding mode.
-*----------------------------------------------------------------------------*/
-enum {
- float_round_nearest_even = 0,
- float_round_down = 1,
- float_round_up = 2,
- float_round_to_zero = 3
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point exception flags.
-*----------------------------------------------------------------------------*/
-enum {
- float_flag_invalid = 1,
- float_flag_divbyzero = 4,
- float_flag_overflow = 8,
- float_flag_underflow = 16,
- float_flag_inexact = 32,
- float_flag_input_denormal = 64,
- float_flag_output_denormal = 128
-};
-
-typedef struct float_status {
- signed char float_detect_tininess;
- signed char float_rounding_mode;
- signed char float_exception_flags;
- signed char floatx80_rounding_precision;
- /* should denormalised results go to zero and set the inexact flag? */
- flag flush_to_zero;
- /* should denormalised inputs go to zero and set the input_denormal flag? */
- flag flush_inputs_to_zero;
- flag default_nan_mode;
-} float_status;
-
-void set_float_rounding_mode(int val STATUS_PARAM);
-void set_float_exception_flags(int val STATUS_PARAM);
-INLINE void set_float_detect_tininess(int val STATUS_PARAM)
-{
- STATUS(float_detect_tininess) = val;
-}
-INLINE void set_flush_to_zero(flag val STATUS_PARAM)
-{
- STATUS(flush_to_zero) = val;
-}
-INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM)
-{
- STATUS(flush_inputs_to_zero) = val;
-}
-INLINE void set_default_nan_mode(flag val STATUS_PARAM)
-{
- STATUS(default_nan_mode) = val;
-}
-INLINE int get_float_exception_flags(float_status *status)
-{
- return STATUS(float_exception_flags);
-}
-void set_floatx80_rounding_precision(int val STATUS_PARAM);
-
-/*----------------------------------------------------------------------------
-| Routine to raise any or all of the software IEC/IEEE floating-point
-| exception flags.
-*----------------------------------------------------------------------------*/
-void float_raise( int8 flags STATUS_PARAM);
-
-/*----------------------------------------------------------------------------
-| Options to indicate which negations to perform in float*_muladd()
-| Using these differs from negating an input or output before calling
-| the muladd function in that this means that a NaN doesn't have its
-| sign bit inverted before it is propagated.
-*----------------------------------------------------------------------------*/
-enum {
- float_muladd_negate_c = 1,
- float_muladd_negate_product = 2,
- float_muladd_negate_result = 4,
-};
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE integer-to-floating-point conversion routines.
-*----------------------------------------------------------------------------*/
-float32 int32_to_float32( int32 STATUS_PARAM );
-float64 int32_to_float64( int32 STATUS_PARAM );
-float32 uint32_to_float32( uint32 STATUS_PARAM );
-float64 uint32_to_float64( uint32 STATUS_PARAM );
-floatx80 int32_to_floatx80( int32 STATUS_PARAM );
-float128 int32_to_float128( int32 STATUS_PARAM );
-float32 int64_to_float32( int64 STATUS_PARAM );
-float32 uint64_to_float32( uint64 STATUS_PARAM );
-float64 int64_to_float64( int64 STATUS_PARAM );
-float64 uint64_to_float64( uint64 STATUS_PARAM );
-floatx80 int64_to_floatx80( int64 STATUS_PARAM );
-float128 int64_to_float128( int64 STATUS_PARAM );
-float128 uint64_to_float128( uint64 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software half-precision conversion routines.
-*----------------------------------------------------------------------------*/
-float16 float32_to_float16( float32, flag STATUS_PARAM );
-float32 float16_to_float32( float16, flag STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software half-precision operations.
-*----------------------------------------------------------------------------*/
-int float16_is_quiet_nan( float16 );
-int float16_is_signaling_nan( float16 );
-float16 float16_maybe_silence_nan( float16 );
-
-INLINE int float16_is_any_nan(float16 a)
-{
- return ((float16_val(a) & ~0x8000) > 0x7c00);
-}
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated half-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float16 float16_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE single-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
-uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
-int32 float32_to_int32( float32 STATUS_PARAM );
-int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
-uint32 float32_to_uint32( float32 STATUS_PARAM );
-uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
-int64 float32_to_int64( float32 STATUS_PARAM );
-int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
-float64 float32_to_float64( float32 STATUS_PARAM );
-floatx80 float32_to_floatx80( float32 STATUS_PARAM );
-float128 float32_to_float128( float32 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE single-precision operations.
-*----------------------------------------------------------------------------*/
-float32 float32_round_to_int( float32 STATUS_PARAM );
-float32 float32_add( float32, float32 STATUS_PARAM );
-float32 float32_sub( float32, float32 STATUS_PARAM );
-float32 float32_mul( float32, float32 STATUS_PARAM );
-float32 float32_div( float32, float32 STATUS_PARAM );
-float32 float32_rem( float32, float32 STATUS_PARAM );
-float32 float32_muladd(float32, float32, float32, int STATUS_PARAM);
-float32 float32_sqrt( float32 STATUS_PARAM );
-float32 float32_exp2( float32 STATUS_PARAM );
-float32 float32_log2( float32 STATUS_PARAM );
-int float32_eq( float32, float32 STATUS_PARAM );
-int float32_le( float32, float32 STATUS_PARAM );
-int float32_lt( float32, float32 STATUS_PARAM );
-int float32_unordered( float32, float32 STATUS_PARAM );
-int float32_eq_quiet( float32, float32 STATUS_PARAM );
-int float32_le_quiet( float32, float32 STATUS_PARAM );
-int float32_lt_quiet( float32, float32 STATUS_PARAM );
-int float32_unordered_quiet( float32, float32 STATUS_PARAM );
-int float32_compare( float32, float32 STATUS_PARAM );
-int float32_compare_quiet( float32, float32 STATUS_PARAM );
-float32 float32_min(float32, float32 STATUS_PARAM);
-float32 float32_max(float32, float32 STATUS_PARAM);
-int float32_is_quiet_nan( float32 );
-int float32_is_signaling_nan( float32 );
-float32 float32_maybe_silence_nan( float32 );
-float32 float32_scalbn( float32, int STATUS_PARAM );
-
-INLINE float32 float32_abs(float32 a)
-{
- /* Note that abs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float32(float32_val(a) & 0x7fffffff);
-}
-
-INLINE float32 float32_chs(float32 a)
-{
- /* Note that chs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float32(float32_val(a) ^ 0x80000000);
-}
-
-INLINE int float32_is_infinity(float32 a)
-{
- return (float32_val(a) & 0x7fffffff) == 0x7f800000;
-}
-
-INLINE int float32_is_neg(float32 a)
-{
- return float32_val(a) >> 31;
-}
-
-INLINE int float32_is_zero(float32 a)
-{
- return (float32_val(a) & 0x7fffffff) == 0;
-}
-
-INLINE int float32_is_any_nan(float32 a)
-{
- return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL);
-}
-
-INLINE int float32_is_zero_or_denormal(float32 a)
-{
- return (float32_val(a) & 0x7f800000) == 0;
-}
-
-INLINE float32 float32_set_sign(float32 a, int sign)
-{
- return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31));
-}
-
-#define float32_zero make_float32(0)
-#define float32_one make_float32(0x3f800000)
-#define float32_ln2 make_float32(0x3f317218)
-#define float32_pi make_float32(0x40490fdb)
-#define float32_half make_float32(0x3f000000)
-#define float32_infinity make_float32(0x7f800000)
-
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated single-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float32 float32_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE double-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
-uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
-int32 float64_to_int32( float64 STATUS_PARAM );
-int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM );
-uint32 float64_to_uint32( float64 STATUS_PARAM );
-uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
-int64 float64_to_int64( float64 STATUS_PARAM );
-int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM );
-uint64 float64_to_uint64 (float64 a STATUS_PARAM);
-uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
-float32 float64_to_float32( float64 STATUS_PARAM );
-floatx80 float64_to_floatx80( float64 STATUS_PARAM );
-float128 float64_to_float128( float64 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE double-precision operations.
-*----------------------------------------------------------------------------*/
-float64 float64_round_to_int( float64 STATUS_PARAM );
-float64 float64_trunc_to_int( float64 STATUS_PARAM );
-float64 float64_add( float64, float64 STATUS_PARAM );
-float64 float64_sub( float64, float64 STATUS_PARAM );
-float64 float64_mul( float64, float64 STATUS_PARAM );
-float64 float64_div( float64, float64 STATUS_PARAM );
-float64 float64_rem( float64, float64 STATUS_PARAM );
-float64 float64_muladd(float64, float64, float64, int STATUS_PARAM);
-float64 float64_sqrt( float64 STATUS_PARAM );
-float64 float64_log2( float64 STATUS_PARAM );
-int float64_eq( float64, float64 STATUS_PARAM );
-int float64_le( float64, float64 STATUS_PARAM );
-int float64_lt( float64, float64 STATUS_PARAM );
-int float64_unordered( float64, float64 STATUS_PARAM );
-int float64_eq_quiet( float64, float64 STATUS_PARAM );
-int float64_le_quiet( float64, float64 STATUS_PARAM );
-int float64_lt_quiet( float64, float64 STATUS_PARAM );
-int float64_unordered_quiet( float64, float64 STATUS_PARAM );
-int float64_compare( float64, float64 STATUS_PARAM );
-int float64_compare_quiet( float64, float64 STATUS_PARAM );
-float64 float64_min(float64, float64 STATUS_PARAM);
-float64 float64_max(float64, float64 STATUS_PARAM);
-int float64_is_quiet_nan( float64 a );
-int float64_is_signaling_nan( float64 );
-float64 float64_maybe_silence_nan( float64 );
-float64 float64_scalbn( float64, int STATUS_PARAM );
-
-INLINE float64 float64_abs(float64 a)
-{
- /* Note that abs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float64(float64_val(a) & 0x7fffffffffffffffLL);
-}
-
-INLINE float64 float64_chs(float64 a)
-{
- /* Note that chs does *not* handle NaN specially, nor does
- * it flush denormal inputs to zero.
- */
- return make_float64(float64_val(a) ^ 0x8000000000000000LL);
-}
-
-INLINE int float64_is_infinity(float64 a)
-{
- return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL;
-}
-
-INLINE int float64_is_neg(float64 a)
-{
- return float64_val(a) >> 63;
-}
-
-INLINE int float64_is_zero(float64 a)
-{
- return (float64_val(a) & 0x7fffffffffffffffLL) == 0;
-}
-
-INLINE int float64_is_any_nan(float64 a)
-{
- return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL);
-}
-
-INLINE int float64_is_zero_or_denormal(float64 a)
-{
- return (float64_val(a) & 0x7ff0000000000000LL) == 0;
-}
-
-INLINE float64 float64_set_sign(float64 a, int sign)
-{
- return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
- | ((int64_t)sign << 63));
-}
-
-#define float64_zero make_float64(0)
-#define float64_one make_float64(0x3ff0000000000000LL)
-#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
-#define float64_pi make_float64(0x400921fb54442d18LL)
-#define float64_half make_float64(0x3fe0000000000000LL)
-#define float64_infinity make_float64(0x7ff0000000000000LL)
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated double-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float64 float64_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE extended double-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int32 floatx80_to_int32( floatx80 STATUS_PARAM );
-int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
-int64 floatx80_to_int64( floatx80 STATUS_PARAM );
-int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
-float32 floatx80_to_float32( floatx80 STATUS_PARAM );
-float64 floatx80_to_float64( floatx80 STATUS_PARAM );
-float128 floatx80_to_float128( floatx80 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE extended double-precision operations.
-*----------------------------------------------------------------------------*/
-floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
-floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
-floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
-int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
-int floatx80_le( floatx80, floatx80 STATUS_PARAM );
-int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
-int floatx80_unordered( floatx80, floatx80 STATUS_PARAM );
-int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
-int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_is_quiet_nan( floatx80 );
-int floatx80_is_signaling_nan( floatx80 );
-floatx80 floatx80_maybe_silence_nan( floatx80 );
-floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
-
-INLINE floatx80 floatx80_abs(floatx80 a)
-{
- a.high &= 0x7fff;
- return a;
-}
-
-INLINE floatx80 floatx80_chs(floatx80 a)
-{
- a.high ^= 0x8000;
- return a;
-}
-
-INLINE int floatx80_is_infinity(floatx80 a)
-{
- return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000LL;
-}
-
-INLINE int floatx80_is_neg(floatx80 a)
-{
- return a.high >> 15;
-}
-
-INLINE int floatx80_is_zero(floatx80 a)
-{
- return (a.high & 0x7fff) == 0 && a.low == 0;
-}
-
-INLINE int floatx80_is_zero_or_denormal(floatx80 a)
-{
- return (a.high & 0x7fff) == 0;
-}
-
-INLINE int floatx80_is_any_nan(floatx80 a)
-{
- return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
-}
-
-#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
-#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
-#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
-#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
-#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
-#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated extended double-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const floatx80 floatx80_default_nan;
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE quadruple-precision conversion routines.
-*----------------------------------------------------------------------------*/
-int32 float128_to_int32( float128 STATUS_PARAM );
-int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM );
-int64 float128_to_int64( float128 STATUS_PARAM );
-int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM );
-float32 float128_to_float32( float128 STATUS_PARAM );
-float64 float128_to_float64( float128 STATUS_PARAM );
-floatx80 float128_to_floatx80( float128 STATUS_PARAM );
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE quadruple-precision operations.
-*----------------------------------------------------------------------------*/
-float128 float128_round_to_int( float128 STATUS_PARAM );
-float128 float128_add( float128, float128 STATUS_PARAM );
-float128 float128_sub( float128, float128 STATUS_PARAM );
-float128 float128_mul( float128, float128 STATUS_PARAM );
-float128 float128_div( float128, float128 STATUS_PARAM );
-float128 float128_rem( float128, float128 STATUS_PARAM );
-float128 float128_sqrt( float128 STATUS_PARAM );
-int float128_eq( float128, float128 STATUS_PARAM );
-int float128_le( float128, float128 STATUS_PARAM );
-int float128_lt( float128, float128 STATUS_PARAM );
-int float128_unordered( float128, float128 STATUS_PARAM );
-int float128_eq_quiet( float128, float128 STATUS_PARAM );
-int float128_le_quiet( float128, float128 STATUS_PARAM );
-int float128_lt_quiet( float128, float128 STATUS_PARAM );
-int float128_unordered_quiet( float128, float128 STATUS_PARAM );
-int float128_compare( float128, float128 STATUS_PARAM );
-int float128_compare_quiet( float128, float128 STATUS_PARAM );
-int float128_is_quiet_nan( float128 );
-int float128_is_signaling_nan( float128 );
-float128 float128_maybe_silence_nan( float128 );
-float128 float128_scalbn( float128, int STATUS_PARAM );
-
-INLINE float128 float128_abs(float128 a)
-{
- a.high &= 0x7fffffffffffffffLL;
- return a;
-}
-
-INLINE float128 float128_chs(float128 a)
-{
- a.high ^= 0x8000000000000000LL;
- return a;
-}
-
-INLINE int float128_is_infinity(float128 a)
-{
- return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0;
-}
-
-INLINE int float128_is_neg(float128 a)
-{
- return a.high >> 63;
-}
-
-INLINE int float128_is_zero(float128 a)
-{
- return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0;
-}
-
-INLINE int float128_is_zero_or_denormal(float128 a)
-{
- return (a.high & 0x7fff000000000000LL) == 0;
-}
-
-INLINE int float128_is_any_nan(float128 a)
-{
- return ((a.high >> 48) & 0x7fff) == 0x7fff &&
- ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0));
-}
-
-#define float128_zero make_float128(0, 0)
-
-/*----------------------------------------------------------------------------
-| The pattern for a default generated quadruple-precision NaN.
-*----------------------------------------------------------------------------*/
-extern const float128 float128_default_nan;
-
-#endif /* !SOFTFLOAT_H */
diff --git a/contrib/qemu/include/glib-compat.h b/contrib/qemu/include/glib-compat.h
deleted file mode 100644
index 8aa77afd626..00000000000
--- a/contrib/qemu/include/glib-compat.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * GLIB Compatibility Functions
- *
- * Copyright IBM, Corp. 2013
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_GLIB_COMPAT_H
-#define QEMU_GLIB_COMPAT_H
-
-#include <glib.h>
-
-#if !GLIB_CHECK_VERSION(2, 14, 0)
-static inline guint g_timeout_add_seconds(guint interval, GSourceFunc function,
- gpointer data)
-{
- return g_timeout_add(interval * 1000, function, data);
-}
-#endif
-
-#endif
diff --git a/contrib/qemu/include/migration/migration.h b/contrib/qemu/include/migration/migration.h
deleted file mode 100644
index bc9fde0b2ab..00000000000
--- a/contrib/qemu/include/migration/migration.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * QEMU live migration
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_MIGRATION_H
-#define QEMU_MIGRATION_H
-
-#include "qapi/qmp/qdict.h"
-#include "qemu-common.h"
-#include "qemu/thread.h"
-#include "qemu/notify.h"
-#include "qapi/error.h"
-#include "migration/vmstate.h"
-#include "qapi-types.h"
-#include "exec/cpu-common.h"
-
-struct MigrationParams {
- bool blk;
- bool shared;
-};
-
-typedef struct MigrationState MigrationState;
-
-struct MigrationState
-{
- int64_t bandwidth_limit;
- size_t bytes_xfer;
- size_t xfer_limit;
- QemuThread thread;
- QEMUBH *cleanup_bh;
- QEMUFile *file;
-
- int state;
- MigrationParams params;
- double mbps;
- int64_t total_time;
- int64_t downtime;
- int64_t expected_downtime;
- int64_t dirty_pages_rate;
- int64_t dirty_bytes_rate;
- bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
- int64_t xbzrle_cache_size;
-};
-
-void process_incoming_migration(QEMUFile *f);
-
-void qemu_start_incoming_migration(const char *uri, Error **errp);
-
-uint64_t migrate_max_downtime(void);
-
-void do_info_migrate_print(Monitor *mon, const QObject *data);
-
-void do_info_migrate(Monitor *mon, QObject **ret_data);
-
-void exec_start_incoming_migration(const char *host_port, Error **errp);
-
-void exec_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
-
-void tcp_start_incoming_migration(const char *host_port, Error **errp);
-
-void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
-
-void unix_start_incoming_migration(const char *path, Error **errp);
-
-void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
-
-void fd_start_incoming_migration(const char *path, Error **errp);
-
-void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
-
-void migrate_fd_error(MigrationState *s);
-
-void migrate_fd_connect(MigrationState *s);
-
-int migrate_fd_close(MigrationState *s);
-
-void add_migration_state_change_notifier(Notifier *notify);
-void remove_migration_state_change_notifier(Notifier *notify);
-bool migration_is_active(MigrationState *);
-bool migration_has_finished(MigrationState *);
-bool migration_has_failed(MigrationState *);
-MigrationState *migrate_get_current(void);
-
-uint64_t ram_bytes_remaining(void);
-uint64_t ram_bytes_transferred(void);
-uint64_t ram_bytes_total(void);
-
-void acct_update_position(QEMUFile *f, size_t size, bool zero);
-
-extern SaveVMHandlers savevm_ram_handlers;
-
-uint64_t dup_mig_bytes_transferred(void);
-uint64_t dup_mig_pages_transferred(void);
-uint64_t skipped_mig_bytes_transferred(void);
-uint64_t skipped_mig_pages_transferred(void);
-uint64_t norm_mig_bytes_transferred(void);
-uint64_t norm_mig_pages_transferred(void);
-uint64_t xbzrle_mig_bytes_transferred(void);
-uint64_t xbzrle_mig_pages_transferred(void);
-uint64_t xbzrle_mig_pages_overflow(void);
-uint64_t xbzrle_mig_pages_cache_miss(void);
-
-/**
- * @migrate_add_blocker - prevent migration from proceeding
- *
- * @reason - an error to be returned whenever migration is attempted
- */
-void migrate_add_blocker(Error *reason);
-
-/**
- * @migrate_del_blocker - remove a blocking error from migration
- *
- * @reason - the error blocking migration
- */
-void migrate_del_blocker(Error *reason);
-
-bool migrate_rdma_pin_all(void);
-
-bool migrate_auto_converge(void);
-
-int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
- uint8_t *dst, int dlen);
-int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
-
-int migrate_use_xbzrle(void);
-int64_t migrate_xbzrle_cache_size(void);
-
-int64_t xbzrle_cache_resize(int64_t new_size);
-
-void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_load_hook(QEMUFile *f, uint64_t flags);
-
-/* Whenever this is found in the data stream, the flags
- * will be passed to ram_control_load_hook in the incoming-migration
- * side. This lets before_ram_iterate/after_ram_iterate add
- * transport-specific sections to the RAM migration data.
- */
-#define RAM_SAVE_FLAG_HOOK 0x80
-
-#define RAM_SAVE_CONTROL_NOT_SUPP -1000
-#define RAM_SAVE_CONTROL_DELAYED -2000
-
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
- ram_addr_t offset, size_t size,
- int *bytes_sent);
-
-#endif
diff --git a/contrib/qemu/include/migration/qemu-file.h b/contrib/qemu/include/migration/qemu-file.h
deleted file mode 100644
index 0f757fbeb63..00000000000
--- a/contrib/qemu/include/migration/qemu-file.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef QEMU_FILE_H
-#define QEMU_FILE_H 1
-#include "exec/cpu-common.h"
-
-/* This function writes a chunk of data to a file at the given position.
- * The pos argument can be ignored if the file is only being used for
- * streaming. The handler should try to write all of the data it can.
- */
-typedef int (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf,
- int64_t pos, int size);
-
-/* Read a chunk of data from a file at the given position. The pos argument
- * can be ignored if the file is only being used for streaming. The number of
- * bytes actually read should be returned.
- */
-typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
- int64_t pos, int size);
-
-/* Close a file
- *
- * Return negative error number on error, 0 or positive value on success.
- *
- * The meaning of return value on success depends on the specific back-end being
- * used.
- */
-typedef int (QEMUFileCloseFunc)(void *opaque);
-
-/* Called to return the OS file descriptor associated with the QEMUFile.
- */
-typedef int (QEMUFileGetFD)(void *opaque);
-
-/*
- * This function writes an iovec to file.
- */
-typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
- int iovcnt, int64_t pos);
-
-/*
- * This function provides hooks around different
- * stages of RAM migration.
- */
-typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
-
-/*
- * Constants used by ram_control_* hooks
- */
-#define RAM_CONTROL_SETUP 0
-#define RAM_CONTROL_ROUND 1
-#define RAM_CONTROL_HOOK 2
-#define RAM_CONTROL_FINISH 3
-
-/*
- * This function allows override of where the RAM page
- * is saved (RDMA, for example).
- */
-typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
- ram_addr_t block_offset,
- ram_addr_t offset,
- size_t size,
- int *bytes_sent);
-
-typedef struct QEMUFileOps {
- QEMUFilePutBufferFunc *put_buffer;
- QEMUFileGetBufferFunc *get_buffer;
- QEMUFileCloseFunc *close;
- QEMUFileGetFD *get_fd;
- QEMUFileWritevBufferFunc *writev_buffer;
- QEMURamHookFunc *before_ram_iterate;
- QEMURamHookFunc *after_ram_iterate;
- QEMURamHookFunc *hook_ram_load;
- QEMURamSaveFunc *save_page;
-} QEMUFileOps;
-
-QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
-QEMUFile *qemu_fopen(const char *filename, const char *mode);
-QEMUFile *qemu_fdopen(int fd, const char *mode);
-QEMUFile *qemu_fopen_socket(int fd, const char *mode);
-QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
-int qemu_get_fd(QEMUFile *f);
-int qemu_fclose(QEMUFile *f);
-int64_t qemu_ftell(QEMUFile *f);
-void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
-void qemu_put_byte(QEMUFile *f, int v);
-/*
- * put_buffer without copying the buffer.
- * The buffer should remain available until it has been sent asynchronously.
- */
-void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
-bool qemu_file_mode_is_not_valid(const char *mode);
-
-static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
-{
- qemu_put_byte(f, (int)v);
-}
-
-#define qemu_put_sbyte qemu_put_byte
-
-void qemu_put_be16(QEMUFile *f, unsigned int v);
-void qemu_put_be32(QEMUFile *f, unsigned int v);
-void qemu_put_be64(QEMUFile *f, uint64_t v);
-int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
-int qemu_get_byte(QEMUFile *f);
-void qemu_update_position(QEMUFile *f, size_t size);
-
-static inline unsigned int qemu_get_ubyte(QEMUFile *f)
-{
- return (unsigned int)qemu_get_byte(f);
-}
-
-#define qemu_get_sbyte qemu_get_byte
-
-unsigned int qemu_get_be16(QEMUFile *f);
-unsigned int qemu_get_be32(QEMUFile *f);
-uint64_t qemu_get_be64(QEMUFile *f);
-
-int qemu_file_rate_limit(QEMUFile *f);
-void qemu_file_reset_rate_limit(QEMUFile *f);
-void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
-int64_t qemu_file_get_rate_limit(QEMUFile *f);
-int qemu_file_get_error(QEMUFile *f);
-void qemu_fflush(QEMUFile *f);
-
-static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
-{
- qemu_put_be64(f, *pv);
-}
-
-static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv)
-{
- qemu_put_be32(f, *pv);
-}
-
-static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv)
-{
- qemu_put_be16(f, *pv);
-}
-
-static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv)
-{
- qemu_put_byte(f, *pv);
-}
-
-static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv)
-{
- *pv = qemu_get_be64(f);
-}
-
-static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv)
-{
- *pv = qemu_get_be32(f);
-}
-
-static inline void qemu_get_be16s(QEMUFile *f, uint16_t *pv)
-{
- *pv = qemu_get_be16(f);
-}
-
-static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv)
-{
- *pv = qemu_get_byte(f);
-}
-
-// Signed versions for type safety
-static inline void qemu_put_sbuffer(QEMUFile *f, const int8_t *buf, int size)
-{
- qemu_put_buffer(f, (const uint8_t *)buf, size);
-}
-
-static inline void qemu_put_sbe16(QEMUFile *f, int v)
-{
- qemu_put_be16(f, (unsigned int)v);
-}
-
-static inline void qemu_put_sbe32(QEMUFile *f, int v)
-{
- qemu_put_be32(f, (unsigned int)v);
-}
-
-static inline void qemu_put_sbe64(QEMUFile *f, int64_t v)
-{
- qemu_put_be64(f, (uint64_t)v);
-}
-
-static inline size_t qemu_get_sbuffer(QEMUFile *f, int8_t *buf, int size)
-{
- return qemu_get_buffer(f, (uint8_t *)buf, size);
-}
-
-static inline int qemu_get_sbe16(QEMUFile *f)
-{
- return (int)qemu_get_be16(f);
-}
-
-static inline int qemu_get_sbe32(QEMUFile *f)
-{
- return (int)qemu_get_be32(f);