summaryrefslogtreecommitdiffstats
path: root/contrib/qemu/block
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/qemu/block')
-rw-r--r--contrib/qemu/block/qcow.c914
-rw-r--r--contrib/qemu/block/qcow2-cache.c323
-rw-r--r--contrib/qemu/block/qcow2-cluster.c1478
-rw-r--r--contrib/qemu/block/qcow2-refcount.c1374
-rw-r--r--contrib/qemu/block/qcow2-snapshot.c660
-rw-r--r--contrib/qemu/block/qcow2.c1825
-rw-r--r--contrib/qemu/block/qcow2.h437
-rw-r--r--contrib/qemu/block/qed-check.c248
-rw-r--r--contrib/qemu/block/qed-cluster.c165
-rw-r--r--contrib/qemu/block/qed-gencb.c32
-rw-r--r--contrib/qemu/block/qed-l2-cache.c187
-rw-r--r--contrib/qemu/block/qed-table.c296
-rw-r--r--contrib/qemu/block/qed.c1596
-rw-r--r--contrib/qemu/block/qed.h344
-rw-r--r--contrib/qemu/block/snapshot.c157
15 files changed, 0 insertions, 10036 deletions
diff --git a/contrib/qemu/block/qcow.c b/contrib/qemu/block/qcow.c
deleted file mode 100644
index 5239bd68f1c..00000000000
--- a/contrib/qemu/block/qcow.c
+++ /dev/null
@@ -1,914 +0,0 @@
-/*
- * Block driver for the QCOW format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qemu/aes.h"
-#include "migration/migration.h"
-
-/**************************************************************/
-/* QEMU COW block driver with compression and encryption support */
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 1
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES 1
-
-#define QCOW_OFLAG_COMPRESSED (1LL << 63)
-
-typedef struct QCowHeader {
- uint32_t magic;
- uint32_t version;
- uint64_t backing_file_offset;
- uint32_t backing_file_size;
- uint32_t mtime;
- uint64_t size; /* in bytes */
- uint8_t cluster_bits;
- uint8_t l2_bits;
- uint32_t crypt_method;
- uint64_t l1_table_offset;
-} QCowHeader;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct BDRVQcowState {
- int cluster_bits;
- int cluster_size;
- int cluster_sectors;
- int l2_bits;
- int l2_size;
- int l1_size;
- uint64_t cluster_offset_mask;
- uint64_t l1_table_offset;
- uint64_t *l1_table;
- uint64_t *l2_cache;
- uint64_t l2_cache_offsets[L2_CACHE_SIZE];
- uint32_t l2_cache_counts[L2_CACHE_SIZE];
- uint8_t *cluster_cache;
- uint8_t *cluster_data;
- uint64_t cluster_cache_offset;
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
- uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- CoMutex lock;
- Error *migration_blocker;
-} BDRVQcowState;
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
- const QCowHeader *cow_header = (const void *)buf;
-
- if (buf_size >= sizeof(QCowHeader) &&
- be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
- be32_to_cpu(cow_header->version) == QCOW_VERSION)
- return 100;
- else
- return 0;
-}
-
-static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQcowState *s = bs->opaque;
- int len, i, shift, ret;
- QCowHeader header;
-
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
- if (ret < 0) {
- goto fail;
- }
- be32_to_cpus(&header.magic);
- be32_to_cpus(&header.version);
- be64_to_cpus(&header.backing_file_offset);
- be32_to_cpus(&header.backing_file_size);
- be32_to_cpus(&header.mtime);
- be64_to_cpus(&header.size);
- be32_to_cpus(&header.crypt_method);
- be64_to_cpus(&header.l1_table_offset);
-
- if (header.magic != QCOW_MAGIC) {
- ret = -EMEDIUMTYPE;
- goto fail;
- }
- if (header.version != QCOW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version), "QCOW version %d", header.version);
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "qcow", version);
- ret = -ENOTSUP;
- goto fail;
- }
-
- if (header.size <= 1 || header.cluster_bits < 9) {
- ret = -EINVAL;
- goto fail;
- }
- if (header.crypt_method > QCOW_CRYPT_AES) {
- ret = -EINVAL;
- goto fail;
- }
- s->crypt_method_header = header.crypt_method;
- if (s->crypt_method_header) {
- bs->encrypted = 1;
- }
- s->cluster_bits = header.cluster_bits;
- s->cluster_size = 1 << s->cluster_bits;
- s->cluster_sectors = 1 << (s->cluster_bits - 9);
- s->l2_bits = header.l2_bits;
- s->l2_size = 1 << s->l2_bits;
- bs->total_sectors = header.size / 512;
- s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-
- /* read the level 1 table */
- shift = s->cluster_bits + s->l2_bits;
- s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
-
- s->l1_table_offset = header.l1_table_offset;
- s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
-
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
-
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
- /* alloc L2 cache */
- s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
- s->cluster_cache = g_malloc(s->cluster_size);
- s->cluster_data = g_malloc(s->cluster_size);
- s->cluster_cache_offset = -1;
-
- /* read the backing file name */
- if (header.backing_file_offset != 0) {
- len = header.backing_file_size;
- if (len > 1023) {
- len = 1023;
- }
- ret = bdrv_pread(bs->file, header.backing_file_offset,
- bs->backing_file, len);
- if (ret < 0) {
- goto fail;
- }
- bs->backing_file[len] = '\0';
- }
-
- /* Disable migration when qcow images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "qcow", bs->device_name, "live migration");
- migrate_add_blocker(s->migration_blocker);
-
- qemu_co_mutex_init(&s->lock);
- return 0;
-
- fail:
- g_free(s->l1_table);
- g_free(s->l2_cache);
- g_free(s->cluster_cache);
- g_free(s->cluster_data);
- return ret;
-}
-
-
-/* We have nothing to do for QCOW reopen, stubs just return
- * success */
-static int qcow_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
- BDRVQcowState *s = bs->opaque;
- uint8_t keybuf[16];
- int len, i;
-
- memset(keybuf, 0, 16);
- len = strlen(key);
- if (len > 16)
- len = 16;
- /* XXX: we could compress the chars to 7 bits to increase
- entropy */
- for(i = 0;i < len;i++) {
- keybuf[i] = key[i];
- }
- s->crypt_method = s->crypt_method_header;
-
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
- return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
- return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
- algorithm for < 4 GB images. NOTE: out_buf == in_buf is
- supported */
-static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
-{
- union {
- uint64_t ll[2];
- uint8_t b[16];
- } ivec;
- int i;
-
- for(i = 0; i < nb_sectors; i++) {
- ivec.ll[0] = cpu_to_le64(sector_num);
- ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
- sector_num++;
- in_buf += 512;
- out_buf += 512;
- }
-}
-
-/* 'allocate' is:
- *
- * 0 to not allocate.
- *
- * 1 to allocate a normal cluster (for sector indexes 'n_start' to
- * 'n_end')
- *
- * 2 to allocate a compressed cluster of size
- * 'compressed_size'. 'compressed_size' must be > 0 and <
- * cluster_size
- *
- * return 0 if not allocated.
- */
-static uint64_t get_cluster_offset(BlockDriverState *bs,
- uint64_t offset, int allocate,
- int compressed_size,
- int n_start, int n_end)
-{
- BDRVQcowState *s = bs->opaque;
- int min_index, i, j, l1_index, l2_index;
- uint64_t l2_offset, *l2_table, cluster_offset, tmp;
- uint32_t min_count;
- int new_l2_table;
-
- l1_index = offset >> (s->l2_bits + s->cluster_bits);
- l2_offset = s->l1_table[l1_index];
- new_l2_table = 0;
- if (!l2_offset) {
- if (!allocate)
- return 0;
- /* allocate a new l2 entry */
- l2_offset = bdrv_getlength(bs->file);
- /* round to cluster size */
- l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
- /* update the L1 entry */
- s->l1_table[l1_index] = l2_offset;
- tmp = cpu_to_be64(l2_offset);
- if (bdrv_pwrite_sync(bs->file,
- s->l1_table_offset + l1_index * sizeof(tmp),
- &tmp, sizeof(tmp)) < 0)
- return 0;
- new_l2_table = 1;
- }
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (l2_offset == s->l2_cache_offsets[i]) {
- /* increment the hit count */
- if (++s->l2_cache_counts[i] == 0xffffffff) {
- for(j = 0; j < L2_CACHE_SIZE; j++) {
- s->l2_cache_counts[j] >>= 1;
- }
- }
- l2_table = s->l2_cache + (i << s->l2_bits);
- goto found;
- }
- }
- /* not found: load a new entry in the least used one */
- min_index = 0;
- min_count = 0xffffffff;
- for(i = 0; i < L2_CACHE_SIZE; i++) {
- if (s->l2_cache_counts[i] < min_count) {
- min_count = s->l2_cache_counts[i];
- min_index = i;
- }
- }
- l2_table = s->l2_cache + (min_index << s->l2_bits);
- if (new_l2_table) {
- memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
- s->l2_size * sizeof(uint64_t)) < 0)
- return 0;
- } else {
- if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
- s->l2_size * sizeof(uint64_t))
- return 0;
- }
- s->l2_cache_offsets[min_index] = l2_offset;
- s->l2_cache_counts[min_index] = 1;
- found:
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
- if (!cluster_offset ||
- ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
- if (!allocate)
- return 0;
- /* allocate a new cluster */
- if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
- (n_end - n_start) < s->cluster_sectors) {
- /* if the cluster is already compressed, we must
- decompress it in the case it is not completely
- overwritten */
- if (decompress_cluster(bs, cluster_offset) < 0)
- return 0;
- cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + s->cluster_size - 1) &
- ~(s->cluster_size - 1);
- /* write the cluster content */
- if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
- s->cluster_size)
- return -1;
- } else {
- cluster_offset = bdrv_getlength(bs->file);
- if (allocate == 1) {
- /* round to cluster size */
- cluster_offset = (cluster_offset + s->cluster_size - 1) &
- ~(s->cluster_size - 1);
- bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
- /* if encrypted, we must initialize the cluster
- content which won't be written */
- if (s->crypt_method &&
- (n_end - n_start) < s->cluster_sectors) {
- uint64_t start_sect;
- start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
- memset(s->cluster_data + 512, 0x00, 512);
- for(i = 0; i < s->cluster_sectors; i++) {
- if (i < n_start || i >= n_end) {
- encrypt_sectors(s, start_sect + i,
- s->cluster_data,
- s->cluster_data + 512, 1, 1,
- &s->aes_encrypt_key);
- if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
- s->cluster_data, 512) != 512)
- return -1;
- }
- }
- }
- } else if (allocate == 2) {
- cluster_offset |= QCOW_OFLAG_COMPRESSED |
- (uint64_t)compressed_size << (63 - s->cluster_bits);
- }
- }
- /* update L2 table */
- tmp = cpu_to_be64(cluster_offset);
- l2_table[l2_index] = tmp;
- if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
- &tmp, sizeof(tmp)) < 0)
- return 0;
- }
- return cluster_offset;
-}
-
-static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n;
- uint64_t cluster_offset;
-
- qemu_co_mutex_lock(&s->lock);
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
- qemu_co_mutex_unlock(&s->lock);
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors)
- n = nb_sectors;
- *pnum = n;
- return (cluster_offset != 0);
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
- const uint8_t *buf, int buf_size)
-{
- z_stream strm1, *strm = &strm1;
- int ret, out_len;
-
- memset(strm, 0, sizeof(*strm));
-
- strm->next_in = (uint8_t *)buf;
- strm->avail_in = buf_size;
- strm->next_out = out_buf;
- strm->avail_out = out_buf_size;
-
- ret = inflateInit2(strm, -12);
- if (ret != Z_OK)
- return -1;
- ret = inflate(strm, Z_FINISH);
- out_len = strm->next_out - out_buf;
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
- out_len != out_buf_size) {
- inflateEnd(strm);
- return -1;
- }
- inflateEnd(strm);
- return 0;
-}
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, csize;
- uint64_t coffset;
-
- coffset = cluster_offset & s->cluster_offset_mask;
- if (s->cluster_cache_offset != coffset) {
- csize = cluster_offset >> (63 - s->cluster_bits);
- csize &= (s->cluster_size - 1);
- ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
- if (ret != csize)
- return -1;
- if (decompress_buffer(s->cluster_cache, s->cluster_size,
- s->cluster_data, csize) < 0) {
- return -1;
- }
- s->cluster_cache_offset = coffset;
- }
- return 0;
-}
-
-static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- int ret = 0, n;
- uint64_t cluster_offset;
- struct iovec hd_iov;
- QEMUIOVector hd_qiov;
- uint8_t *buf;
- void *orig_buf;
-
- if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
- } else {
- orig_buf = NULL;
- buf = (uint8_t *)qiov->iov->iov_base;
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- while (nb_sectors != 0) {
- /* prepare next request */
- cluster_offset = get_cluster_offset(bs, sector_num << 9,
- 0, 0, 0, 0);
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
-
- if (!cluster_offset) {
- if (bs->backing_hd) {
- /* read from the base image */
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- } else {
- /* Note: in this case, no need to wait */
- memset(buf, 0, 512 * n);
- }
- } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- /* add AIO support for compressed blocks ? */
- if (decompress_cluster(bs, cluster_offset) < 0) {
- goto fail;
- }
- memcpy(buf,
- s->cluster_cache + index_in_cluster * 512, 512 * n);
- } else {
- if ((cluster_offset & 511) != 0) {
- goto fail;
- }
- hd_iov.iov_base = (void *)buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- break;
- }
- if (s->crypt_method) {
- encrypt_sectors(s, sector_num, buf, buf,
- n, 0,
- &s->aes_decrypt_key);
- }
- }
- ret = 0;
-
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
-
-done:
- qemu_co_mutex_unlock(&s->lock);
-
- if (qiov->niov > 1) {
- qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
- qemu_vfree(orig_buf);
- }
-
- return ret;
-
-fail:
- ret = -EIO;
- goto done;
-}
-
-static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- uint64_t cluster_offset;
- const uint8_t *src_buf;
- int ret = 0, n;
- uint8_t *cluster_data = NULL;
- struct iovec hd_iov;
- QEMUIOVector hd_qiov;
- uint8_t *buf;
- void *orig_buf;
-
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
- if (qiov->niov > 1) {
- buf = orig_buf = qemu_blockalign(bs, qiov->size);
- qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
- } else {
- orig_buf = NULL;
- buf = (uint8_t *)qiov->iov->iov_base;
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- while (nb_sectors != 0) {
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n = s->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
- index_in_cluster,
- index_in_cluster + n);
- if (!cluster_offset || (cluster_offset & 511) != 0) {
- ret = -EIO;
- break;
- }
- if (s->crypt_method) {
- if (!cluster_data) {
- cluster_data = g_malloc0(s->cluster_size);
- }
- encrypt_sectors(s, sector_num, cluster_data, buf,
- n, 1, &s->aes_encrypt_key);
- src_buf = cluster_data;
- } else {
- src_buf = buf;
- }
-
- hd_iov.iov_base = (void *)src_buf;
- hd_iov.iov_len = n * 512;
- qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- n, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- break;
- }
- ret = 0;
-
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
- qemu_co_mutex_unlock(&s->lock);
-
- if (qiov->niov > 1) {
- qemu_vfree(orig_buf);
- }
- g_free(cluster_data);
-
- return ret;
-}
-
-static void qcow_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
-
- g_free(s->l1_table);
- g_free(s->l2_cache);
- g_free(s->cluster_cache);
- g_free(s->cluster_data);
-
- migrate_del_blocker(s->migration_blocker);
- error_free(s->migration_blocker);
-}
-
-static int qcow_create(const char *filename, QEMUOptionParameter *options)
-{
- int header_size, backing_filename_len, l1_size, shift, i;
- QCowHeader header;
- uint8_t *tmp;
- int64_t total_size = 0;
- const char *backing_file = NULL;
- int flags = 0;
- int ret;
- BlockDriverState *qcow_bs;
-
- /* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- }
- options++;
- }
-
- ret = bdrv_create_file(filename, options);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_truncate(qcow_bs, 0);
- if (ret < 0) {
- goto exit;
- }
-
- memset(&header, 0, sizeof(header));
- header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(QCOW_VERSION);
- header.size = cpu_to_be64(total_size * 512);
- header_size = sizeof(header);
- backing_filename_len = 0;
- if (backing_file) {
- if (strcmp(backing_file, "fat:")) {
- header.backing_file_offset = cpu_to_be64(header_size);
- backing_filename_len = strlen(backing_file);
- header.backing_file_size = cpu_to_be32(backing_filename_len);
- header_size += backing_filename_len;
- } else {
- /* special backing file for vvfat */
- backing_file = NULL;
- }
- header.cluster_bits = 9; /* 512 byte cluster to avoid copying
- unmodifyed sectors */
- header.l2_bits = 12; /* 32 KB L2 tables */
- } else {
- header.cluster_bits = 12; /* 4 KB clusters */
- header.l2_bits = 9; /* 4 KB L2 tables */
- }
- header_size = (header_size + 7) & ~7;
- shift = header.cluster_bits + header.l2_bits;
- l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
-
- header.l1_table_offset = cpu_to_be64(header_size);
- if (flags & BLOCK_FLAG_ENCRYPT) {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
- } else {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
- }
-
- /* write all the data */
- ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
- if (ret != sizeof(header)) {
- goto exit;
- }
-
- if (backing_file) {
- ret = bdrv_pwrite(qcow_bs, sizeof(header),
- backing_file, backing_filename_len);
- if (ret != backing_filename_len) {
- goto exit;
- }
- }
-
- tmp = g_malloc0(BDRV_SECTOR_SIZE);
- for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
- BDRV_SECTOR_SIZE); i++) {
- ret = bdrv_pwrite(qcow_bs, header_size +
- BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
- if (ret != BDRV_SECTOR_SIZE) {
- g_free(tmp);
- goto exit;
- }
- }
-
- g_free(tmp);
- ret = 0;
-exit:
- bdrv_delete(qcow_bs);
- return ret;
-}
-
-static int qcow_make_empty(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- uint32_t l1_length = s->l1_size * sizeof(uint64_t);
- int ret;
-
- memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
- l1_length) < 0)
- return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
- if (ret < 0)
- return ret;
-
- memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
- memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
- memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-
- return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
- tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- z_stream strm;
- int ret, out_len;
- uint8_t *out_buf;
- uint64_t cluster_offset;
-
- if (nb_sectors != s->cluster_sectors) {
- ret = -EINVAL;
-
- /* Zero-pad last write if image size is not cluster aligned */
- if (sector_num + nb_sectors == bs->total_sectors &&
- nb_sectors < s->cluster_sectors) {
- uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
- memset(pad_buf, 0, s->cluster_size);
- memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
- ret = qcow_write_compressed(bs, sector_num,
- pad_buf, s->cluster_sectors);
- qemu_vfree(pad_buf);
- }
- return ret;
- }
-
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, -12,
- 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
- ret = -EINVAL;
- goto fail;
- }
-
- strm.avail_in = s->cluster_size;
- strm.next_in = (uint8_t *)buf;
- strm.avail_out = s->cluster_size;
- strm.next_out = out_buf;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret != Z_STREAM_END && ret != Z_OK) {
- deflateEnd(&strm);
- ret = -EINVAL;
- goto fail;
- }
- out_len = strm.next_out - out_buf;
-
- deflateEnd(&strm);
-
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
- /* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
- if (ret < 0) {
- goto fail;
- }
- } else {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
- out_len, 0, 0);
- if (cluster_offset == 0) {
- ret = -EIO;
- goto fail;
- }
-
- cluster_offset &= s->cluster_offset_mask;
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = 0;
-fail:
- g_free(out_buf);
- return ret;
-}
-
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQcowState *s = bs->opaque;
- bdi->cluster_size = s->cluster_size;
- return 0;
-}
-
-
-static QEMUOptionParameter qcow_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- { NULL }
-};
-
-static BlockDriver bdrv_qcow = {
- .format_name = "qcow",
- .instance_size = sizeof(BDRVQcowState),
- .bdrv_probe = qcow_probe,
- .bdrv_open = qcow_open,
- .bdrv_close = qcow_close,
- .bdrv_reopen_prepare = qcow_reopen_prepare,
- .bdrv_create = qcow_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
-
- .bdrv_co_readv = qcow_co_readv,
- .bdrv_co_writev = qcow_co_writev,
- .bdrv_co_is_allocated = qcow_co_is_allocated,
-
- .bdrv_set_key = qcow_set_key,
- .bdrv_make_empty = qcow_make_empty,
- .bdrv_write_compressed = qcow_write_compressed,
- .bdrv_get_info = qcow_get_info,
-
- .create_options = qcow_create_options,
-};
-
-static void bdrv_qcow_init(void)
-{
- bdrv_register(&bdrv_qcow);
-}
-
-block_init(bdrv_qcow_init);
diff --git a/contrib/qemu/block/qcow2-cache.c b/contrib/qemu/block/qcow2-cache.c
deleted file mode 100644
index 2f3114ecc24..00000000000
--- a/contrib/qemu/block/qcow2-cache.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * L2/refcount table cache for the QCOW2 format
- *
- * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/block_int.h"
-#include "qemu-common.h"
-#include "qcow2.h"
-#include "trace.h"
-
-typedef struct Qcow2CachedTable {
- void* table;
- int64_t offset;
- bool dirty;
- int cache_hits;
- int ref;
-} Qcow2CachedTable;
-
-struct Qcow2Cache {
- Qcow2CachedTable* entries;
- struct Qcow2Cache* depends;
- int size;
- bool depends_on_flush;
-};
-
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2Cache *c;
- int i;
-
- c = g_malloc0(sizeof(*c));
- c->size = num_tables;
- c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
-
- for (i = 0; i < c->size; i++) {
- c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
- }
-
- return c;
-}
-
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- assert(c->entries[i].ref == 0);
- qemu_vfree(c->entries[i].table);
- }
-
- g_free(c->entries);
- g_free(c);
-
- return 0;
-}
-
-static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
-{
- int ret;
-
- ret = qcow2_cache_flush(bs, c->depends);
- if (ret < 0) {
- return ret;
- }
-
- c->depends = NULL;
- c->depends_on_flush = false;
-
- return 0;
-}
-
-static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
-{
- BDRVQcowState *s = bs->opaque;
- int ret = 0;
-
- if (!c->entries[i].dirty || !c->entries[i].offset) {
- return 0;
- }
-
- trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
-
- if (c->depends) {
- ret = qcow2_cache_flush_dependency(bs, c);
- } else if (c->depends_on_flush) {
- ret = bdrv_flush(bs->file);
- if (ret >= 0) {
- c->depends_on_flush = false;
- }
- }
-
- if (ret < 0) {
- return ret;
- }
-
- if (c == s->refcount_block_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
- } else if (c == s->l2_table_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
- }
-
- ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
- s->cluster_size);
- if (ret < 0) {
- return ret;
- }
-
- c->entries[i].dirty = false;
-
- return 0;
-}
-
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
-{
- BDRVQcowState *s = bs->opaque;
- int result = 0;
- int ret;
- int i;
-
- trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
-
- for (i = 0; i < c->size; i++) {
- ret = qcow2_cache_entry_flush(bs, c, i);
- if (ret < 0 && result != -ENOSPC) {
- result = ret;
- }
- }
-
- if (result == 0) {
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- result = ret;
- }
- }
-
- return result;
-}
-
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
- Qcow2Cache *dependency)
-{
- int ret;
-
- if (dependency->depends) {
- ret = qcow2_cache_flush_dependency(bs, dependency);
- if (ret < 0) {
- return ret;
- }
- }
-
- if (c->depends && (c->depends != dependency)) {
- ret = qcow2_cache_flush_dependency(bs, c);
- if (ret < 0) {
- return ret;
- }
- }
-
- c->depends = dependency;
- return 0;
-}
-
-void qcow2_cache_depends_on_flush(Qcow2Cache *c)
-{
- c->depends_on_flush = true;
-}
-
-static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
-{
- int i;
- int min_count = INT_MAX;
- int min_index = -1;
-
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].ref) {
- continue;
- }
-
- if (c->entries[i].cache_hits < min_count) {
- min_index = i;
- min_count = c->entries[i].cache_hits;
- }
-
- /* Give newer hits priority */
- /* TODO Check how to optimize the replacement strategy */
- c->entries[i].cache_hits /= 2;
- }
-
- if (min_index == -1) {
- /* This can't happen in current synchronous code, but leave the check
- * here as a reminder for whoever starts using AIO with the cache */
- abort();
- }
- return min_index;
-}
-
-static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
- uint64_t offset, void **table, bool read_from_disk)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
- int ret;
-
- trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
- offset, read_from_disk);
-
- /* Check if the table is already cached */
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].offset == offset) {
- goto found;
- }
- }
-
- /* If not, write a table back and replace it */
- i = qcow2_cache_find_entry_to_replace(c);
- trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
- if (i < 0) {
- return i;
- }
-
- ret = qcow2_cache_entry_flush(bs, c, i);
- if (ret < 0) {
- return ret;
- }
-
- trace_qcow2_cache_get_read(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
- c->entries[i].offset = 0;
- if (read_from_disk) {
- if (c == s->l2_table_cache) {
- BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
- }
-
- ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
- if (ret < 0) {
- return ret;
- }
- }
-
- /* Give the table some hits for the start so that it won't be replaced
- * immediately. The number 32 is completely arbitrary. */
- c->entries[i].cache_hits = 32;
- c->entries[i].offset = offset;
-
- /* And return the right table */
-found:
- c->entries[i].cache_hits++;
- c->entries[i].ref++;
- *table = c->entries[i].table;
-
- trace_qcow2_cache_get_done(qemu_coroutine_self(),
- c == s->l2_table_cache, i);
-
- return 0;
-}
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table)
-{
- return qcow2_cache_do_get(bs, c, offset, table, true);
-}
-
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table)
-{
- return qcow2_cache_do_get(bs, c, offset, table, false);
-}
-
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == *table) {
- goto found;
- }
- }
- return -ENOENT;
-
-found:
- c->entries[i].ref--;
- *table = NULL;
-
- assert(c->entries[i].ref >= 0);
- return 0;
-}
-
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
-{
- int i;
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == table) {
- goto found;
- }
- }
- abort();
-
-found:
- c->entries[i].dirty = true;
-}
diff --git a/contrib/qemu/block/qcow2-cluster.c b/contrib/qemu/block/qcow2-cluster.c
deleted file mode 100644
index cca76d4fcdd..00000000000
--- a/contrib/qemu/block/qcow2-cluster.c
+++ /dev/null
@@ -1,1478 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include <zlib.h>
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-#include "trace.h"
-
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
- bool exact_size)
-{
- BDRVQcowState *s = bs->opaque;
- int new_l1_size2, ret, i;
- uint64_t *new_l1_table;
- int64_t new_l1_table_offset, new_l1_size;
- uint8_t data[12];
-
- if (min_size <= s->l1_size)
- return 0;
-
- if (exact_size) {
- new_l1_size = min_size;
- } else {
- /* Bump size up to reduce the number of times we have to grow */
- new_l1_size = s->l1_size;
- if (new_l1_size == 0) {
- new_l1_size = 1;
- }
- while (min_size > new_l1_size) {
- new_l1_size = (new_l1_size * 3 + 1) / 2;
- }
- }
-
- if (new_l1_size > INT_MAX) {
- return -EFBIG;
- }
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
- s->l1_size, new_l1_size);
-#endif
-
- new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
- memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
-
- /* write new table (align to cluster) */
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
- new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
- if (new_l1_table_offset < 0) {
- g_free(new_l1_table);
- return new_l1_table_offset;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
- for(i = 0; i < s->l1_size; i++)
- new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
- if (ret < 0)
- goto fail;
- for(i = 0; i < s->l1_size; i++)
- new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
-
- /* set new table */
- BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
- cpu_to_be32w((uint32_t*)data, new_l1_size);
- cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
- if (ret < 0) {
- goto fail;
- }
- g_free(s->l1_table);
- qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- s->l1_table_offset = new_l1_table_offset;
- s->l1_table = new_l1_table;
- s->l1_size = new_l1_size;
- return 0;
- fail:
- g_free(new_l1_table);
- qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
- QCOW2_DISCARD_OTHER);
- return ret;
-}
-
-/*
- * l2_load
- *
- * Loads a L2 table into memory. If the table is in the cache, the cache
- * is used; otherwise the L2 table is loaded from the image file.
- *
- * Returns a pointer to the L2 table on success, or NULL if the read from
- * the image file failed.
- */
-
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
- uint64_t **l2_table)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
-
- return ret;
-}
-
-/*
- * Writes one sector of the L1 table to the disk (can't update single entries
- * and we really don't want bdrv_pread to perform a read-modify-write)
- */
-#define L1_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l1_entry(BlockDriverState *bs, int l1_index)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t buf[L1_ENTRIES_PER_SECTOR];
- int l1_start_index;
- int i, ret;
-
- l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
- for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
- buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
- buf, sizeof(buf));
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-}
-
-/*
- * l2_allocate
- *
- * Allocate a new l2 entry in the file. If l1_index points to an already
- * used entry in the L2 table (i.e. we are doing a copy on write for the L2
- * table) copy the contents of the old L2 table into the newly allocated one.
- * Otherwise the new table is initialized with zeros.
- *
- */
-
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t old_l2_offset;
- uint64_t *l2_table;
- int64_t l2_offset;
- int ret;
-
- old_l2_offset = s->l1_table[l1_index];
-
- trace_qcow2_l2_allocate(bs, l1_index);
-
- /* allocate a new l2 entry */
-
- l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
- if (l2_offset < 0) {
- return l2_offset;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail;
- }
-
- /* allocate a new entry in the l2 cache */
-
- trace_qcow2_l2_allocate_get_empty(bs, l1_index);
- ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
- if (ret < 0) {
- return ret;
- }
-
- l2_table = *table;
-
- if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
- /* if there was no old l2 table, clear the new table */
- memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- } else {
- uint64_t* old_table;
-
- /* if there was an old l2 table, read it from the disk */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
- ret = qcow2_cache_get(bs, s->l2_table_cache,
- old_l2_offset & L1E_OFFSET_MASK,
- (void**) &old_table);
- if (ret < 0) {
- goto fail;
- }
-
- memcpy(l2_table, old_table, s->cluster_size);
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* write the l2 table to the file */
- BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-
- trace_qcow2_l2_allocate_write_l2(bs, l1_index);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- goto fail;
- }
-
- /* update the L1 entry */
- trace_qcow2_l2_allocate_write_l1(bs, l1_index);
- s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
- ret = write_l1_entry(bs, l1_index);
- if (ret < 0) {
- goto fail;
- }
-
- *table = l2_table;
- trace_qcow2_l2_allocate_done(bs, l1_index, 0);
- return 0;
-
-fail:
- trace_qcow2_l2_allocate_done(bs, l1_index, ret);
- qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
- s->l1_table[l1_index] = old_l2_offset;
- return ret;
-}
-
-/*
- * Checks how many clusters in a given L2 table are contiguous in the image
- * file. As soon as one of the flags in the bitmask stop_flags changes compared
- * to the first cluster, the search is stopped and the cluster is not counted
- * as contiguous. (This allows it, for example, to stop at the first compressed
- * cluster which may require a different handling)
- */
-static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
- uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
-{
- int i;
- uint64_t mask = stop_flags | L2E_OFFSET_MASK;
- uint64_t offset = be64_to_cpu(l2_table[0]) & mask;
-
- if (!offset)
- return 0;
-
- for (i = start; i < start + nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
- if (offset + (uint64_t) i * cluster_size != l2_entry) {
- break;
- }
- }
-
- return (i - start);
-}
-
-static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
-{
- int i;
-
- for (i = 0; i < nb_clusters; i++) {
- int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
-
- if (type != QCOW2_CLUSTER_UNALLOCATED) {
- break;
- }
- }
-
- return i;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
- algorithm for < 4 GB images. NOTE: out_buf == in_buf is
- supported */
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
-{
- union {
- uint64_t ll[2];
- uint8_t b[16];
- } ivec;
- int i;
-
- for(i = 0; i < nb_sectors; i++) {
- ivec.ll[0] = cpu_to_le64(sector_num);
- ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
- sector_num++;
- in_buf += 512;
- out_buf += 512;
- }
-}
-
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
- uint64_t start_sect,
- uint64_t cluster_offset,
- int n_start, int n_end)
-{
- BDRVQcowState *s = bs->opaque;
- QEMUIOVector qiov;
- struct iovec iov;
- int n, ret;
-
- /*
- * If this is the last cluster and it is only partially used, we must only
- * copy until the end of the image, or bdrv_check_request will fail for the
- * bdrv_read/write calls below.
- */
- if (start_sect + n_end > bs->total_sectors) {
- n_end = bs->total_sectors - start_sect;
- }
-
- n = n_end - n_start;
- if (n <= 0) {
- return 0;
- }
-
- iov.iov_len = n * BDRV_SECTOR_SIZE;
- iov.iov_base = qemu_blockalign(bs, iov.iov_len);
-
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
-
- /* Call .bdrv_co_readv() directly instead of using the public block-layer
- * interface. This avoids double I/O throttling and request tracking,
- * which can lead to deadlock when block layer copy-on-read is enabled.
- */
- ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
- if (ret < 0) {
- goto out;
- }
-
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, start_sect + n_start,
- iov.iov_base, iov.iov_base, n, 1,
- &s->aes_encrypt_key);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
- if (ret < 0) {
- goto out;
- }
-
- ret = 0;
-out:
- qemu_vfree(iov.iov_base);
- return ret;
-}
-
-
-/*
- * get_cluster_offset
- *
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
- *
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
- *
- * on exit, *num is the number of contiguous sectors we can read.
- *
- * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
- * cases.
- */
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int l2_index;
- uint64_t l1_index, l2_offset, *l2_table;
- int l1_bits, c;
- unsigned int index_in_cluster, nb_clusters;
- uint64_t nb_available, nb_needed;
- int ret;
-
- index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
- nb_needed = *num + index_in_cluster;
-
- l1_bits = s->l2_bits + s->cluster_bits;
-
- /* compute how many bytes there are between the offset and
- * the end of the l1 entry
- */
-
- nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
-
- /* compute the number of available sectors */
-
- nb_available = (nb_available >> 9) + index_in_cluster;
-
- if (nb_needed > nb_available) {
- nb_needed = nb_available;
- }
-
- *cluster_offset = 0;
-
- /* seek the the l2 offset in the l1 table */
-
- l1_index = offset >> l1_bits;
- if (l1_index >= s->l1_size) {
- ret = QCOW2_CLUSTER_UNALLOCATED;
- goto out;
- }
-
- l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
- if (!l2_offset) {
- ret = QCOW2_CLUSTER_UNALLOCATED;
- goto out;
- }
-
- /* load the l2 table in memory */
-
- ret = l2_load(bs, l2_offset, &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* find the cluster offset for the given disk offset */
-
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
- *cluster_offset = be64_to_cpu(l2_table[l2_index]);
- nb_clusters = size_to_clusters(s, nb_needed << 9);
-
- ret = qcow2_get_cluster_type(*cluster_offset);
- switch (ret) {
- case QCOW2_CLUSTER_COMPRESSED:
- /* Compressed clusters can only be processed one by one */
- c = 1;
- *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
- break;
- case QCOW2_CLUSTER_ZERO:
- if (s->qcow_version < 3) {
- return -EIO;
- }
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
- *cluster_offset = 0;
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- /* how many empty clusters ? */
- c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
- *cluster_offset = 0;
- break;
- case QCOW2_CLUSTER_NORMAL:
- /* how many allocated clusters ? */
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
- *cluster_offset &= L2E_OFFSET_MASK;
- break;
- default:
- abort();
- }
-
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-
- nb_available = (c * s->cluster_sectors);
-
-out:
- if (nb_available > nb_needed)
- nb_available = nb_needed;
-
- *num = nb_available - index_in_cluster;
-
- return ret;
-}
-
-/*
- * get_cluster_table
- *
- * for a given disk offset, load (and allocate if needed)
- * the l2 table.
- *
- * the l2 table offset in the qcow2 file and the cluster index
- * in the l2 table are given to the caller.
- *
- * Returns 0 on success, -errno in failure case
- */
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
- uint64_t **new_l2_table,
- int *new_l2_index)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int l2_index;
- uint64_t l1_index, l2_offset;
- uint64_t *l2_table = NULL;
- int ret;
-
- /* seek the the l2 offset in the l1 table */
-
- l1_index = offset >> (s->l2_bits + s->cluster_bits);
- if (l1_index >= s->l1_size) {
- ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
- if (ret < 0) {
- return ret;
- }
- }
-
- assert(l1_index < s->l1_size);
- l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
-
- /* seek the l2 table of the given l2 offset */
-
- if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
- /* load the l2 table in memory */
- ret = l2_load(bs, l2_offset, &l2_table);
- if (ret < 0) {
- return ret;
- }
- } else {
- /* First allocate a new L2 table (and do COW if needed) */
- ret = l2_allocate(bs, l1_index, &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* Then decrease the refcount of the old table */
- if (l2_offset) {
- qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- }
- }
-
- /* find the cluster offset for the given disk offset */
-
- l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-
- *new_l2_table = l2_table;
- *new_l2_index = l2_index;
-
- return 0;
-}
-
-/*
- * alloc_compressed_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
- * If the offset is not found, allocate a new compressed cluster.
- *
- * Return the cluster offset if successful,
- * Return 0, otherwise.
- *
- */
-
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int compressed_size)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index, ret;
- uint64_t *l2_table;
- int64_t cluster_offset;
- int nb_csectors;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return 0;
- }
-
- /* Compression can't overwrite anything. Fail if the cluster was already
- * allocated. */
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
- if (cluster_offset & L2E_OFFSET_MASK) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- return 0;
- }
-
- cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
- if (cluster_offset < 0) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- return 0;
- }
-
- nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
- (cluster_offset >> 9);
-
- cluster_offset |= QCOW_OFLAG_COMPRESSED |
- ((uint64_t)nb_csectors << s->csize_shift);
-
- /* update L2 table */
-
- /* compressed clusters never have the copied flag */
-
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- l2_table[l2_index] = cpu_to_be64(cluster_offset);
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return 0;
- }
-
- return cluster_offset;
-}
-
-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- if (r->nb_sectors == 0) {
- return 0;
- }
-
- qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
- r->offset / BDRV_SECTOR_SIZE,
- r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
- qemu_co_mutex_lock(&s->lock);
-
- if (ret < 0) {
- return ret;
- }
-
- /*
- * Before we update the L2 table to actually point to the new cluster, we
- * need to be sure that the refcounts have been increased and COW was
- * handled.
- */
- qcow2_cache_depends_on_flush(s->l2_table_cache);
-
- return 0;
-}
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
-{
- BDRVQcowState *s = bs->opaque;
- int i, j = 0, l2_index, ret;
- uint64_t *old_cluster, *l2_table;
- uint64_t cluster_offset = m->alloc_offset;
-
- trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
- assert(m->nb_clusters > 0);
-
- old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
-
- /* copy content of unmodified sectors */
- ret = perform_cow(bs, m, &m->cow_start);
- if (ret < 0) {
- goto err;
- }
-
- ret = perform_cow(bs, m, &m->cow_end);
- if (ret < 0) {
- goto err;
- }
-
- /* Update L2 table. */
- if (s->use_lazy_refcounts) {
- qcow2_mark_dirty(bs);
- }
- if (qcow2_need_accurate_refcounts(s)) {
- qcow2_cache_set_dependency(bs, s->l2_table_cache,
- s->refcount_block_cache);
- }
-
- ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
- if (ret < 0) {
- goto err;
- }
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-
- for (i = 0; i < m->nb_clusters; i++) {
- /* if two concurrent writes happen to the same unallocated cluster
- * each write allocates separate cluster and writes data concurrently.
- * The first one to complete updates l2 table with pointer to its
- * cluster the second one has to do RMW (which is done above by
- * copy_sectors()), update l2 table with its cluster pointer and free
- * old cluster. This is what this loop does */
- if(l2_table[l2_index + i] != 0)
- old_cluster[j++] = l2_table[l2_index + i];
-
- l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
- (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
- }
-
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto err;
- }
-
- /*
- * If this was a COW, we need to decrease the refcount of the old cluster.
- * Also flush bs->file to get the right order for L2 and refcount update.
- *
- * Don't discard clusters that reach a refcount of 0 (e.g. compressed
- * clusters), the next write will reuse them anyway.
- */
- if (j != 0) {
- for (i = 0; i < j; i++) {
- qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
- QCOW2_DISCARD_NEVER);
- }
- }
-
- ret = 0;
-err:
- g_free(old_cluster);
- return ret;
- }
-
-/*
- * Returns the number of contiguous clusters that can be used for an allocating
- * write, but require COW to be performed (this includes yet unallocated space,
- * which must copy from the backing file)
- */
-static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
- uint64_t *l2_table, int l2_index)
-{
- int i;
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
- int cluster_type = qcow2_get_cluster_type(l2_entry);
-
- switch(cluster_type) {
- case QCOW2_CLUSTER_NORMAL:
- if (l2_entry & QCOW_OFLAG_COPIED) {
- goto out;
- }
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- case QCOW2_CLUSTER_COMPRESSED:
- case QCOW2_CLUSTER_ZERO:
- break;
- default:
- abort();
- }
- }
-
-out:
- assert(i <= nb_clusters);
- return i;
-}
-
-/*
- * Check if there already is an AIO write request in flight which allocates
- * the same cluster. In this case we need to wait until the previous
- * request has completed and updated the L2 table accordingly.
- *
- * Returns:
- * 0 if there was no dependency. *cur_bytes indicates the number of
- * bytes from guest_offset that can be read before the next
- * dependency must be processed (or the request is complete)
- *
- * -EAGAIN if we had to wait for another request, previously gathered
- * information on cluster allocation may be invalid now. The caller
- * must start over anyway, so consider *cur_bytes undefined.
- */
-static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *cur_bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- QCowL2Meta *old_alloc;
- uint64_t bytes = *cur_bytes;
-
- QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
-
- uint64_t start = guest_offset;
- uint64_t end = start + bytes;
- uint64_t old_start = l2meta_cow_start(old_alloc);
- uint64_t old_end = l2meta_cow_end(old_alloc);
-
- if (end <= old_start || start >= old_end) {
- /* No intersection */
- } else {
- if (start < old_start) {
- /* Stop at the start of a running allocation */
- bytes = old_start - start;
- } else {
- bytes = 0;
- }
-
- /* Stop if already an l2meta exists. After yielding, it wouldn't
- * be valid any more, so we'd have to clean up the old L2Metas
- * and deal with requests depending on them before starting to
- * gather new ones. Not worth the trouble. */
- if (bytes == 0 && *m) {
- *cur_bytes = 0;
- return 0;
- }
-
- if (bytes == 0) {
- /* Wait for the dependency to complete. We need to recheck
- * the free/allocated clusters when we continue. */
- qemu_co_mutex_unlock(&s->lock);
- qemu_co_queue_wait(&old_alloc->dependent_requests);
- qemu_co_mutex_lock(&s->lock);
- return -EAGAIN;
- }
- }
- }
-
- /* Make sure that existing clusters and new allocations are only used up to
- * the next dependency if we shortened the request above */
- *cur_bytes = bytes;
-
- return 0;
-}
-
-/*
- * Checks how many already allocated clusters that don't require a copy on
- * write there are at the given guest_offset (up to *bytes). If
- * *host_offset is not zero, only physically contiguous clusters beginning at
- * this host offset are counted.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * Returns:
- * 0: if no allocated clusters are available at the given offset.
- * *bytes is normally unchanged. It is set to 0 if the cluster
- * is allocated and doesn't need COW, but doesn't have the right
- * physical offset.
- *
- * 1: if allocated clusters that don't require a COW are available at
- * the requested offset. *bytes may have decreased and describes
- * the length of the area that can be written to.
- *
- * -errno: in error cases
- */
-static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index;
- uint64_t cluster_offset;
- uint64_t *l2_table;
- unsigned int nb_clusters;
- unsigned int keep_clusters;
- int ret, pret;
-
- trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
- *bytes);
-
- assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
- == offset_into_cluster(s, *host_offset));
-
- /*
- * Calculate the number of clusters to look for. We stop at L2 table
- * boundaries to keep things simple.
- */
- nb_clusters =
- size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- /* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
- /* Check how many clusters are already allocated and don't need COW */
- if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
- && (cluster_offset & QCOW_OFLAG_COPIED))
- {
- /* If a specific host_offset is required, check it */
- bool offset_matches =
- (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
-
- if (*host_offset != 0 && !offset_matches) {
- *bytes = 0;
- ret = 0;
- goto out;
- }
-
- /* We keep all QCOW_OFLAG_COPIED clusters */
- keep_clusters =
- count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], 0,
- QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
- assert(keep_clusters <= nb_clusters);
-
- *bytes = MIN(*bytes,
- keep_clusters * s->cluster_size
- - offset_into_cluster(s, guest_offset));
-
- ret = 1;
- } else {
- ret = 0;
- }
-
- /* Cleanup */
-out:
- pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (pret < 0) {
- return pret;
- }
-
- /* Only return a host offset if we actually made progress. Otherwise we
- * would make requirements for handle_alloc() that it can't fulfill */
- if (ret) {
- *host_offset = (cluster_offset & L2E_OFFSET_MASK)
- + offset_into_cluster(s, guest_offset);
- }
-
- return ret;
-}
-
-/*
- * Allocates new clusters for the given guest_offset.
- *
- * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
- * contain the number of clusters that have been allocated and are contiguous
- * in the image file.
- *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
- *
- * *host_offset is updated to contain the offset into the image file at which
- * the first allocated cluster starts.
- *
- * Return 0 on success and -errno in error cases. -EAGAIN means that the
- * function has been waiting for another request and the allocation must be
- * restarted, but the whole request should not be failed.
- */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, unsigned int *nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
-
- trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
- *host_offset, *nb_clusters);
-
- /* Allocate new clusters */
- trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
- if (*host_offset == 0) {
- int64_t cluster_offset =
- qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
- if (cluster_offset < 0) {
- return cluster_offset;
- }
- *host_offset = cluster_offset;
- return 0;
- } else {
- int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
- if (ret < 0) {
- return ret;
- }
- *nb_clusters = ret;
- return 0;
- }
-}
-
-/*
- * Allocates new clusters for an area that either is yet unallocated or needs a
- * copy on write. If *host_offset is non-zero, clusters are only allocated if
- * the new allocation can match the specified host offset.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * Returns:
- * 0: if no clusters could be allocated. *bytes is set to 0,
- * *host_offset is left unchanged.
- *
- * 1: if new clusters were allocated. *bytes may be decreased if the
- * new allocation doesn't cover all of the requested area.
- * *host_offset is updated to contain the host offset of the first
- * newly allocated cluster.
- *
- * -errno: in error cases
- */
-static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- int l2_index;
- uint64_t *l2_table;
- uint64_t entry;
- unsigned int nb_clusters;
- int ret;
-
- uint64_t alloc_cluster_offset;
-
- trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
- *bytes);
- assert(*bytes > 0);
-
- /*
- * Calculate the number of clusters to look for. We stop at L2 table
- * boundaries to keep things simple.
- */
- nb_clusters =
- size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
- l2_index = offset_to_l2_index(s, guest_offset);
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- /* Find L2 entry for the first involved cluster */
- ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- entry = be64_to_cpu(l2_table[l2_index]);
-
- /* For the moment, overwrite compressed clusters one by one */
- if (entry & QCOW_OFLAG_COMPRESSED) {
- nb_clusters = 1;
- } else {
- nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
- }
-
- /* This function is only called when there were no non-COW clusters, so if
- * we can't find any unallocated or COW clusters either, something is
- * wrong with our code. */
- assert(nb_clusters > 0);
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- /* Allocate, if necessary at a given offset in the image file */
- alloc_cluster_offset = start_of_cluster(s, *host_offset);
- ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
- &nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- /* Can't extend contiguous allocation */
- if (nb_clusters == 0) {
- *bytes = 0;
- return 0;
- }
-
- /*
- * Save info needed for meta data update.
- *
- * requested_sectors: Number of sectors from the start of the first
- * newly allocated cluster to the end of the (possibly shortened
- * before) write request.
- *
- * avail_sectors: Number of sectors from the start of the first
- * newly allocated to the end of the last newly allocated cluster.
- *
- * nb_sectors: The number of sectors from the start of the first
- * newly allocated cluster to the end of the area that the write
- * request actually writes to (excluding COW at the end)
- */
- int requested_sectors =
- (*bytes + offset_into_cluster(s, guest_offset))
- >> BDRV_SECTOR_BITS;
- int avail_sectors = nb_clusters
- << (s->cluster_bits - BDRV_SECTOR_BITS);
- int alloc_n_start = offset_into_cluster(s, guest_offset)
- >> BDRV_SECTOR_BITS;
- int nb_sectors = MIN(requested_sectors, avail_sectors);
- QCowL2Meta *old_m = *m;
-
- *m = g_malloc0(sizeof(**m));
-
- **m = (QCowL2Meta) {
- .next = old_m,
-
- .alloc_offset = alloc_cluster_offset,
- .offset = start_of_cluster(s, guest_offset),
- .nb_clusters = nb_clusters,
- .nb_available = nb_sectors,
-
- .cow_start = {
- .offset = 0,
- .nb_sectors = alloc_n_start,
- },
- .cow_end = {
- .offset = nb_sectors * BDRV_SECTOR_SIZE,
- .nb_sectors = avail_sectors - nb_sectors,
- },
- };
- qemu_co_queue_init(&(*m)->dependent_requests);
- QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
-
- *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
- *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
- - offset_into_cluster(s, guest_offset));
- assert(*bytes != 0);
-
- return 1;
-
-fail:
- if (*m && (*m)->nb_clusters > 0) {
- QLIST_REMOVE(*m, next_in_flight);
- }
- return ret;
-}
-
-/*
- * alloc_cluster_offset
- *
- * For a given offset on the virtual disk, find the cluster offset in qcow2
- * file. If the offset is not found, allocate a new cluster.
- *
- * If the cluster was already allocated, m->nb_clusters is set to 0 and
- * other fields in m are meaningless.
- *
- * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. In this case, the other
- * fields of m are valid and contain information about the first allocated
- * cluster.
- *
- * If the request conflicts with another write request in flight, the coroutine
- * is queued and will be reentered when the dependency has completed.
- *
- * Return 0 on success and -errno in error cases
- */
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t start, remaining;
- uint64_t cluster_offset;
- uint64_t cur_bytes;
- int ret;
-
- trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
- n_start, n_end);
-
- assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
- offset = start_of_cluster(s, offset);
-
-again:
- start = offset + (n_start << BDRV_SECTOR_BITS);
- remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
- cluster_offset = 0;
- *host_offset = 0;
- cur_bytes = 0;
- *m = NULL;
-
- while (true) {
-
- if (!*host_offset) {
- *host_offset = start_of_cluster(s, cluster_offset);
- }
-
- assert(remaining >= cur_bytes);
-
- start += cur_bytes;
- remaining -= cur_bytes;
- cluster_offset += cur_bytes;
-
- if (remaining == 0) {
- break;
- }
-
- cur_bytes = remaining;
-
- /*
- * Now start gathering as many contiguous clusters as possible:
- *
- * 1. Check for overlaps with in-flight allocations
- *
- * a) Overlap not in the first cluster -> shorten this request and
- * let the caller handle the rest in its next loop iteration.
- *
- * b) Real overlaps of two requests. Yield and restart the search
- * for contiguous clusters (the situation could have changed
- * while we were sleeping)
- *
- * c) TODO: Request starts in the same cluster as the in-flight
- * allocation ends. Shorten the COW of the in-fight allocation,
- * set cluster_offset to write to the same cluster and set up
- * the right synchronisation between the in-flight request and
- * the new one.
- */
- ret = handle_dependencies(bs, start, &cur_bytes, m);
- if (ret == -EAGAIN) {
- /* Currently handle_dependencies() doesn't yield if we already had
- * an allocation. If it did, we would have to clean up the L2Meta
- * structs before starting over. */
- assert(*m == NULL);
- goto again;
- } else if (ret < 0) {
- return ret;
- } else if (cur_bytes == 0) {
- break;
- } else {
- /* handle_dependencies() may have decreased cur_bytes (shortened
- * the allocations below) so that the next dependency is processed
- * correctly during the next loop iteration. */
- }
-
- /*
- * 2. Count contiguous COPIED clusters.
- */
- ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- continue;
- } else if (cur_bytes == 0) {
- break;
- }
-
- /*
- * 3. If the request still hasn't completed, allocate new clusters,
- * considering any cluster_offset of steps 1c or 2.
- */
- ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- continue;
- } else {
- assert(cur_bytes == 0);
- break;
- }
- }
-
- *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
- assert(*num > 0);
- assert(*host_offset != 0);
-
- return 0;
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
- const uint8_t *buf, int buf_size)
-{
- z_stream strm1, *strm = &strm1;
- int ret, out_len;
-
- memset(strm, 0, sizeof(*strm));
-
- strm->next_in = (uint8_t *)buf;
- strm->avail_in = buf_size;
- strm->next_out = out_buf;
- strm->avail_out = out_buf_size;
-
- ret = inflateInit2(strm, -12);
- if (ret != Z_OK)
- return -1;
- ret = inflate(strm, Z_FINISH);
- out_len = strm->next_out - out_buf;
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
- out_len != out_buf_size) {
- inflateEnd(strm);
- return -1;
- }
- inflateEnd(strm);
- return 0;
-}
-
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, csize, nb_csectors, sector_offset;
- uint64_t coffset;
-
- coffset = cluster_offset & s->cluster_offset_mask;
- if (s->cluster_cache_offset != coffset) {
- nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
- sector_offset = coffset & 511;
- csize = nb_csectors * 512 - sector_offset;
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
- if (ret < 0) {
- return ret;
- }
- if (decompress_buffer(s->cluster_cache, s->cluster_size,
- s->cluster_data + sector_offset, csize) < 0) {
- return -EIO;
- }
- s->cluster_cache_offset = coffset;
- }
- return 0;
-}
-
-/*
- * This discards as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of discarded
- * clusters.
- */
-static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table;
- int l2_index;
- int ret;
- int i;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
-
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
- if ((old_offset & L2E_OFFSET_MASK) == 0) {
- continue;
- }
-
- /* First remove L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- l2_table[l2_index + i] = cpu_to_be64(0);
-
- /* Then decrease the refcount */
- qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- return nb_clusters;
-}
-
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t end_offset;
- unsigned int nb_clusters;
- int ret;
-
- end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
- /* Round start up and end down */
- offset = align_offset(offset, s->cluster_size);
- end_offset &= ~(s->cluster_size - 1);
-
- if (offset > end_offset) {
- return 0;
- }
-
- nb_clusters = size_to_clusters(s, end_offset - offset);
-
- s->cache_discards = true;
-
- /* Each L2 table is handled by its own loop iteration */
- while (nb_clusters > 0) {
- ret = discard_single_l2(bs, offset, nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- nb_clusters -= ret;
- offset += (ret * s->cluster_size);
- }
-
- ret = 0;
-fail:
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- return ret;
-}
-
-/*
- * This zeroes as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of zeroed
- * clusters.
- */
-static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table;
- int l2_index;
- int ret;
- int i;
-
- ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
- if (ret < 0) {
- return ret;
- }
-
- /* Limit nb_clusters to one L2 table */
- nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
- for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
-
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
-
- /* Update L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- if (old_offset & QCOW_OFLAG_COMPRESSED) {
- l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
- qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
- } else {
- l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
- }
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
-
- return nb_clusters;
-}
-
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int nb_clusters;
- int ret;
-
- /* The zero flag is only supported by version 3 and newer */
- if (s->qcow_version < 3) {
- return -ENOTSUP;
- }
-
- /* Each L2 table is handled by its own loop iteration */
- nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
-
- s->cache_discards = true;
-
- while (nb_clusters > 0) {
- ret = zero_single_l2(bs, offset, nb_clusters);
- if (ret < 0) {
- goto fail;
- }
-
- nb_clusters -= ret;
- offset += (ret * s->cluster_size);
- }
-
- ret = 0;
-fail:
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- return ret;
-}
diff --git a/contrib/qemu/block/qcow2-refcount.c b/contrib/qemu/block/qcow2-refcount.c
deleted file mode 100644
index 1244693f39e..00000000000
--- a/contrib/qemu/block/qcow2-refcount.c
+++ /dev/null
@@ -1,1374 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length,
- int addend, enum qcow2_discard_type type);
-
-
-/*********************************************************/
-/* refcount handling */
-
-int qcow2_refcount_init(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, refcount_table_size2, i;
-
- refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
- s->refcount_table = g_malloc(refcount_table_size2);
- if (s->refcount_table_size > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
- ret = bdrv_pread(bs->file, s->refcount_table_offset,
- s->refcount_table, refcount_table_size2);
- if (ret != refcount_table_size2)
- goto fail;
- for(i = 0; i < s->refcount_table_size; i++)
- be64_to_cpus(&s->refcount_table[i]);
- }
- return 0;
- fail:
- return -ENOMEM;
-}
-
-void qcow2_refcount_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- g_free(s->refcount_table);
-}
-
-
-static int load_refcount_block(BlockDriverState *bs,
- int64_t refcount_block_offset,
- void **refcount_block)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- refcount_block);
-
- return ret;
-}
-
-/*
- * Returns the refcount of the cluster given by its index. Any non-negative
- * return value is the refcount of the cluster, negative values are -errno
- * and indicate an error.
- */
-static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
-{
- BDRVQcowState *s = bs->opaque;
- int refcount_table_index, block_index;
- int64_t refcount_block_offset;
- int ret;
- uint16_t *refcount_block;
- uint16_t refcount;
-
- refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
- if (refcount_table_index >= s->refcount_table_size)
- return 0;
- refcount_block_offset = s->refcount_table[refcount_table_index];
- if (!refcount_block_offset)
- return 0;
-
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- (void**) &refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- block_index = cluster_index &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
- refcount = be16_to_cpu(refcount_block[block_index]);
-
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- return refcount;
-}
-
-/*
- * Rounds the refcount table size up to avoid growing the table for each single
- * refcount block that is allocated.
- */
-static unsigned int next_refcount_table_size(BDRVQcowState *s,
- unsigned int min_size)
-{
- unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
- unsigned int refcount_table_clusters =
- MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
-
- while (min_clusters > refcount_table_clusters) {
- refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
- }
-
- return refcount_table_clusters << (s->cluster_bits - 3);
-}
-
-
-/* Checks if two offsets are described by the same refcount block */
-static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
- uint64_t offset_b)
-{
- uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
- uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
-
- return (block_a == block_b);
-}
-
-/*
- * Loads a refcount block. If it doesn't exist yet, it is allocated first
- * (including growing the refcount table if needed).
- *
- * Returns 0 on success or -errno in error case
- */
-static int alloc_refcount_block(BlockDriverState *bs,
- int64_t cluster_index, uint16_t **refcount_block)
-{
- BDRVQcowState *s = bs->opaque;
- unsigned int refcount_table_index;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
-
- /* Find the refcount block for the given cluster */
- refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-
- if (refcount_table_index < s->refcount_table_size) {
-
- uint64_t refcount_block_offset =
- s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
-
- /* If it's already there, we're done */
- if (refcount_block_offset) {
- return load_refcount_block(bs, refcount_block_offset,
- (void**) refcount_block);
- }
- }
-
- /*
- * If we came here, we need to allocate something. Something is at least
- * a cluster for the new refcount block. It may also include a new refcount
- * table if the old refcount table is too small.
- *
- * Note that allocating clusters here needs some special care:
- *
- * - We can't use the normal qcow2_alloc_clusters(), it would try to
- * increase the refcount and very likely we would end up with an endless
- * recursion. Instead we must place the refcount blocks in a way that
- * they can describe them themselves.
- *
- * - We need to consider that at this point we are inside update_refcounts
- * and doing the initial refcount increase. This means that some clusters
- * have already been allocated by the caller, but their refcount isn't
- * accurate yet. free_cluster_index tells us where this allocation ends
- * as long as we don't overwrite it by freeing clusters.
- *
- * - alloc_clusters_noref and qcow2_free_clusters may load a different
- * refcount block into the cache
- */
-
- *refcount_block = NULL;
-
- /* We write to the refcount table, so we might depend on L2 tables */
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- return ret;
- }
-
- /* Allocate the refcount block itself and mark it as used */
- int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
- if (new_block < 0) {
- return new_block;
- }
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
- " at %" PRIx64 "\n",
- refcount_table_index, cluster_index << s->cluster_bits, new_block);
-#endif
-
- if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
- /* Zero the new refcount block before updating it */
- ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- memset(*refcount_block, 0, s->cluster_size);
-
- /* The block describes itself, need to update the cache */
- int block_index = (new_block >> s->cluster_bits) &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
- (*refcount_block)[block_index] = cpu_to_be16(1);
- } else {
- /* Described somewhere else. This can recurse at most twice before we
- * arrive at a block that describes itself. */
- ret = update_refcount(bs, new_block, s->cluster_size, 1,
- QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- goto fail_block;
- }
-
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail_block;
- }
-
- /* Initialize the new refcount block only after updating its refcount,
- * update_refcount uses the refcount cache itself */
- ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- memset(*refcount_block, 0, s->cluster_size);
- }
-
- /* Now the new refcount block needs to be written to disk */
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- goto fail_block;
- }
-
- /* If the refcount table is big enough, just hook the block up there */
- if (refcount_table_index < s->refcount_table_size) {
- uint64_t data64 = cpu_to_be64(new_block);
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
- ret = bdrv_pwrite_sync(bs->file,
- s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
- &data64, sizeof(data64));
- if (ret < 0) {
- goto fail_block;
- }
-
- s->refcount_table[refcount_table_index] = new_block;
- return 0;
- }
-
- ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
-
- /*
- * If we come here, we need to grow the refcount table. Again, a new
- * refcount table needs some space and we can't simply allocate to avoid
- * endless recursion.
- *
- * Therefore let's grab new refcount blocks at the end of the image, which
- * will describe themselves and the new refcount table. This way we can
- * reference them only in the new table and do the switch to the new
- * refcount table at once without producing an inconsistent state in
- * between.
- */
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
-
- /* Calculate the number of refcount blocks needed so far */
- uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
- uint64_t blocks_used = (s->free_cluster_index +
- refcount_block_clusters - 1) / refcount_block_clusters;
-
- /* And now we need at least one block more for the new metadata */
- uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
- uint64_t last_table_size;
- uint64_t blocks_clusters;
- do {
- uint64_t table_clusters =
- size_to_clusters(s, table_size * sizeof(uint64_t));
- blocks_clusters = 1 +
- ((table_clusters + refcount_block_clusters - 1)
- / refcount_block_clusters);
- uint64_t meta_clusters = table_clusters + blocks_clusters;
-
- last_table_size = table_size;
- table_size = next_refcount_table_size(s, blocks_used +
- ((meta_clusters + refcount_block_clusters - 1)
- / refcount_block_clusters));
-
- } while (last_table_size != table_size);
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
- s->refcount_table_size, table_size);
-#endif
-
- /* Create the new refcount table and blocks */
- uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
- s->cluster_size;
- uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
- uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
- uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
-
- assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
-
- /* Fill the new refcount table */
- memcpy(new_table, s->refcount_table,
- s->refcount_table_size * sizeof(uint64_t));
- new_table[refcount_table_index] = new_block;
-
- int i;
- for (i = 0; i < blocks_clusters; i++) {
- new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
- }
-
- /* Fill the refcount blocks */
- uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
- int block = 0;
- for (i = 0; i < table_clusters + blocks_clusters; i++) {
- new_blocks[block++] = cpu_to_be16(1);
- }
-
- /* Write refcount blocks to disk */
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
- ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
- blocks_clusters * s->cluster_size);
- g_free(new_blocks);
- if (ret < 0) {
- goto fail_table;
- }
-
- /* Write refcount table to disk */
- for(i = 0; i < table_size; i++) {
- cpu_to_be64s(&new_table[i]);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
- ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
- table_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail_table;
- }
-
- for(i = 0; i < table_size; i++) {
- be64_to_cpus(&new_table[i]);
- }
-
- /* Hook up the new refcount table in the qcow2 header */
- uint8_t data[12];
- cpu_to_be64w((uint64_t*)data, table_offset);
- cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
- BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
- data, sizeof(data));
- if (ret < 0) {
- goto fail_table;
- }
-
- /* And switch it in memory */
- uint64_t old_table_offset = s->refcount_table_offset;
- uint64_t old_table_size = s->refcount_table_size;
-
- g_free(s->refcount_table);
- s->refcount_table = new_table;
- s->refcount_table_size = table_size;
- s->refcount_table_offset = table_offset;
-
- /* Free old table. Remember, we must not change free_cluster_index */
- uint64_t old_free_cluster_index = s->free_cluster_index;
- qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
- QCOW2_DISCARD_OTHER);
- s->free_cluster_index = old_free_cluster_index;
-
- ret = load_refcount_block(bs, new_block, (void**) refcount_block);
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-
-fail_table:
- g_free(new_table);
-fail_block:
- if (*refcount_block != NULL) {
- qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
- }
- return ret;
-}
-
-void qcow2_process_discards(BlockDriverState *bs, int ret)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2DiscardRegion *d, *next;
-
- QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
- QTAILQ_REMOVE(&s->discards, d, next);
-
- /* Discard is optional, ignore the return value */
- if (ret >= 0) {
- bdrv_discard(bs->file,
- d->offset >> BDRV_SECTOR_BITS,
- d->bytes >> BDRV_SECTOR_BITS);
- }
-
- g_free(d);
- }
-}
-
-static void update_refcount_discard(BlockDriverState *bs,
- uint64_t offset, uint64_t length)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2DiscardRegion *d, *p, *next;
-
- QTAILQ_FOREACH(d, &s->discards, next) {
- uint64_t new_start = MIN(offset, d->offset);
- uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
-
- if (new_end - new_start <= length + d->bytes) {
- /* There can't be any overlap, areas ending up here have no
- * references any more and therefore shouldn't get freed another
- * time. */
- assert(d->bytes + length == new_end - new_start);
- d->offset = new_start;
- d->bytes = new_end - new_start;
- goto found;
- }
- }
-
- d = g_malloc(sizeof(*d));
- *d = (Qcow2DiscardRegion) {
- .bs = bs,
- .offset = offset,
- .bytes = length,
- };
- QTAILQ_INSERT_TAIL(&s->discards, d, next);
-
-found:
- /* Merge discard requests if they are adjacent now */
- QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
- if (p == d
- || p->offset > d->offset + d->bytes
- || d->offset > p->offset + p->bytes)
- {
- continue;
- }
-
- /* Still no overlap possible */
- assert(p->offset == d->offset + d->bytes
- || d->offset == p->offset + p->bytes);
-
- QTAILQ_REMOVE(&s->discards, p, next);
- d->offset = MIN(d->offset, p->offset);
- d->bytes += p->bytes;
- }
-}
-
-/* XXX: cache several refcount block clusters ? */
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t start, last, cluster_offset;
- uint16_t *refcount_block = NULL;
- int64_t old_table_index = -1;
- int ret;
-
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
- offset, length, addend);
-#endif
- if (length < 0) {
- return -EINVAL;
- } else if (length == 0) {
- return 0;
- }
-
- if (addend < 0) {
- qcow2_cache_set_dependency(bs, s->refcount_block_cache,
- s->l2_table_cache);
- }
-
- start = offset & ~(s->cluster_size - 1);
- last = (offset + length - 1) & ~(s->cluster_size - 1);
- for(cluster_offset = start; cluster_offset <= last;
- cluster_offset += s->cluster_size)
- {
- int block_index, refcount;
- int64_t cluster_index = cluster_offset >> s->cluster_bits;
- int64_t table_index =
- cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-
- /* Load the refcount block and allocate it if needed */
- if (table_index != old_table_index) {
- if (refcount_block) {
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
- if (ret < 0) {
- goto fail;
- }
- }
- old_table_index = table_index;
-
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
-
- /* we can update the count and save it */
- block_index = cluster_index &
- ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-
- refcount = be16_to_cpu(refcount_block[block_index]);
- refcount += addend;
- if (refcount < 0 || refcount > 0xffff) {
- ret = -EINVAL;
- goto fail;
- }
- if (refcount == 0 && cluster_index < s->free_cluster_index) {
- s->free_cluster_index = cluster_index;
- }
- refcount_block[block_index] = cpu_to_be16(refcount);
-
- if (refcount == 0 && s->discard_passthrough[type]) {
- update_refcount_discard(bs, cluster_offset, s->cluster_size);
- }
- }
-
- ret = 0;
-fail:
- if (!s->cache_discards) {
- qcow2_process_discards(bs, ret);
- }
-
- /* Write last changed block to disk */
- if (refcount_block) {
- int wret;
- wret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (wret < 0) {
- return ret < 0 ? ret : wret;
- }
- }
-
- /*
- * Try do undo any updates if an error is returned (This may succeed in
- * some cases like ENOSPC for allocating a new refcount block)
- */
- if (ret < 0) {
- int dummy;
- dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
- QCOW2_DISCARD_NEVER);
- (void)dummy;
- }
-
- return ret;
-}
-
-/*
- * Increases or decreases the refcount of a given cluster by one.
- * addend must be 1 or -1.
- *
- * If the return value is non-negative, it is the new refcount of the cluster.
- * If it is negative, it is -errno and indicates an error.
- */
-static int update_cluster_refcount(BlockDriverState *bs,
- int64_t cluster_index,
- int addend,
- enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
- type);
- if (ret < 0) {
- return ret;
- }
-
- return get_refcount(bs, cluster_index);
-}
-
-
-
-/*********************************************************/
-/* cluster allocation functions */
-
-
-
-/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
-{
- BDRVQcowState *s = bs->opaque;
- int i, nb_clusters, refcount;
-
- nb_clusters = size_to_clusters(s, size);
-retry:
- for(i = 0; i < nb_clusters; i++) {
- int64_t next_cluster_index = s->free_cluster_index++;
- refcount = get_refcount(bs, next_cluster_index);
-
- if (refcount < 0) {
- return refcount;
- } else if (refcount != 0) {
- goto retry;
- }
- }
-#ifdef DEBUG_ALLOC2
- fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
- size,
- (s->free_cluster_index - nb_clusters) << s->cluster_bits);
-#endif
- return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
-}
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
-{
- int64_t offset;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
- offset = alloc_clusters_noref(bs, size);
- if (offset < 0) {
- return offset;
- }
-
- ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- return ret;
- }
-
- return offset;
-}
-
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t cluster_index;
- uint64_t old_free_cluster_index;
- int i, refcount, ret;
-
- /* Check how many clusters there are free */
- cluster_index = offset >> s->cluster_bits;
- for(i = 0; i < nb_clusters; i++) {
- refcount = get_refcount(bs, cluster_index++);
-
- if (refcount < 0) {
- return refcount;
- } else if (refcount != 0) {
- break;
- }
- }
-
- /* And then allocate them */
- old_free_cluster_index = s->free_cluster_index;
- s->free_cluster_index = cluster_index + i;
-
- ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- if (ret < 0) {
- return ret;
- }
-
- s->free_cluster_index = old_free_cluster_index;
-
- return i;
-}
-
-/* only used to allocate compressed sectors. We try to allocate
- contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t offset, cluster_offset;
- int free_in_cluster;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
- assert(size > 0 && size <= s->cluster_size);
- if (s->free_byte_offset == 0) {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
- }
- s->free_byte_offset = offset;
- }
- redo:
- free_in_cluster = s->cluster_size -
- (s->free_byte_offset & (s->cluster_size - 1));
- if (size <= free_in_cluster) {
- /* enough space in current cluster */
- offset = s->free_byte_offset;
- s->free_byte_offset += size;
- free_in_cluster -= size;
- if (free_in_cluster == 0)
- s->free_byte_offset = 0;
- if ((offset & (s->cluster_size - 1)) != 0)
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- } else {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
- }
- cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
- if ((cluster_offset + s->cluster_size) == offset) {
- /* we are lucky: contiguous data */
- offset = s->free_byte_offset;
- update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- s->free_byte_offset += size;
- } else {
- s->free_byte_offset = offset;
- goto redo;
- }
- }
-
- /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
- * or explicitly by update_cluster_refcount(). Refcount blocks must be
- * flushed before the caller's L2 table updates.
- */
- qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
- return offset;
-}
-
-void qcow2_free_clusters(BlockDriverState *bs,
- int64_t offset, int64_t size,
- enum qcow2_discard_type type)
-{
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
- ret = update_refcount(bs, offset, size, -1, type);
- if (ret < 0) {
- fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
- /* TODO Remember the clusters to free them later and avoid leaking */
- }
-}
-
-/*
- * Free a cluster using its L2 entry (handles clusters of all types, e.g.
- * normal cluster, compressed cluster, etc.)
- */
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
- int nb_clusters, enum qcow2_discard_type type)
-{
- BDRVQcowState *s = bs->opaque;
-
- switch (qcow2_get_cluster_type(l2_entry)) {
- case QCOW2_CLUSTER_COMPRESSED:
- {
- int nb_csectors;
- nb_csectors = ((l2_entry >> s->csize_shift) &
- s->csize_mask) + 1;
- qcow2_free_clusters(bs,
- (l2_entry & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, type);
- }
- break;
- case QCOW2_CLUSTER_NORMAL:
- qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
- nb_clusters << s->cluster_bits, type);
- break;
- case QCOW2_CLUSTER_UNALLOCATED:
- case QCOW2_CLUSTER_ZERO:
- break;
- default:
- abort();
- }
-}
-
-
-
-/*********************************************************/
-/* snapshots and image creation */
-
-
-
-/* update the refcounts of snapshots and the copied flag */
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
- int64_t l1_table_offset, int l1_size, int addend)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
- int64_t old_offset, old_l2_offset;
- int i, j, l1_modified = 0, nb_csectors, refcount;
- int ret;
-
- l2_table = NULL;
- l1_table = NULL;
- l1_size2 = l1_size * sizeof(uint64_t);
-
- s->cache_discards = true;
-
- /* WARNING: qcow2_snapshot_goto relies on this function not using the
- * l1_table_offset when it is the current s->l1_table_offset! Be careful
- * when changing this! */
- if (l1_table_offset != s->l1_table_offset) {
- l1_table = g_malloc0(align_offset(l1_size2, 512));
- l1_allocated = 1;
-
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
- if (ret < 0) {
- goto fail;
- }
-
- for(i = 0;i < l1_size; i++)
- be64_to_cpus(&l1_table[i]);
- } else {
- assert(l1_size == s->l1_size);
- l1_table = s->l1_table;
- l1_allocated = 0;
- }
-
- for(i = 0; i < l1_size; i++) {
- l2_offset = l1_table[i];
- if (l2_offset) {
- old_l2_offset = l2_offset;
- l2_offset &= L1E_OFFSET_MASK;
-
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
- (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
-
- for(j = 0; j < s->l2_size; j++) {
- offset = be64_to_cpu(l2_table[j]);
- if (offset != 0) {
- old_offset = offset;
- offset &= ~QCOW_OFLAG_COPIED;
- if (offset & QCOW_OFLAG_COMPRESSED) {
- nb_csectors = ((offset >> s->csize_shift) &
- s->csize_mask) + 1;
- if (addend != 0) {
- int ret;
- ret = update_refcount(bs,
- (offset & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, addend,
- QCOW2_DISCARD_SNAPSHOT);
- if (ret < 0) {
- goto fail;
- }
- }
- /* compressed clusters are never modified */
- refcount = 2;
- } else {
- uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
- if (addend != 0) {
- refcount = update_cluster_refcount(bs, cluster_index, addend,
- QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = get_refcount(bs, cluster_index);
- }
-
- if (refcount < 0) {
- ret = refcount;
- goto fail;
- }
- }
-
- if (refcount == 1) {
- offset |= QCOW_OFLAG_COPIED;
- }
- if (offset != old_offset) {
- if (addend > 0) {
- qcow2_cache_set_dependency(bs, s->l2_table_cache,
- s->refcount_block_cache);
- }
- l2_table[j] = cpu_to_be64(offset);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
- }
- }
- }
-
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
-
-
- if (addend != 0) {
- refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
- QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
- }
- if (refcount < 0) {
- ret = refcount;
- goto fail;
- } else if (refcount == 1) {
- l2_offset |= QCOW_OFLAG_COPIED;
- }
- if (l2_offset != old_l2_offset) {
- l1_table[i] = l2_offset;
- l1_modified = 1;
- }
- }
- }
-
- ret = bdrv_flush(bs);
-fail:
- if (l2_table) {
- qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- }
-
- s->cache_discards = false;
- qcow2_process_discards(bs, ret);
-
- /* Update L1 only if it isn't deleted anyway (addend = -1) */
- if (ret == 0 && addend >= 0 && l1_modified) {
- for (i = 0; i < l1_size; i++) {
- cpu_to_be64s(&l1_table[i]);
- }
-
- ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
-
- for (i = 0; i < l1_size; i++) {
- be64_to_cpus(&l1_table[i]);
- }
- }
- if (l1_allocated)
- g_free(l1_table);
- return ret;
-}
-
-
-
-
-/*********************************************************/
-/* refcount checking functions */
-
-
-
-/*
- * Increases the refcount for a range of clusters in a given refcount table.
- * This is used to construct a temporary refcount table out of L1 and L2 tables
- * which can be compared the the refcount table saved in the image.
- *
- * Modifies the number of errors in res.
- */
-static void inc_refcounts(BlockDriverState *bs,
- BdrvCheckResult *res,
- uint16_t *refcount_table,
- int refcount_table_size,
- int64_t offset, int64_t size)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t start, last, cluster_offset;
- int k;
-
- if (size <= 0)
- return;
-
- start = offset & ~(s->cluster_size - 1);
- last = (offset + size - 1) & ~(s->cluster_size - 1);
- for(cluster_offset = start; cluster_offset <= last;
- cluster_offset += s->cluster_size) {
- k = cluster_offset >> s->cluster_bits;
- if (k < 0) {
- fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
- cluster_offset);
- res->corruptions++;
- } else if (k >= refcount_table_size) {
- fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
- "the end of the image file, can't properly check refcounts.\n",
- cluster_offset);
- res->check_errors++;
- } else {
- if (++refcount_table[k] == 0) {
- fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
- "\n", cluster_offset);
- res->corruptions++;
- }
- }
- }
-}
-
-/* Flags for check_refcounts_l1() and check_refcounts_l2() */
-enum {
- CHECK_OFLAG_COPIED = 0x1, /* check QCOW_OFLAG_COPIED matches refcount */
- CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */
-};
-
-/*
- * Increases the refcount in the given refcount table for the all clusters
- * referenced in the L2 table. While doing so, performs some checks on L2
- * entries.
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
- uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
- int flags)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l2_table, l2_entry;
- uint64_t next_contiguous_offset = 0;
- int i, l2_size, nb_csectors, refcount;
-
- /* Read L2 table from disk */
- l2_size = s->l2_size * sizeof(uint64_t);
- l2_table = g_malloc(l2_size);
-
- if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
- goto fail;
-
- /* Do the actual checks */
- for(i = 0; i < s->l2_size; i++) {
- l2_entry = be64_to_cpu(l2_table[i]);
-
- switch (qcow2_get_cluster_type(l2_entry)) {
- case QCOW2_CLUSTER_COMPRESSED:
- /* Compressed clusters don't have QCOW_OFLAG_COPIED */
- if (l2_entry & QCOW_OFLAG_COPIED) {
- fprintf(stderr, "ERROR: cluster %" PRId64 ": "
- "copied flag must never be set for compressed "
- "clusters\n", l2_entry >> s->cluster_bits);
- l2_entry &= ~QCOW_OFLAG_COPIED;
- res->corruptions++;
- }
-
- /* Mark cluster as used */
- nb_csectors = ((l2_entry >> s->csize_shift) &
- s->csize_mask) + 1;
- l2_entry &= s->cluster_offset_mask;
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l2_entry & ~511, nb_csectors * 512);
-
- if (flags & CHECK_FRAG_INFO) {
- res->bfi.allocated_clusters++;
- res->bfi.compressed_clusters++;
-
- /* Compressed clusters are fragmented by nature. Since they
- * take up sub-sector space but we only have sector granularity
- * I/O we need to re-read the same sectors even for adjacent
- * compressed clusters.
- */
- res->bfi.fragmented_clusters++;
- }
- break;
-
- case QCOW2_CLUSTER_ZERO:
- if ((l2_entry & L2E_OFFSET_MASK) == 0) {
- break;
- }
- /* fall through */
-
- case QCOW2_CLUSTER_NORMAL:
- {
- /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
- uint64_t offset = l2_entry & L2E_OFFSET_MASK;
-
- if (flags & CHECK_OFLAG_COPIED) {
- refcount = get_refcount(bs, offset >> s->cluster_bits);
- if (refcount < 0) {
- fprintf(stderr, "Can't get refcount for offset %"
- PRIx64 ": %s\n", l2_entry, strerror(-refcount));
- goto fail;
- }
- if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
- fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
- PRIx64 " refcount=%d\n", l2_entry, refcount);
- res->corruptions++;
- }
- }
-
- if (flags & CHECK_FRAG_INFO) {
- res->bfi.allocated_clusters++;
- if (next_contiguous_offset &&
- offset != next_contiguous_offset) {
- res->bfi.fragmented_clusters++;
- }
- next_contiguous_offset = offset + s->cluster_size;
- }
-
- /* Mark cluster as used */
- inc_refcounts(bs, res, refcount_table,refcount_table_size,
- offset, s->cluster_size);
-
- /* Correct offsets are cluster aligned */
- if (offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
- "properly aligned; L2 entry corrupted.\n", offset);
- res->corruptions++;
- }
- break;
- }
-
- case QCOW2_CLUSTER_UNALLOCATED:
- break;
-
- default:
- abort();
- }
- }
-
- g_free(l2_table);
- return 0;
-
-fail:
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
- g_free(l2_table);
- return -EIO;
-}
-
-/*
- * Increases the refcount for the L1 table, its L2 tables and all referenced
- * clusters in the given refcount table. While doing so, performs some checks
- * on L1 and L2 entries.
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l1(BlockDriverState *bs,
- BdrvCheckResult *res,
- uint16_t *refcount_table,
- int refcount_table_size,
- int64_t l1_table_offset, int l1_size,
- int flags)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, l2_offset, l1_size2;
- int i, refcount, ret;
-
- l1_size2 = l1_size * sizeof(uint64_t);
-
- /* Mark L1 table as used */
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l1_table_offset, l1_size2);
-
- /* Read L1 table entries from disk */
- if (l1_size2 == 0) {
- l1_table = NULL;
- } else {
- l1_table = g_malloc(l1_size2);
- if (bdrv_pread(bs->file, l1_table_offset,
- l1_table, l1_size2) != l1_size2)
- goto fail;
- for(i = 0;i < l1_size; i++)
- be64_to_cpus(&l1_table[i]);
- }
-
- /* Do the actual checks */
- for(i = 0; i < l1_size; i++) {
- l2_offset = l1_table[i];
- if (l2_offset) {
- /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
- if (flags & CHECK_OFLAG_COPIED) {
- refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
- >> s->cluster_bits);
- if (refcount < 0) {
- fprintf(stderr, "Can't get refcount for l2_offset %"
- PRIx64 ": %s\n", l2_offset, strerror(-refcount));
- goto fail;
- }
- if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
- fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
- " refcount=%d\n", l2_offset, refcount);
- res->corruptions++;
- }
- }
-
- /* Mark L2 table as used */
- l2_offset &= L1E_OFFSET_MASK;
- inc_refcounts(bs, res, refcount_table, refcount_table_size,
- l2_offset, s->cluster_size);
-
- /* L2 tables are cluster aligned */
- if (l2_offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
- "cluster aligned; L1 entry corrupted\n", l2_offset);
- res->corruptions++;
- }
-
- /* Process and check L2 entries */
- ret = check_refcounts_l2(bs, res, refcount_table,
- refcount_table_size, l2_offset, flags);
- if (ret < 0) {
- goto fail;
- }
- }
- }
- g_free(l1_table);
- return 0;
-
-fail:
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
- res->check_errors++;
- g_free(l1_table);
- return -EIO;
-}
-
-/*
- * Checks an image for refcount consistency.
- *
- * Returns 0 if no errors are found, the number of errors in case the image is
- * detected as corrupted, and -errno when an internal error occurred.
- */
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t size, i, highest_cluster;
- int nb_clusters, refcount1, refcount2;
- QCowSnapshot *sn;
- uint16_t *refcount_table;
- int ret;
-
- size = bdrv_getlength(bs->file);
- nb_clusters = size_to_clusters(s, size);
- refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
-
- res->bfi.total_clusters =
- size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
-
- /* header */
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- 0, s->cluster_size);
-
- /* current L1 table */
- ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
- s->l1_table_offset, s->l1_size,
- CHECK_OFLAG_COPIED | CHECK_FRAG_INFO);
- if (ret < 0) {
- goto fail;
- }
-
- /* snapshots */
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
- sn->l1_table_offset, sn->l1_size, 0);
- if (ret < 0) {
- goto fail;
- }
- }
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- s->snapshots_offset, s->snapshots_size);
-
- /* refcount data */
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- s->refcount_table_offset,
- s->refcount_table_size * sizeof(uint64_t));
-
- for(i = 0; i < s->refcount_table_size; i++) {
- uint64_t offset, cluster;
- offset = s->refcount_table[i];
- cluster = offset >> s->cluster_bits;
-
- /* Refcount blocks are cluster aligned */
- if (offset & (s->cluster_size - 1)) {
- fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
- "cluster aligned; refcount table entry corrupted\n", i);
- res->corruptions++;
- continue;
- }
-
- if (cluster >= nb_clusters) {
- fprintf(stderr, "ERROR refcount block %" PRId64
- " is outside image\n", i);
- res->corruptions++;
- continue;
- }
-
- if (offset != 0) {
- inc_refcounts(bs, res, refcount_table, nb_clusters,
- offset, s->cluster_size);
- if (refcount_table[cluster] != 1) {
- fprintf(stderr, "ERROR refcount block %" PRId64
- " refcount=%d\n",
- i, refcount_table[cluster]);
- res->corruptions++;
- }
- }
- }
-
- /* compare ref counts */
- for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
- refcount1 = get_refcount(bs, i);
- if (refcount1 < 0) {
- fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
- i, strerror(-refcount1));
- res->check_errors++;
- continue;
- }
-
- refcount2 = refcount_table[i];
-
- if (refcount1 > 0 || refcount2 > 0) {
- highest_cluster = i;
- }
-
- if (refcount1 != refcount2) {
-
- /* Check if we're allowed to fix the mismatch */
- int *num_fixed = NULL;
- if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
- num_fixed = &res->leaks_fixed;
- } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
- num_fixed = &res->corruptions_fixed;
- }
-
- fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
- num_fixed != NULL ? "Repairing" :
- refcount1 < refcount2 ? "ERROR" :
- "Leaked",
- i, refcount1, refcount2);
-
- if (num_fixed) {
- ret = update_refcount(bs, i << s->cluster_bits, 1,
- refcount2 - refcount1,
- QCOW2_DISCARD_ALWAYS);
- if (ret >= 0) {
- (*num_fixed)++;
- continue;
- }
- }
-
- /* And if we couldn't, print an error */
- if (refcount1 < refcount2) {
- res->corruptions++;
- } else {
- res->leaks++;
- }
- }
- }
-
- res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
- ret = 0;
-
-fail:
- g_free(refcount_table);
-
- return ret;
-}
-
diff --git a/contrib/qemu/block/qcow2-snapshot.c b/contrib/qemu/block/qcow2-snapshot.c
deleted file mode 100644
index 0caac9055f8..00000000000
--- a/contrib/qemu/block/qcow2-snapshot.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-
-typedef struct QEMU_PACKED QCowSnapshotHeader {
- /* header is 8 byte aligned */
- uint64_t l1_table_offset;
-
- uint32_t l1_size;
- uint16_t id_str_size;
- uint16_t name_size;
-
- uint32_t date_sec;
- uint32_t date_nsec;
-
- uint64_t vm_clock_nsec;
-
- uint32_t vm_state_size;
- uint32_t extra_data_size; /* for extension */
- /* extra data follows */
- /* id_str follows */
- /* name follows */
-} QCowSnapshotHeader;
-
-typedef struct QEMU_PACKED QCowSnapshotExtraData {
- uint64_t vm_state_size_large;
- uint64_t disk_size;
-} QCowSnapshotExtraData;
-
-void qcow2_free_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- g_free(s->snapshots[i].name);
- g_free(s->snapshots[i].id_str);
- }
- g_free(s->snapshots);
- s->snapshots = NULL;
- s->nb_snapshots = 0;
-}
-
-int qcow2_read_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshotHeader h;
- QCowSnapshotExtraData extra;
- QCowSnapshot *sn;
- int i, id_str_size, name_size;
- int64_t offset;
- uint32_t extra_data_size;
- int ret;
-
- if (!s->nb_snapshots) {
- s->snapshots = NULL;
- s->snapshots_size = 0;
- return 0;
- }
-
- offset = s->snapshots_offset;
- s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
-
- for(i = 0; i < s->nb_snapshots; i++) {
- /* Read statically sized part of the snapshot header */
- offset = align_offset(offset, 8);
- ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
- if (ret < 0) {
- goto fail;
- }
-
- offset += sizeof(h);
- sn = s->snapshots + i;
- sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
- sn->l1_size = be32_to_cpu(h.l1_size);
- sn->vm_state_size = be32_to_cpu(h.vm_state_size);
- sn->date_sec = be32_to_cpu(h.date_sec);
- sn->date_nsec = be32_to_cpu(h.date_nsec);
- sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
- extra_data_size = be32_to_cpu(h.extra_data_size);
-
- id_str_size = be16_to_cpu(h.id_str_size);
- name_size = be16_to_cpu(h.name_size);
-
- /* Read extra data */
- ret = bdrv_pread(bs->file, offset, &extra,
- MIN(sizeof(extra), extra_data_size));
- if (ret < 0) {
- goto fail;
- }
- offset += extra_data_size;
-
- if (extra_data_size >= 8) {
- sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
- }
-
- if (extra_data_size >= 16) {
- sn->disk_size = be64_to_cpu(extra.disk_size);
- } else {
- sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- }
-
- /* Read snapshot ID */
- sn->id_str = g_malloc(id_str_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
- if (ret < 0) {
- goto fail;
- }
- offset += id_str_size;
- sn->id_str[id_str_size] = '\0';
-
- /* Read snapshot name */
- sn->name = g_malloc(name_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->name, name_size);
- if (ret < 0) {
- goto fail;
- }
- offset += name_size;
- sn->name[name_size] = '\0';
- }
-
- s->snapshots_size = offset - s->snapshots_offset;
- return 0;
-
-fail:
- qcow2_free_snapshots(bs);
- return ret;
-}
-
-/* add at the end of the file a new list of snapshots */
-static int qcow2_write_snapshots(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- QCowSnapshotHeader h;
- QCowSnapshotExtraData extra;
- int i, name_size, id_str_size, snapshots_size;
- struct {
- uint32_t nb_snapshots;
- uint64_t snapshots_offset;
- } QEMU_PACKED header_data;
- int64_t offset, snapshots_offset;
- int ret;
-
- /* compute the size of the snapshots */
- offset = 0;
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- offset = align_offset(offset, 8);
- offset += sizeof(h);
- offset += sizeof(extra);
- offset += strlen(sn->id_str);
- offset += strlen(sn->name);
- }
- snapshots_size = offset;
-
- /* Allocate space for the new snapshot list */
- snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
- offset = snapshots_offset;
- if (offset < 0) {
- return offset;
- }
- ret = bdrv_flush(bs);
- if (ret < 0) {
- return ret;
- }
-
- /* Write all snapshots to the new list */
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- memset(&h, 0, sizeof(h));
- h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
- h.l1_size = cpu_to_be32(sn->l1_size);
- /* If it doesn't fit in 32 bit, older implementations should treat it
- * as a disk-only snapshot rather than truncate the VM state */
- if (sn->vm_state_size <= 0xffffffff) {
- h.vm_state_size = cpu_to_be32(sn->vm_state_size);
- }
- h.date_sec = cpu_to_be32(sn->date_sec);
- h.date_nsec = cpu_to_be32(sn->date_nsec);
- h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
- h.extra_data_size = cpu_to_be32(sizeof(extra));
-
- memset(&extra, 0, sizeof(extra));
- extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
- extra.disk_size = cpu_to_be64(sn->disk_size);
-
- id_str_size = strlen(sn->id_str);
- name_size = strlen(sn->name);
- h.id_str_size = cpu_to_be16(id_str_size);
- h.name_size = cpu_to_be16(name_size);
- offset = align_offset(offset, 8);
-
- ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
- if (ret < 0) {
- goto fail;
- }
- offset += sizeof(h);
-
- ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
- if (ret < 0) {
- goto fail;
- }
- offset += sizeof(extra);
-
- ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
- if (ret < 0) {
- goto fail;
- }
- offset += id_str_size;
-
- ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
- if (ret < 0) {
- goto fail;
- }
- offset += name_size;
- }
-
- /*
- * Update the header to point to the new snapshot table. This requires the
- * new table and its refcounts to be stable on disk.
- */
- ret = bdrv_flush(bs);
- if (ret < 0) {
- goto fail;
- }
-
- QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
- offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
-
- header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
- header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
-
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
- &header_data, sizeof(header_data));
- if (ret < 0) {
- goto fail;
- }
-
- /* free the old snapshot table */
- qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
- QCOW2_DISCARD_SNAPSHOT);
- s->snapshots_offset = snapshots_offset;
- s->snapshots_size = snapshots_size;
- return 0;
-
-fail:
- return ret;
-}
-
-static void find_new_snapshot_id(BlockDriverState *bs,
- char *id_str, int id_str_size)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- int i, id, id_max = 0;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- sn = s->snapshots + i;
- id = strtoul(sn->id_str, NULL, 10);
- if (id > id_max)
- id_max = id;
- }
- snprintf(id_str, id_str_size, "%d", id_max + 1);
-}
-
-static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
-{
- BDRVQcowState *s = bs->opaque;
- int i;
-
- for(i = 0; i < s->nb_snapshots; i++) {
- if (!strcmp(s->snapshots[i].id_str, id_str))
- return i;
- }
- return -1;
-}
-
-static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
-{
- BDRVQcowState *s = bs->opaque;
- int i, ret;
-
- ret = find_snapshot_by_id(bs, name);
- if (ret >= 0)
- return ret;
- for(i = 0; i < s->nb_snapshots; i++) {
- if (!strcmp(s->snapshots[i].name, name))
- return i;
- }
- return -1;
-}
-
-/* if no id is provided, a new one is constructed */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *new_snapshot_list = NULL;
- QCowSnapshot *old_snapshot_list = NULL;
- QCowSnapshot sn1, *sn = &sn1;
- int i, ret;
- uint64_t *l1_table = NULL;
- int64_t l1_table_offset;
-
- memset(sn, 0, sizeof(*sn));
-
- /* Generate an ID if it wasn't passed */
- if (sn_info->id_str[0] == '\0') {
- find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
- }
-
- /* Check that the ID is unique */
- if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
- return -EEXIST;
- }
-
- /* Populate sn with passed data */
- sn->id_str = g_strdup(sn_info->id_str);
- sn->name = g_strdup(sn_info->name);
-
- sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- sn->vm_state_size = sn_info->vm_state_size;
- sn->date_sec = sn_info->date_sec;
- sn->date_nsec = sn_info->date_nsec;
- sn->vm_clock_nsec = sn_info->vm_clock_nsec;
-
- /* Allocate the L1 table of the snapshot and copy the current one there. */
- l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
- if (l1_table_offset < 0) {
- ret = l1_table_offset;
- goto fail;
- }
-
- sn->l1_table_offset = l1_table_offset;
- sn->l1_size = s->l1_size;
-
- l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
- for(i = 0; i < s->l1_size; i++) {
- l1_table[i] = cpu_to_be64(s->l1_table[i]);
- }
-
- ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
-
- g_free(l1_table);
- l1_table = NULL;
-
- /*
- * Increase the refcounts of all clusters and make sure everything is
- * stable on disk before updating the snapshot table to contain a pointer
- * to the new L1 table.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
- if (ret < 0) {
- goto fail;
- }
-
- /* Append the new snapshot to the snapshot list */
- new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
- if (s->snapshots) {
- memcpy(new_snapshot_list, s->snapshots,
- s->nb_snapshots * sizeof(QCowSnapshot));
- old_snapshot_list = s->snapshots;
- }
- s->snapshots = new_snapshot_list;
- s->snapshots[s->nb_snapshots++] = *sn;
-
- ret = qcow2_write_snapshots(bs);
- if (ret < 0) {
- g_free(s->snapshots);
- s->snapshots = old_snapshot_list;
- goto fail;
- }
-
- g_free(old_snapshot_list);
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-
-fail:
- g_free(sn->id_str);
- g_free(sn->name);
- g_free(l1_table);
-
- return ret;
-}
-
-/* copy the snapshot 'snapshot_name' into the current disk image */
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- int i, snapshot_index;
- int cur_l1_bytes, sn_l1_bytes;
- int ret;
- uint64_t *sn_l1_table = NULL;
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = &s->snapshots[snapshot_index];
-
- if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
- error_report("qcow2: Loading snapshots with different disk "
- "size is not implemented");
- ret = -ENOTSUP;
- goto fail;
- }
-
- /*
- * Make sure that the current L1 table is big enough to contain the whole
- * L1 table of the snapshot. If the snapshot L1 table is smaller, the
- * current one must be padded with zeros.
- */
- ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
- if (ret < 0) {
- goto fail;
- }
-
- cur_l1_bytes = s->l1_size * sizeof(uint64_t);
- sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
-
- /*
- * Copy the snapshot L1 table to the current L1 table.
- *
- * Before overwriting the old current L1 table on disk, make sure to
- * increase all refcounts for the clusters referenced by the new one.
- * Decrease the refcount referenced by the old one only when the L1
- * table is overwritten.
- */
- sn_l1_table = g_malloc0(cur_l1_bytes);
-
- ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
- if (ret < 0) {
- goto fail;
- }
-
- ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
- sn->l1_size, 1);
- if (ret < 0) {
- goto fail;
- }
-
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
- cur_l1_bytes);
- if (ret < 0) {
- goto fail;
- }
-
- /*
- * Decrease refcount of clusters of current L1 table.
- *
- * At this point, the in-memory s->l1_table points to the old L1 table,
- * whereas on disk we already have the new one.
- *
- * qcow2_update_snapshot_refcount special cases the current L1 table to use
- * the in-memory data instead of really using the offset to load a new one,
- * which is why this works.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
- s->l1_size, -1);
-
- /*
- * Now update the in-memory L1 table to be in sync with the on-disk one. We
- * need to do this even if updating refcounts failed.
- */
- for(i = 0;i < s->l1_size; i++) {
- s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
- }
-
- if (ret < 0) {
- goto fail;
- }
-
- g_free(sn_l1_table);
- sn_l1_table = NULL;
-
- /*
- * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
- * when we decreased the refcount of the old snapshot.
- */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
- if (ret < 0) {
- goto fail;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-
-fail:
- g_free(sn_l1_table);
- return ret;
-}
-
-int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot sn;
- int snapshot_index, ret;
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = s->snapshots[snapshot_index];
-
- /* Remove it from the snapshot list */
- memmove(s->snapshots + snapshot_index,
- s->snapshots + snapshot_index + 1,
- (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
- s->nb_snapshots--;
- ret = qcow2_write_snapshots(bs);
- if (ret < 0) {
- return ret;
- }
-
- /*
- * The snapshot is now unused, clean up. If we fail after this point, we
- * won't recover but just leak clusters.
- */
- g_free(sn.id_str);
- g_free(sn.name);
-
- /*
- * Now decrease the refcounts of clusters referenced by the snapshot and
- * free the L1 table.
- */
- ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
- sn.l1_size, -1);
- if (ret < 0) {
- return ret;
- }
- qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
- QCOW2_DISCARD_SNAPSHOT);
-
- /* must update the copied flag on the current cluster offsets */
- ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
- if (ret < 0) {
- return ret;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return 0;
-}
-
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
-{
- BDRVQcowState *s = bs->opaque;
- QEMUSnapshotInfo *sn_tab, *sn_info;
- QCowSnapshot *sn;
- int i;
-
- if (!s->nb_snapshots) {
- *psn_tab = NULL;
- return s->nb_snapshots;
- }
-
- sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
- for(i = 0; i < s->nb_snapshots; i++) {
- sn_info = sn_tab + i;
- sn = s->snapshots + i;
- pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
- sn->id_str);
- pstrcpy(sn_info->name, sizeof(sn_info->name),
- sn->name);
- sn_info->vm_state_size = sn->vm_state_size;
- sn_info->date_sec = sn->date_sec;
- sn_info->date_nsec = sn->date_nsec;
- sn_info->vm_clock_nsec = sn->vm_clock_nsec;
- }
- *psn_tab = sn_tab;
- return s->nb_snapshots;
-}
-
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
-{
- int i, snapshot_index;
- BDRVQcowState *s = bs->opaque;
- QCowSnapshot *sn;
- uint64_t *new_l1_table;
- int new_l1_bytes;
- int ret;
-
- assert(bs->read_only);
-
- /* Search the snapshot */
- snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
- if (snapshot_index < 0) {
- return -ENOENT;
- }
- sn = &s->snapshots[snapshot_index];
-
- /* Allocate and read in the snapshot's L1 table */
- new_l1_bytes = s->l1_size * sizeof(uint64_t);
- new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
-
- ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
- if (ret < 0) {
- g_free(new_l1_table);
- return ret;
- }
-
- /* Switch the L1 table */
- g_free(s->l1_table);
-
- s->l1_size = sn->l1_size;
- s->l1_table_offset = sn->l1_table_offset;
- s->l1_table = new_l1_table;
-
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
-
- return 0;
-}
diff --git a/contrib/qemu/block/qcow2.c b/contrib/qemu/block/qcow2.c
deleted file mode 100644
index 0eceefe2cd9..00000000000
--- a/contrib/qemu/block/qcow2.c
+++ /dev/null
@@ -1,1825 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qemu/aes.h"
-#include "block/qcow2.h"
-#include "qemu/error-report.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qbool.h"
-#include "trace.h"
-
-/*
- Differences with QCOW:
-
- - Support for multiple incremental snapshots.
- - Memory management by reference counts.
- - Clusters which have a reference count of one have the bit
- QCOW_OFLAG_COPIED to optimize write performance.
- - Size of compressed clusters is stored in sectors to reduce bit usage
- in the cluster offsets.
- - Support for storing additional data (such as the VM state) in the
- snapshots.
- - If a backing store is used, the cluster size is not constrained
- (could be backported to QCOW).
- - L2 tables have always a size of one cluster.
-*/
-
-
-typedef struct {
- uint32_t magic;
- uint32_t len;
-} QCowExtension;
-
-#define QCOW2_EXT_MAGIC_END 0
-#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
-#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
-
-static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
- const QCowHeader *cow_header = (const void *)buf;
-
- if (buf_size >= sizeof(QCowHeader) &&
- be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
- be32_to_cpu(cow_header->version) >= 2)
- return 100;
- else
- return 0;
-}
-
-
-/*
- * read qcow2 extension and fill bs
- * start reading from start_offset
- * finish reading upon magic of value 0 or when end_offset reached
- * unknown magic is skipped (future extension this version knows nothing about)
- * return 0 upon success, non-0 otherwise
- */
-static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
- uint64_t end_offset, void **p_feature_table)
-{
- BDRVQcowState *s = bs->opaque;
- QCowExtension ext;
- uint64_t offset;
- int ret;
-
-#ifdef DEBUG_EXT
- printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
-#endif
- offset = start_offset;
- while (offset < end_offset) {
-
-#ifdef DEBUG_EXT
- /* Sanity check */
- if (offset > s->cluster_size)
- printf("qcow2_read_extension: suspicious offset %lu\n", offset);
-
- printf("attempting to read extended header in offset %lu\n", offset);
-#endif
-
- if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
- fprintf(stderr, "qcow2_read_extension: ERROR: "
- "pread fail from offset %" PRIu64 "\n",
- offset);
- return 1;
- }
- be32_to_cpus(&ext.magic);
- be32_to_cpus(&ext.len);
- offset += sizeof(ext);
-#ifdef DEBUG_EXT
- printf("ext.magic = 0x%x\n", ext.magic);
-#endif
- if (ext.len > end_offset - offset) {
- error_report("Header extension too large");
- return -EINVAL;
- }
-
- switch (ext.magic) {
- case QCOW2_EXT_MAGIC_END:
- return 0;
-
- case QCOW2_EXT_MAGIC_BACKING_FORMAT:
- if (ext.len >= sizeof(bs->backing_format)) {
- fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
- " (>=%zu)\n",
- ext.len, sizeof(bs->backing_format));
- return 2;
- }
- if (bdrv_pread(bs->file, offset , bs->backing_format,
- ext.len) != ext.len)
- return 3;
- bs->backing_format[ext.len] = '\0';
-#ifdef DEBUG_EXT
- printf("Qcow2: Got format extension %s\n", bs->backing_format);
-#endif
- break;
-
- case QCOW2_EXT_MAGIC_FEATURE_TABLE:
- if (p_feature_table != NULL) {
- void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
- ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
- if (ret < 0) {
- return ret;
- }
-
- *p_feature_table = feature_table;
- }
- break;
-
- default:
- /* unknown magic - save it in case we need to rewrite the header */
- {
- Qcow2UnknownHeaderExtension *uext;
-
- uext = g_malloc0(sizeof(*uext) + ext.len);
- uext->magic = ext.magic;
- uext->len = ext.len;
- QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
-
- ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
- if (ret < 0) {
- return ret;
- }
- }
- break;
- }
-
- offset += ((ext.len + 7) & ~7);
- }
-
- return 0;
-}
-
-static void cleanup_unknown_header_ext(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- Qcow2UnknownHeaderExtension *uext, *next;
-
- QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
- QLIST_REMOVE(uext, next);
- g_free(uext);
- }
-}
-
-static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs,
- const char *fmt, ...)
-{
- char msg[64];
- va_list ap;
-
- va_start(ap, fmt);
- vsnprintf(msg, sizeof(msg), fmt, ap);
- va_end(ap);
-
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "qcow2", msg);
-}
-
-static void report_unsupported_feature(BlockDriverState *bs,
- Qcow2Feature *table, uint64_t mask)
-{
- while (table && table->name[0] != '\0') {
- if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
- if (mask & (1 << table->bit)) {
- report_unsupported(bs, "%.46s",table->name);
- mask &= ~(1 << table->bit);
- }
- }
- table++;
- }
-
- if (mask) {
- report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
- }
-}
-
-/*
- * Sets the dirty bit and flushes afterwards if necessary.
- *
- * The incompatible_features bit is only set if the image file header was
- * updated successfully. Therefore it is not required to check the return
- * value of this function.
- */
-int qcow2_mark_dirty(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t val;
- int ret;
-
- assert(s->qcow_version >= 3);
-
- if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
- return 0; /* already dirty */
- }
-
- val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
- ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
- &val, sizeof(val));
- if (ret < 0) {
- return ret;
- }
- ret = bdrv_flush(bs->file);
- if (ret < 0) {
- return ret;
- }
-
- /* Only treat image as dirty if the header was updated successfully */
- s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
- return 0;
-}
-
-/*
- * Clears the dirty bit and flushes before if necessary. Only call this
- * function when there are no pending requests, it does not guard against
- * concurrent requests dirtying the image.
- */
-static int qcow2_mark_clean(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
-
- if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
- int ret = bdrv_flush(bs);
- if (ret < 0) {
- return ret;
- }
-
- s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
- return qcow2_update_header(bs);
- }
- return 0;
-}
-
-static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
- BdrvCheckMode fix)
-{
- int ret = qcow2_check_refcounts(bs, result, fix);
- if (ret < 0) {
- return ret;
- }
-
- if (fix && result->check_errors == 0 && result->corruptions == 0) {
- return qcow2_mark_clean(bs);
- }
- return ret;
-}
-
-static QemuOptsList qcow2_runtime_opts = {
- .name = "qcow2",
- .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
- .desc = {
- {
- .name = "lazy_refcounts",
- .type = QEMU_OPT_BOOL,
- .help = "Postpone refcount updates",
- },
- {
- .name = QCOW2_OPT_DISCARD_REQUEST,
- .type = QEMU_OPT_BOOL,
- .help = "Pass guest discard requests to the layer below",
- },
- {
- .name = QCOW2_OPT_DISCARD_SNAPSHOT,
- .type = QEMU_OPT_BOOL,
- .help = "Generate discard requests when snapshot related space "
- "is freed",
- },
- {
- .name = QCOW2_OPT_DISCARD_OTHER,
- .type = QEMU_OPT_BOOL,
- .help = "Generate discard requests when other clusters are freed",
- },
- { /* end of list */ }
- },
-};
-
-static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQcowState *s = bs->opaque;
- int len, i, ret = 0;
- QCowHeader header;
- QemuOpts *opts;
- Error *local_err = NULL;
- uint64_t ext_end;
- uint64_t l1_vm_state_index;
-
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
- if (ret < 0) {
- goto fail;
- }
- be32_to_cpus(&header.magic);
- be32_to_cpus(&header.version);
- be64_to_cpus(&header.backing_file_offset);
- be32_to_cpus(&header.backing_file_size);
- be64_to_cpus(&header.size);
- be32_to_cpus(&header.cluster_bits);
- be32_to_cpus(&header.crypt_method);
- be64_to_cpus(&header.l1_table_offset);
- be32_to_cpus(&header.l1_size);
- be64_to_cpus(&header.refcount_table_offset);
- be32_to_cpus(&header.refcount_table_clusters);
- be64_to_cpus(&header.snapshots_offset);
- be32_to_cpus(&header.nb_snapshots);
-
- if (header.magic != QCOW_MAGIC) {
- ret = -EMEDIUMTYPE;
- goto fail;
- }
- if (header.version < 2 || header.version > 3) {
- report_unsupported(bs, "QCOW version %d", header.version);
- ret = -ENOTSUP;
- goto fail;
- }
-
- s->qcow_version = header.version;
-
- /* Initialise version 3 header fields */
- if (header.version == 2) {
- header.incompatible_features = 0;
- header.compatible_features = 0;
- header.autoclear_features = 0;
- header.refcount_order = 4;
- header.header_length = 72;
- } else {
- be64_to_cpus(&header.incompatible_features);
- be64_to_cpus(&header.compatible_features);
- be64_to_cpus(&header.autoclear_features);
- be32_to_cpus(&header.refcount_order);
- be32_to_cpus(&header.header_length);
- }
-
- if (header.header_length > sizeof(header)) {
- s->unknown_header_fields_size = header.header_length - sizeof(header);
- s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
- ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
- s->unknown_header_fields_size);
- if (ret < 0) {
- goto fail;
- }
- }
-
- if (header.backing_file_offset) {
- ext_end = header.backing_file_offset;
- } else {
- ext_end = 1 << header.cluster_bits;
- }
-
- /* Handle feature bits */
- s->incompatible_features = header.incompatible_features;
- s->compatible_features = header.compatible_features;
- s->autoclear_features = header.autoclear_features;
-
- if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
- void *feature_table = NULL;
- qcow2_read_extensions(bs, header.header_length, ext_end,
- &feature_table);
- report_unsupported_feature(bs, feature_table,
- s->incompatible_features &
- ~QCOW2_INCOMPAT_MASK);
- ret = -ENOTSUP;
- goto fail;
- }
-
- /* Check support for various header values */
- if (header.refcount_order != 4) {
- report_unsupported(bs, "%d bit reference counts",
- 1 << header.refcount_order);
- ret = -ENOTSUP;
- goto fail;
- }
-
- if (header.cluster_bits < MIN_CLUSTER_BITS ||
- header.cluster_bits > MAX_CLUSTER_BITS) {
- ret = -EINVAL;
- goto fail;
- }
- if (header.crypt_method > QCOW_CRYPT_AES) {
- ret = -EINVAL;
- goto fail;
- }
- s->crypt_method_header = header.crypt_method;
- if (s->crypt_method_header) {
- bs->encrypted = 1;
- }
- s->cluster_bits = header.cluster_bits;
- s->cluster_size = 1 << s->cluster_bits;
- s->cluster_sectors = 1 << (s->cluster_bits - 9);
- s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
- s->l2_size = 1 << s->l2_bits;
- bs->total_sectors = header.size / 512;
- s->csize_shift = (62 - (s->cluster_bits - 8));
- s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
- s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
- s->refcount_table_offset = header.refcount_table_offset;
- s->refcount_table_size =
- header.refcount_table_clusters << (s->cluster_bits - 3);
-
- s->snapshots_offset = header.snapshots_offset;
- s->nb_snapshots = header.nb_snapshots;
-
- /* read the level 1 table */
- s->l1_size = header.l1_size;
-
- l1_vm_state_index = size_to_l1(s, header.size);
- if (l1_vm_state_index > INT_MAX) {
- ret = -EFBIG;
- goto fail;
- }
- s->l1_vm_state_index = l1_vm_state_index;
-
- /* the L1 table must contain at least enough entries to put
- header.size bytes */
- if (s->l1_size < s->l1_vm_state_index) {
- ret = -EINVAL;
- goto fail;
- }
- s->l1_table_offset = header.l1_table_offset;
- if (s->l1_size > 0) {
- s->l1_table = g_malloc0(
- align_offset(s->l1_size * sizeof(uint64_t), 512));
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
- s->l1_size * sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
- }
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
- }
- }
-
- /* alloc L2 table/refcount block cache */
- s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
- s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
-
- s->cluster_cache = g_malloc(s->cluster_size);
- /* one more sector for decompressed data alignment */
- s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
- + 512);
- s->cluster_cache_offset = -1;
- s->flags = flags;
-
- ret = qcow2_refcount_init(bs);
- if (ret != 0) {
- goto fail;
- }
-
- QLIST_INIT(&s->cluster_allocs);
- QTAILQ_INIT(&s->discards);
-
- /* read qcow2 extensions */
- if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
- ret = -EINVAL;
- goto fail;
- }
-
- /* read the backing file name */
- if (header.backing_file_offset != 0) {
- len = header.backing_file_size;
- if (len > 1023) {
- len = 1023;
- }
- ret = bdrv_pread(bs->file, header.backing_file_offset,
- bs->backing_file, len);
- if (ret < 0) {
- goto fail;
- }
- bs->backing_file[len] = '\0';
- }
-
- ret = qcow2_read_snapshots(bs);
- if (ret < 0) {
- goto fail;
- }
-
- /* Clear unknown autoclear feature bits */
- if (!bs->read_only && s->autoclear_features != 0) {
- s->autoclear_features = 0;
- ret = qcow2_update_header(bs);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* Initialise locks */
- qemu_co_mutex_init(&s->lock);
-
- /* Repair image if dirty */
- if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
- (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
- BdrvCheckResult result = {0};
-
- ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
- if (ret < 0) {
- goto fail;
- }
- }
-
- /* Enable lazy_refcounts according to image and command line options */
- opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
- qemu_opts_absorb_qdict(opts, options, &local_err);
- if (error_is_set(&local_err)) {
- qerror_report_err(local_err);
- error_free(local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
- (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
-
- s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
- s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
- s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
- flags & BDRV_O_UNMAP);
- s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
- s->discard_passthrough[QCOW2_DISCARD_OTHER] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
- qemu_opts_del(opts);
-
- if (s->use_lazy_refcounts && s->qcow_version < 3) {
- qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
- "a qcow2 image with at least qemu 1.1 compatibility level");
- ret = -EINVAL;
- goto fail;
- }
-
-#ifdef DEBUG_ALLOC
- {
- BdrvCheckResult result = {0};
- qcow2_check_refcounts(bs, &result, 0);
- }
-#endif
- return ret;
-
- fail:
- g_free(s->unknown_header_fields);
- cleanup_unknown_header_ext(bs);
- qcow2_free_snapshots(bs);
- qcow2_refcount_close(bs);
- g_free(s->l1_table);
- if (s->l2_table_cache) {
- qcow2_cache_destroy(bs, s->l2_table_cache);
- }
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
- return ret;
-}
-
-static int qcow2_set_key(BlockDriverState *bs, const char *key)
-{
- BDRVQcowState *s = bs->opaque;
- uint8_t keybuf[16];
- int len, i;
-
- memset(keybuf, 0, 16);
- len = strlen(key);
- if (len > 16)
- len = 16;
- /* XXX: we could compress the chars to 7 bits to increase
- entropy */
- for(i = 0;i < len;i++) {
- keybuf[i] = key[i];
- }
- s->crypt_method = s->crypt_method_header;
-
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
- return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
-#if 0
- /* test */
- {
- uint8_t in[16];
- uint8_t out[16];
- uint8_t tmp[16];
- for(i=0;i<16;i++)
- in[i] = i;
- AES_encrypt(in, tmp, &s->aes_encrypt_key);
- AES_decrypt(tmp, out, &s->aes_decrypt_key);
- for(i = 0; i < 16; i++)
- printf(" %02x", tmp[i]);
- printf("\n");
- for(i = 0; i < 16; i++)
- printf(" %02x", out[i]);
- printf("\n");
- }
-#endif
- return 0;
-}
-
-/* We have nothing to do for QCOW2 reopen, stubs just return
- * success */
-static int qcow2_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- BDRVQcowState *s = bs->opaque;
- uint64_t cluster_offset;
- int ret;
-
- *pnum = nb_sectors;
- /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
- * can't pass them on today */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
- qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- *pnum = 0;
- }
-
- return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
-}
-
-/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors)
-{
- int n1;
- if ((sector_num + nb_sectors) <= bs->total_sectors)
- return nb_sectors;
- if (sector_num >= bs->total_sectors)
- n1 = 0;
- else
- n1 = bs->total_sectors - sector_num;
-
- qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
-
- return n1;
-}
-
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n1;
- int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
- uint64_t cluster_offset = 0;
- uint64_t bytes_done = 0;
- QEMUIOVector hd_qiov;
- uint8_t *cluster_data = NULL;
-
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- qemu_co_mutex_lock(&s->lock);
-
- while (remaining_sectors != 0) {
-
- /* prepare next request */
- cur_nr_sectors = remaining_sectors;
- if (s->crypt_method) {
- cur_nr_sectors = MIN(cur_nr_sectors,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- }
-
- ret = qcow2_get_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset);
- if (ret < 0) {
- goto fail;
- }
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
-
- switch (ret) {
- case QCOW2_CLUSTER_UNALLOCATED:
-
- if (bs->backing_hd) {
- /* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
- sector_num, cur_nr_sectors);
- if (n1 > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n1, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- }
- } else {
- /* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
- }
- break;
-
- case QCOW2_CLUSTER_ZERO:
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
- break;
-
- case QCOW2_CLUSTER_COMPRESSED:
- /* add AIO support for compressed blocks ? */
- ret = qcow2_decompress_cluster(bs, cluster_offset);
- if (ret < 0) {
- goto fail;
- }
-
- qemu_iovec_from_buf(&hd_qiov, 0,
- s->cluster_cache + index_in_cluster * 512,
- 512 * cur_nr_sectors);
- break;
-
- case QCOW2_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
- }
-
- if (s->crypt_method) {
- /*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
- if (!cluster_data) {
- cluster_data =
- qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- }
-
- assert(cur_nr_sectors <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- 512 * cur_nr_sectors);
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
- qemu_iovec_from_buf(qiov, bytes_done,
- cluster_data, 512 * cur_nr_sectors);
- }
- break;
-
- default:
- g_assert_not_reached();
- ret = -EIO;
- goto fail;
- }
-
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- }
- ret = 0;
-
-fail:
- qemu_co_mutex_unlock(&s->lock);
-
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
-
- return ret;
-}
-
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov)
-{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
- int n_end;
- int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
- uint64_t cluster_offset;
- QEMUIOVector hd_qiov;
- uint64_t bytes_done = 0;
- uint8_t *cluster_data = NULL;
- QCowL2Meta *l2meta = NULL;
-
- trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
- remaining_sectors);
-
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- s->cluster_cache_offset = -1; /* disable compressed cache */
-
- qemu_co_mutex_lock(&s->lock);
-
- while (remaining_sectors != 0) {
-
- l2meta = NULL;
-
- trace_qcow2_writev_start_part(qemu_coroutine_self());
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n_end = index_in_cluster + remaining_sectors;
- if (s->crypt_method &&
- n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
- n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
- }
-
- ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
- if (ret < 0) {
- goto fail;
- }
-
- assert((cluster_offset & 511) == 0);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
-
- if (s->crypt_method) {
- if (!cluster_data) {
- cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
- s->cluster_size);
- }
-
- assert(hd_qiov.size <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
-
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- cur_nr_sectors * 512);
- }
-
- qemu_co_mutex_unlock(&s->lock);
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- trace_qcow2_writev_data(qemu_coroutine_self(),
- (cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
- qemu_co_mutex_lock(&s->lock);
- if (ret < 0) {
- goto fail;
- }
-
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
- if (ret < 0) {
- goto fail;
- }
-
- /* Take the request off the list of running requests */
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
-
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
- }
-
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
- }
- ret = 0;
-
-fail:
- qemu_co_mutex_unlock(&s->lock);
-
- while (l2meta != NULL) {
- QCowL2Meta *next;
-
- if (l2meta->nb_clusters != 0) {
- QLIST_REMOVE(l2meta, next_in_flight);
- }
- qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
- next = l2meta->next;
- g_free(l2meta);
- l2meta = next;
- }
-
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
- trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
-
- return ret;
-}
-
-static void qcow2_close(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- g_free(s->l1_table);
-
- qcow2_cache_flush(bs, s->l2_table_cache);
- qcow2_cache_flush(bs, s->refcount_block_cache);
-
- qcow2_mark_clean(bs);
-
- qcow2_cache_destroy(bs, s->l2_table_cache);
- qcow2_cache_destroy(bs, s->refcount_block_cache);
-
- g_free(s->unknown_header_fields);
- cleanup_unknown_header_ext(bs);
-
- g_free(s->cluster_cache);
- qemu_vfree(s->cluster_data);
- qcow2_refcount_close(bs);
- qcow2_free_snapshots(bs);
-}
-
-static void qcow2_invalidate_cache(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int flags = s->flags;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- uint32_t crypt_method = 0;
- QDict *options;
-
- /*
- * Backing files are read-only which makes all of their metadata immutable,
- * that means we don't have to worry about reopening them here.
- */
-
- if (s->crypt_method) {
- crypt_method = s->crypt_method;
- memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
- }
-
- qcow2_close(bs);
-
- options = qdict_new();
- qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
- qbool_from_int(s->use_lazy_refcounts));
-
- memset(s, 0, sizeof(BDRVQcowState));
- qcow2_open(bs, options, flags);
-
- QDECREF(options);
-
- if (crypt_method) {
- s->crypt_method = crypt_method;
- memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
- }
-}
-
-static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
- size_t len, size_t buflen)
-{
- QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
- size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
-
- if (buflen < ext_len) {
- return -ENOSPC;
- }
-
- *ext_backing_fmt = (QCowExtension) {
- .magic = cpu_to_be32(magic),
- .len = cpu_to_be32(len),
- };
- memcpy(buf + sizeof(QCowExtension), s, len);
-
- return ext_len;
-}
-
-/*
- * Updates the qcow2 header, including the variable length parts of it, i.e.
- * the backing file name and all extensions. qcow2 was not designed to allow
- * such changes, so if we run out of space (we can only use the first cluster)
- * this function may fail.
- *
- * Returns 0 on success, -errno in error cases.
- */
-int qcow2_update_header(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- QCowHeader *header;
- char *buf;
- size_t buflen = s->cluster_size;
- int ret;
- uint64_t total_size;
- uint32_t refcount_table_clusters;
- size_t header_length;
- Qcow2UnknownHeaderExtension *uext;
-
- buf = qemu_blockalign(bs, buflen);
-
- /* Header structure */
- header = (QCowHeader*) buf;
-
- if (buflen < sizeof(*header)) {
- ret = -ENOSPC;
- goto fail;
- }
-
- header_length = sizeof(*header) + s->unknown_header_fields_size;
- total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
-
- *header = (QCowHeader) {
- /* Version 2 fields */
- .magic = cpu_to_be32(QCOW_MAGIC),
- .version = cpu_to_be32(s->qcow_version),
- .backing_file_offset = 0,
- .backing_file_size = 0,
- .cluster_bits = cpu_to_be32(s->cluster_bits),
- .size = cpu_to_be64(total_size),
- .crypt_method = cpu_to_be32(s->crypt_method_header),
- .l1_size = cpu_to_be32(s->l1_size),
- .l1_table_offset = cpu_to_be64(s->l1_table_offset),
- .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
- .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
- .nb_snapshots = cpu_to_be32(s->nb_snapshots),
- .snapshots_offset = cpu_to_be64(s->snapshots_offset),
-
- /* Version 3 fields */
- .incompatible_features = cpu_to_be64(s->incompatible_features),
- .compatible_features = cpu_to_be64(s->compatible_features),
- .autoclear_features = cpu_to_be64(s->autoclear_features),
- .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
- .header_length = cpu_to_be32(header_length),
- };
-
- /* For older versions, write a shorter header */
- switch (s->qcow_version) {
- case 2:
- ret = offsetof(QCowHeader, incompatible_features);
- break;
- case 3:
- ret = sizeof(*header);
- break;
- default:
- ret = -EINVAL;
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- memset(buf, 0, buflen);
-
- /* Preserve any unknown field in the header */
- if (s->unknown_header_fields_size) {
- if (buflen < s->unknown_header_fields_size) {
- ret = -ENOSPC;
- goto fail;
- }
-
- memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
- buf += s->unknown_header_fields_size;
- buflen -= s->unknown_header_fields_size;
- }
-
- /* Backing file format header extension */
- if (*bs->backing_format) {
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
- bs->backing_format, strlen(bs->backing_format),
- buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- }
-
- /* Feature table */
- Qcow2Feature features[] = {
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
- .name = "dirty bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_COMPATIBLE,
- .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
- .name = "lazy refcounts",
- },
- };
-
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
- features, sizeof(features), buflen);
- if (ret < 0) {
- goto fail;
- }
- buf += ret;
- buflen -= ret;
-
- /* Keep unknown header extensions */
- QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
- ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
- }
-
- /* End of header extensions */
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
- if (ret < 0) {
- goto fail;
- }
-
- buf += ret;
- buflen -= ret;
-
- /* Backing file name */
- if (*bs->backing_file) {
- size_t backing_file_len = strlen(bs->backing_file);
-
- if (buflen < backing_file_len) {
- ret = -ENOSPC;
- goto fail;
- }
-
- /* Using strncpy is ok here, since buf is not NUL-terminated. */
- strncpy(buf, bs->backing_file, buflen);
-
- header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
- header->backing_file_size = cpu_to_be32(backing_file_len);
- }
-
- /* Write the new header */
- ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
- if (ret < 0) {
- goto fail;
- }
-
- ret = 0;
-fail:
- qemu_vfree(header);
- return ret;
-}
-
-static int qcow2_change_backing_file(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt)
-{
- pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
- pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
-
- return qcow2_update_header(bs);
-}
-
-static int preallocate(BlockDriverState *bs)
-{
- uint64_t nb_sectors;
- uint64_t offset;
- uint64_t host_offset = 0;
- int num;
- int ret;
- QCowL2Meta *meta;
-
- nb_sectors = bdrv_getlength(bs) >> 9;
- offset = 0;
-
- while (nb_sectors) {
- num = MIN(nb_sectors, INT_MAX >> 9);
- ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
- &host_offset, &meta);
- if (ret < 0) {
- return ret;
- }
-
- ret = qcow2_alloc_cluster_link_l2(bs, meta);
- if (ret < 0) {
- qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
- QCOW2_DISCARD_NEVER);
- return ret;
- }
-
- /* There are no dependent requests, but we need to remove our request
- * from the list of in-flight requests */
- if (meta != NULL) {
- QLIST_REMOVE(meta, next_in_flight);
- }
-
- /* TODO Preallocate data if requested */
-
- nb_sectors -= num;
- offset += num << 9;
- }
-
- /*
- * It is expected that the image file is large enough to actually contain
- * all of the allocated clusters (otherwise we get failing reads after
- * EOF). Extend the image to the last allocated sector.
- */
- if (host_offset != 0) {
- uint8_t buf[512];
- memset(buf, 0, 512);
- ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
- if (ret < 0) {
- return ret;
- }
- }
-
- return 0;
-}
-
-static int qcow2_create2(const char *filename, int64_t total_size,
- const char *backing_file, const char *backing_format,
- int flags, size_t cluster_size, int prealloc,
- QEMUOptionParameter *options, int version)
-{
- /* Calculate cluster_bits */
- int cluster_bits;
- cluster_bits = ffs(cluster_size) - 1;
- if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
- (1 << cluster_bits) != cluster_size)
- {
- error_report(
- "Cluster size must be a power of two between %d and %dk",
- 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
- return -EINVAL;
- }
-
- /*
- * Open the image file and write a minimal qcow2 header.
- *
- * We keep things simple and start with a zero-sized image. We also
- * do without refcount blocks or a L1 table for now. We'll fix the
- * inconsistency later.
- *
- * We do need a refcount table because growing the refcount table means
- * allocating two new refcount blocks - the seconds of which would be at
- * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
- * size for any qcow2 image.
- */
- BlockDriverState* bs;
- QCowHeader header;
- uint8_t* refcount_table;
- int ret;
-
- ret = bdrv_create_file(filename, options);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
- if (ret < 0) {
- return ret;
- }
-
- /* Write the header */
- memset(&header, 0, sizeof(header));
- header.magic = cpu_to_be32(QCOW_MAGIC);
- header.version = cpu_to_be32(version);
- header.cluster_bits = cpu_to_be32(cluster_bits);
- header.size = cpu_to_be64(0);
- header.l1_table_offset = cpu_to_be64(0);
- header.l1_size = cpu_to_be32(0);
- header.refcount_table_offset = cpu_to_be64(cluster_size);
- header.refcount_table_clusters = cpu_to_be32(1);
- header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
- header.header_length = cpu_to_be32(sizeof(header));
-
- if (flags & BLOCK_FLAG_ENCRYPT) {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
- } else {
- header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
- }
-
- if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
- header.compatible_features |=
- cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
- }
-
- ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
- if (ret < 0) {
- goto out;
- }
-
- /* Write an empty refcount table */
- refcount_table = g_malloc0(cluster_size);
- ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
- g_free(refcount_table);
-
- if (ret < 0) {
- goto out;
- }
-
- bdrv_close(bs);
-
- /*
- * And now open the image and make it consistent first (i.e. increase the
- * refcount of the cluster that is occupied by the header and the refcount
- * table)
- */
- BlockDriver* drv = bdrv_find_format("qcow2");
- assert(drv != NULL);
- ret = bdrv_open(bs, filename, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
- if (ret < 0) {
- goto out;
- }
-
- ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
- if (ret < 0) {
- goto out;
-
- } else if (ret != 0) {
- error_report("Huh, first cluster in empty image is already in use?");
- abort();
- }
-
- /* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
- if (ret < 0) {
- goto out;
- }
-
- /* Want a backing file? There you go.*/
- if (backing_file) {
- ret = bdrv_change_backing_file(bs, backing_file, backing_format);
- if (ret < 0) {
- goto out;
- }
- }
-
- /* And if we're supposed to preallocate metadata, do that now */
- if (prealloc) {
- BDRVQcowState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = preallocate(bs);
- qemu_co_mutex_unlock(&s->lock);
- if (ret < 0) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- bdrv_delete(bs);
- return ret;
-}
-
-static int qcow2_create(const char *filename, QEMUOptionParameter *options)
-{
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
- uint64_t sectors = 0;
- int flags = 0;
- size_t cluster_size = DEFAULT_CLUSTER_SIZE;
- int prealloc = 0;
- int version = 2;
-
- /* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- sectors = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
- if (!options->value.s || !strcmp(options->value.s, "off")) {
- prealloc = 0;
- } else if (!strcmp(options->value.s, "metadata")) {
- prealloc = 1;
- } else {
- fprintf(stderr, "Invalid preallocation mode: '%s'\n",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
- if (!options->value.s || !strcmp(options->value.s, "0.10")) {
- version = 2;
- } else if (!strcmp(options->value.s, "1.1")) {
- version = 3;
- } else {
- fprintf(stderr, "Invalid compatibility level: '%s'\n",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
- flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
- }
- options++;
- }
-
- if (backing_file && prealloc) {
- fprintf(stderr, "Backing file and preallocation cannot be used at "
- "the same time\n");
- return -EINVAL;
- }
-
- if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
- fprintf(stderr, "Lazy refcounts only supported with compatibility "
- "level 1.1 and above (use compat=1.1 or greater)\n");
- return -EINVAL;
- }
-
- return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
- cluster_size, prealloc, options, version);
-}
-
-static int qcow2_make_empty(BlockDriverState *bs)
-{
-#if 0
- /* XXX: not correct */
- BDRVQcowState *s = bs->opaque;
- uint32_t l1_length = s->l1_size * sizeof(uint64_t);
- int ret;
-
- memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
- return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
- if (ret < 0)
- return ret;
-
- l2_cache_reset(bs);
-#endif
- return 0;
-}
-
-static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- int ret;
- BDRVQcowState *s = bs->opaque;
-
- /* Emulate misaligned zero writes */
- if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
- return -ENOTSUP;
- }
-
- /* Whatever is left can use real zero clusters */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
-
- return ret;
-}
-
-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
-{
- int ret;
- BDRVQcowState *s = bs->opaque;
-
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
- return ret;
-}
-
-static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t new_l1_size;
- int ret;
-
- if (offset & 511) {
- error_report("The new size must be a multiple of 512");
- return -EINVAL;
- }
-
- /* cannot proceed if image has snapshots */
- if (s->nb_snapshots) {
- error_report("Can't resize an image which has snapshots");
- return -ENOTSUP;
- }
-
- /* shrinking is currently not supported */
- if (offset < bs->total_sectors * 512) {
- error_report("qcow2 doesn't support shrinking images yet");
- return -ENOTSUP;
- }
-
- new_l1_size = size_to_l1(s, offset);
- ret = qcow2_grow_l1_table(bs, new_l1_size, true);
- if (ret < 0) {
- return ret;
- }
-
- /* write updated header.size */
- offset = cpu_to_be64(offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
- &offset, sizeof(uint64_t));
- if (ret < 0) {
- return ret;
- }
-
- s->l1_vm_state_index = new_l1_size;
- return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
- tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- z_stream strm;
- int ret, out_len;
- uint8_t *out_buf;
- uint64_t cluster_offset;
-
- if (nb_sectors == 0) {
- /* align end of file to a sector boundary to ease reading with
- sector based I/Os */
- cluster_offset = bdrv_getlength(bs->file);
- cluster_offset = (cluster_offset + 511) & ~511;
- bdrv_truncate(bs->file, cluster_offset);
- return 0;
- }
-
- if (nb_sectors != s->cluster_sectors) {
- ret = -EINVAL;
-
- /* Zero-pad last write if image size is not cluster aligned */
- if (sector_num + nb_sectors == bs->total_sectors &&
- nb_sectors < s->cluster_sectors) {
- uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
- memset(pad_buf, 0, s->cluster_size);
- memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
- ret = qcow2_write_compressed(bs, sector_num,
- pad_buf, s->cluster_sectors);
- qemu_vfree(pad_buf);
- }
- return ret;
- }
-
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
- /* best compression, small window, no zlib header */
- memset(&strm, 0, sizeof(strm));
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
- Z_DEFLATED, -12,
- 9, Z_DEFAULT_STRATEGY);
- if (ret != 0) {
- ret = -EINVAL;
- goto fail;
- }
-
- strm.avail_in = s->cluster_size;
- strm.next_in = (uint8_t *)buf;
- strm.avail_out = s->cluster_size;
- strm.next_out = out_buf;
-
- ret = deflate(&strm, Z_FINISH);
- if (ret != Z_STREAM_END && ret != Z_OK) {
- deflateEnd(&strm);
- ret = -EINVAL;
- goto fail;
- }
- out_len = strm.next_out - out_buf;
-
- deflateEnd(&strm);
-
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
- /* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
- if (ret < 0) {
- goto fail;
- }
- } else {
- cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
- sector_num << 9, out_len);
- if (!cluster_offset) {
- ret = -EIO;
- goto fail;
- }
- cluster_offset &= s->cluster_offset_mask;
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
- if (ret < 0) {
- goto fail;
- }
- }
-
- ret = 0;
-fail:
- g_free(out_buf);
- return ret;
-}
-
-static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int ret;
-
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
- }
-
- if (qcow2_need_accurate_refcounts(s)) {
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
- if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
- }
- }
- qemu_co_mutex_unlock(&s->lock);
-
- return 0;
-}
-
-static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
-{
- return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
-}
-
-static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQcowState *s = bs->opaque;
- bdi->cluster_size = s->cluster_size;
- bdi->vm_state_offset = qcow2_vm_state_offset(s);
- return 0;
-}
-
-#if 0
-static void dump_refcounts(BlockDriverState *bs)
-{
- BDRVQcowState *s = bs->opaque;
- int64_t nb_clusters, k, k1, size;
- int refcount;
-
- size = bdrv_getlength(bs->file);
- nb_clusters = size_to_clusters(s, size);
- for(k = 0; k < nb_clusters;) {
- k1 = k;
- refcount = get_refcount(bs, k);
- k++;
- while (k < nb_clusters && get_refcount(bs, k) == refcount)
- k++;
- printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
- k - k1);
- }
-}
-#endif
-
-static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t pos)
-{
- BDRVQcowState *s = bs->opaque;
- int growable = bs->growable;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- bs->growable = 1;
- ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
- bs->growable = growable;
-
- return ret;
-}
-
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
-{
- BDRVQcowState *s = bs->opaque;
- int growable = bs->growable;
- int ret;
-
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- bs->growable = 1;
- ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
- bs->growable = growable;
-
- return ret;
-}
-
-static QEMUOptionParameter qcow2_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_COMPAT_LEVEL,
- .type = OPT_STRING,
- .help = "Compatibility level (0.10 or 1.1)"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "qcow2 cluster size",
- .value = { .n = DEFAULT_CLUSTER_SIZE },
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = OPT_STRING,
- .help = "Preallocation mode (allowed values: off, metadata)"
- },
- {
- .name = BLOCK_OPT_LAZY_REFCOUNTS,
- .type = OPT_FLAG,
- .help = "Postpone refcount updates",
- },
- { NULL }
-};
-
-static BlockDriver bdrv_qcow2 = {
- .format_name = "qcow2",
- .instance_size = sizeof(BDRVQcowState),
- .bdrv_probe = qcow2_probe,
- .bdrv_open = qcow2_open,
- .bdrv_close = qcow2_close,
- .bdrv_reopen_prepare = qcow2_reopen_prepare,
- .bdrv_create = qcow2_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = qcow2_co_is_allocated,
- .bdrv_set_key = qcow2_set_key,
- .bdrv_make_empty = qcow2_make_empty,
-
- .bdrv_co_readv = qcow2_co_readv,
- .bdrv_co_writev = qcow2_co_writev,
- .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
-
- .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
- .bdrv_co_discard = qcow2_co_discard,
- .bdrv_truncate = qcow2_truncate,
- .bdrv_write_compressed = qcow2_write_compressed,
-
- .bdrv_snapshot_create = qcow2_snapshot_create,
- .bdrv_snapshot_goto = qcow2_snapshot_goto,
- .bdrv_snapshot_delete = qcow2_snapshot_delete,
- .bdrv_snapshot_list = qcow2_snapshot_list,
- .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
- .bdrv_get_info = qcow2_get_info,
-
- .bdrv_save_vmstate = qcow2_save_vmstate,
- .bdrv_load_vmstate = qcow2_load_vmstate,
-
- .bdrv_change_backing_file = qcow2_change_backing_file,
-
- .bdrv_invalidate_cache = qcow2_invalidate_cache,
-
- .create_options = qcow2_create_options,
- .bdrv_check = qcow2_check,
-};
-
-static void bdrv_qcow2_init(void)
-{
- bdrv_register(&bdrv_qcow2);
-}
-
-block_init(bdrv_qcow2_init);
diff --git a/contrib/qemu/block/qcow2.h b/contrib/qemu/block/qcow2.h
deleted file mode 100644
index 3b2d5cda71f..00000000000
--- a/contrib/qemu/block/qcow2.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef BLOCK_QCOW2_H
-#define BLOCK_QCOW2_H
-
-#include "qemu/aes.h"
-#include "block/coroutine.h"
-
-//#define DEBUG_ALLOC
-//#define DEBUG_ALLOC2
-//#define DEBUG_EXT
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES 1
-
-#define QCOW_MAX_CRYPT_CLUSTERS 32
-
-/* indicate that the refcount of the referenced cluster is exactly one. */
-#define QCOW_OFLAG_COPIED (1LL << 63)
-/* indicate that the cluster is compressed (they never have the copied flag) */
-#define QCOW_OFLAG_COMPRESSED (1LL << 62)
-/* The cluster reads as all zeros */
-#define QCOW_OFLAG_ZERO (1LL << 0)
-
-#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
-
-#define MIN_CLUSTER_BITS 9
-#define MAX_CLUSTER_BITS 21
-
-#define L2_CACHE_SIZE 16
-
-/* Must be at least 4 to cover all cases of refcount table growth */
-#define REFCOUNT_CACHE_SIZE 4
-
-#define DEFAULT_CLUSTER_SIZE 65536
-
-
-#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request"
-#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot"
-#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other"
-
-typedef struct QCowHeader {
- uint32_t magic;
- uint32_t version;
- uint64_t backing_file_offset;
- uint32_t backing_file_size;
- uint32_t cluster_bits;
- uint64_t size; /* in bytes */
- uint32_t crypt_method;
- uint32_t l1_size; /* XXX: save number of clusters instead ? */
- uint64_t l1_table_offset;
- uint64_t refcount_table_offset;
- uint32_t refcount_table_clusters;
- uint32_t nb_snapshots;
- uint64_t snapshots_offset;
-
- /* The following fields are only valid for version >= 3 */
- uint64_t incompatible_features;
- uint64_t compatible_features;
- uint64_t autoclear_features;
-
- uint32_t refcount_order;
- uint32_t header_length;
-} QCowHeader;
-
-typedef struct QCowSnapshot {
- uint64_t l1_table_offset;
- uint32_t l1_size;
- char *id_str;
- char *name;
- uint64_t disk_size;
- uint64_t vm_state_size;
- uint32_t date_sec;
- uint32_t date_nsec;
- uint64_t vm_clock_nsec;
-} QCowSnapshot;
-
-struct Qcow2Cache;
-typedef struct Qcow2Cache Qcow2Cache;
-
-typedef struct Qcow2UnknownHeaderExtension {
- uint32_t magic;
- uint32_t len;
- QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
- uint8_t data[];
-} Qcow2UnknownHeaderExtension;
-
-enum {
- QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
- QCOW2_FEAT_TYPE_COMPATIBLE = 1,
- QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
-};
-
-/* Incompatible feature bits */
-enum {
- QCOW2_INCOMPAT_DIRTY_BITNR = 0,
- QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
-
- QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
-};
-
-/* Compatible feature bits */
-enum {
- QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
- QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
-
- QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
-};
-
-enum qcow2_discard_type {
- QCOW2_DISCARD_NEVER = 0,
- QCOW2_DISCARD_ALWAYS,
- QCOW2_DISCARD_REQUEST,
- QCOW2_DISCARD_SNAPSHOT,
- QCOW2_DISCARD_OTHER,
- QCOW2_DISCARD_MAX
-};
-
-typedef struct Qcow2Feature {
- uint8_t type;
- uint8_t bit;
- char name[46];
-} QEMU_PACKED Qcow2Feature;
-
-typedef struct Qcow2DiscardRegion {
- BlockDriverState *bs;
- uint64_t offset;
- uint64_t bytes;
- QTAILQ_ENTRY(Qcow2DiscardRegion) next;
-} Qcow2DiscardRegion;
-
-typedef struct BDRVQcowState {
- int cluster_bits;
- int cluster_size;
- int cluster_sectors;
- int l2_bits;
- int l2_size;
- int l1_size;
- int l1_vm_state_index;
- int csize_shift;
- int csize_mask;
- uint64_t cluster_offset_mask;
- uint64_t l1_table_offset;
- uint64_t *l1_table;
-
- Qcow2Cache* l2_table_cache;
- Qcow2Cache* refcount_block_cache;
-
- uint8_t *cluster_cache;
- uint8_t *cluster_data;
- uint64_t cluster_cache_offset;
- QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
-
- uint64_t *refcount_table;
- uint64_t refcount_table_offset;
- uint32_t refcount_table_size;
- int64_t free_cluster_index;
- int64_t free_byte_offset;
-
- CoMutex lock;
-
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
- uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- uint64_t snapshots_offset;
- int snapshots_size;
- int nb_snapshots;
- QCowSnapshot *snapshots;
-
- int flags;
- int qcow_version;
- bool use_lazy_refcounts;
-
- bool discard_passthrough[QCOW2_DISCARD_MAX];
-
- uint64_t incompatible_features;
- uint64_t compatible_features;
- uint64_t autoclear_features;
-
- size_t unknown_header_fields_size;
- void* unknown_header_fields;
- QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
- QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
- bool cache_discards;
-} BDRVQcowState;
-
-/* XXX: use std qcow open function ? */
-typedef struct QCowCreateState {
- int cluster_size;
- int cluster_bits;
- uint16_t *refcount_block;
- uint64_t *refcount_table;
- int64_t l1_table_offset;
- int64_t refcount_table_offset;
- int64_t refcount_block_offset;
-} QCowCreateState;
-
-struct QCowAIOCB;
-
-typedef struct Qcow2COWRegion {
- /**
- * Offset of the COW region in bytes from the start of the first cluster
- * touched by the request.
- */
- uint64_t offset;
-
- /** Number of sectors to copy */
- int nb_sectors;
-} Qcow2COWRegion;
-
-/**
- * Describes an in-flight (part of a) write request that writes to clusters
- * that are not referenced in their L2 table yet.
- */
-typedef struct QCowL2Meta
-{
- /** Guest offset of the first newly allocated cluster */
- uint64_t offset;
-
- /** Host offset of the first newly allocated cluster */
- uint64_t alloc_offset;
-
- /**
- * Number of sectors from the start of the first allocated cluster to
- * the end of the (possibly shortened) request
- */
- int nb_available;
-
- /** Number of newly allocated clusters */
- int nb_clusters;
-
- /**
- * Requests that overlap with this allocation and wait to be restarted
- * when the allocating request has completed.
- */
- CoQueue dependent_requests;
-
- /**
- * The COW Region between the start of the first allocated cluster and the
- * area the guest actually writes to.
- */
- Qcow2COWRegion cow_start;
-
- /**
- * The COW Region between the area the guest actually writes to and the
- * end of the last allocated cluster.
- */
- Qcow2COWRegion cow_end;
-
- /** Pointer to next L2Meta of the same write request */
- struct QCowL2Meta *next;
-
- QLIST_ENTRY(QCowL2Meta) next_in_flight;
-} QCowL2Meta;
-
-enum {
- QCOW2_CLUSTER_UNALLOCATED,
- QCOW2_CLUSTER_NORMAL,
- QCOW2_CLUSTER_COMPRESSED,
- QCOW2_CLUSTER_ZERO
-};
-
-#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
-
-#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
-
-static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
-{
- return offset & ~(s->cluster_size - 1);
-}
-
-static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
-{
- return offset & (s->cluster_size - 1);
-}
-
-static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
-{
- return (size + (s->cluster_size - 1)) >> s->cluster_bits;
-}
-
-static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
-{
- int shift = s->cluster_bits + s->l2_bits;
- return (size + (1ULL << shift) - 1) >> shift;
-}
-
-static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
-{
- return (offset >> s->cluster_bits) & (s->l2_size - 1);
-}
-
-static inline int64_t align_offset(int64_t offset, int n)
-{
- offset = (offset + n - 1) & ~(n - 1);
- return offset;
-}
-
-static inline int qcow2_get_cluster_type(uint64_t l2_entry)
-{
- if (l2_entry & QCOW_OFLAG_COMPRESSED) {
- return QCOW2_CLUSTER_COMPRESSED;
- } else if (l2_entry & QCOW_OFLAG_ZERO) {
- return QCOW2_CLUSTER_ZERO;
- } else if (!(l2_entry & L2E_OFFSET_MASK)) {
- return QCOW2_CLUSTER_UNALLOCATED;
- } else {
- return QCOW2_CLUSTER_NORMAL;
- }
-}
-
-/* Check whether refcounts are eager or lazy */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
-{
- return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
-}
-
-static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
-{
- return m->offset + m->cow_start.offset;
-}
-
-static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
-{
- return m->offset + m->cow_end.offset
- + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
-}
-
-// FIXME Need qcow2_ prefix to global functions
-
-/* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors);
-
-int qcow2_mark_dirty(BlockDriverState *bs);
-int qcow2_update_header(BlockDriverState *bs);
-
-/* qcow2-refcount.c functions */
-int qcow2_refcount_init(BlockDriverState *bs);
-void qcow2_refcount_close(BlockDriverState *bs);
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
-void qcow2_free_clusters(BlockDriverState *bs,
- int64_t offset, int64_t size,
- enum qcow2_discard_type type);
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
- int nb_clusters, enum qcow2_discard_type type);
-
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
- int64_t l1_table_offset, int l1_size, int addend);
-
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix);
-
-void qcow2_process_discards(BlockDriverState *bs, int ret);
-
-/* qcow2-cluster.c functions */
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
- bool exact_size);
-void qcow2_l2_cache_reset(BlockDriverState *bs);
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key);
-
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset);
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int compressed_size);
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
-
-/* qcow2-snapshot.c functions */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
-int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
-
-void qcow2_free_snapshots(BlockDriverState *bs);
-int qcow2_read_snapshots(BlockDriverState *bs);
-
-/* qcow2-cache.c functions */
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
-
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
- Qcow2Cache *dependency);
-void qcow2_cache_depends_on_flush(Qcow2Cache *c);
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
- void **table);
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
-
-#endif
diff --git a/contrib/qemu/block/qed-check.c b/contrib/qemu/block/qed-check.c
deleted file mode 100644
index b473dcd61f6..00000000000
--- a/contrib/qemu/block/qed-check.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Consistency Check
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-typedef struct {
- BDRVQEDState *s;
- BdrvCheckResult *result;
- bool fix; /* whether to fix invalid offsets */
-
- uint64_t nclusters;
- uint32_t *used_clusters; /* referenced cluster bitmap */
-
- QEDRequest request;
-} QEDCheck;
-
-static bool qed_test_bit(uint32_t *bitmap, uint64_t n) {
- return !!(bitmap[n / 32] & (1 << (n % 32)));
-}
-
-static void qed_set_bit(uint32_t *bitmap, uint64_t n) {
- bitmap[n / 32] |= 1 << (n % 32);
-}
-
-/**
- * Set bitmap bits for clusters
- *
- * @check: Check structure
- * @offset: Starting offset in bytes
- * @n: Number of clusters
- */
-static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
- unsigned int n)
-{
- uint64_t cluster = qed_bytes_to_clusters(check->s, offset);
- unsigned int corruptions = 0;
-
- while (n-- != 0) {
- /* Clusters should only be referenced once */
- if (qed_test_bit(check->used_clusters, cluster)) {
- corruptions++;
- }
-
- qed_set_bit(check->used_clusters, cluster);
- cluster++;
- }
-
- check->result->corruptions += corruptions;
- return corruptions == 0;
-}
-
-/**
- * Check an L2 table
- *
- * @ret: Number of invalid cluster offsets
- */
-static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
-{
- BDRVQEDState *s = check->s;
- unsigned int i, num_invalid = 0;
- uint64_t last_offset = 0;
-
- for (i = 0; i < s->table_nelems; i++) {
- uint64_t offset = table->offsets[i];
-
- if (qed_offset_is_unalloc_cluster(offset) ||
- qed_offset_is_zero_cluster(offset)) {
- continue;
- }
- check->result->bfi.allocated_clusters++;
- if (last_offset && (last_offset + s->header.cluster_size != offset)) {
- check->result->bfi.fragmented_clusters++;
- }
- last_offset = offset;
-
- /* Detect invalid cluster offset */
- if (!qed_check_cluster_offset(s, offset)) {
- if (check->fix) {
- table->offsets[i] = 0;
- check->result->corruptions_fixed++;
- } else {
- check->result->corruptions++;
- }
-
- num_invalid++;
- continue;
- }
-
- qed_set_used_clusters(check, offset, 1);
- }
-
- return num_invalid;
-}
-
-/**
- * Descend tables and check each cluster is referenced once only
- */
-static int qed_check_l1_table(QEDCheck *check, QEDTable *table)
-{
- BDRVQEDState *s = check->s;
- unsigned int i, num_invalid_l1 = 0;
- int ret, last_error = 0;
-
- /* Mark L1 table clusters used */
- qed_set_used_clusters(check, s->header.l1_table_offset,
- s->header.table_size);
-
- for (i = 0; i < s->table_nelems; i++) {
- unsigned int num_invalid_l2;
- uint64_t offset = table->offsets[i];
-
- if (qed_offset_is_unalloc_cluster(offset)) {
- continue;
- }
-
- /* Detect invalid L2 offset */
- if (!qed_check_table_offset(s, offset)) {
- /* Clear invalid offset */
- if (check->fix) {
- table->offsets[i] = 0;
- check->result->corruptions_fixed++;
- } else {
- check->result->corruptions++;
- }
-
- num_invalid_l1++;
- continue;
- }
-
- if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
- continue; /* skip an invalid table */
- }
-
- ret = qed_read_l2_table_sync(s, &check->request, offset);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- continue;
- }
-
- num_invalid_l2 = qed_check_l2_table(check,
- check->request.l2_table->table);
-
- /* Write out fixed L2 table */
- if (num_invalid_l2 > 0 && check->fix) {
- ret = qed_write_l2_table_sync(s, &check->request, 0,
- s->table_nelems, false);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- continue;
- }
- }
- }
-
- /* Drop reference to final table */
- qed_unref_l2_cache_entry(check->request.l2_table);
- check->request.l2_table = NULL;
-
- /* Write out fixed L1 table */
- if (num_invalid_l1 > 0 && check->fix) {
- ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
- if (ret) {
- check->result->check_errors++;
- last_error = ret;
- }
- }
-
- return last_error;
-}
-
-/**
- * Check for unreferenced (leaked) clusters
- */
-static void qed_check_for_leaks(QEDCheck *check)
-{
- BDRVQEDState *s = check->s;
- uint64_t i;
-
- for (i = s->header.header_size; i < check->nclusters; i++) {
- if (!qed_test_bit(check->used_clusters, i)) {
- check->result->leaks++;
- }
- }
-}
-
-/**
- * Mark an image clean once it passes check or has been repaired
- */
-static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
-{
- /* Skip if there were unfixable corruptions or I/O errors */
- if (result->corruptions > 0 || result->check_errors > 0) {
- return;
- }
-
- /* Skip if image is already marked clean */
- if (!(s->header.features & QED_F_NEED_CHECK)) {
- return;
- }
-
- /* Ensure fixes reach storage before clearing check bit */
- bdrv_flush(s->bs);
-
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header_sync(s);
-}
-
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
-{
- QEDCheck check = {
- .s = s,
- .result = result,
- .nclusters = qed_bytes_to_clusters(s, s->file_size),
- .request = { .l2_table = NULL },
- .fix = fix,
- };
- int ret;
-
- check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
- sizeof(check.used_clusters[0]));
-
- check.result->bfi.total_clusters =
- (s->header.image_size + s->header.cluster_size - 1) /
- s->header.cluster_size;
- ret = qed_check_l1_table(&check, s->l1_table);
- if (ret == 0) {
- /* Only check for leaks if entire image was scanned successfully */
- qed_check_for_leaks(&check);
-
- if (fix) {
- qed_check_mark_clean(s, result);
- }
- }
-
- g_free(check.used_clusters);
- return ret;
-}
diff --git a/contrib/qemu/block/qed-cluster.c b/contrib/qemu/block/qed-cluster.c
deleted file mode 100644
index f64b2af8f7e..00000000000
--- a/contrib/qemu/block/qed-cluster.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Cluster functions
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-/**
- * Count the number of contiguous data clusters
- *
- * @s: QED state
- * @table: L2 table
- * @index: First cluster index
- * @n: Maximum number of clusters
- * @offset: Set to first cluster offset
- *
- * This function scans tables for contiguous clusters. A contiguous run of
- * clusters may be allocated, unallocated, or zero.
- */
-static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
- QEDTable *table,
- unsigned int index,
- unsigned int n,
- uint64_t *offset)
-{
- unsigned int end = MIN(index + n, s->table_nelems);
- uint64_t last = table->offsets[index];
- unsigned int i;
-
- *offset = last;
-
- for (i = index + 1; i < end; i++) {
- if (qed_offset_is_unalloc_cluster(last)) {
- /* Counting unallocated clusters */
- if (!qed_offset_is_unalloc_cluster(table->offsets[i])) {
- break;
- }
- } else if (qed_offset_is_zero_cluster(last)) {
- /* Counting zero clusters */
- if (!qed_offset_is_zero_cluster(table->offsets[i])) {
- break;
- }
- } else {
- /* Counting allocated clusters */
- if (table->offsets[i] != last + s->header.cluster_size) {
- break;
- }
- last = table->offsets[i];
- }
- }
- return i - index;
-}
-
-typedef struct {
- BDRVQEDState *s;
- uint64_t pos;
- size_t len;
-
- QEDRequest *request;
-
- /* User callback */
- QEDFindClusterFunc *cb;
- void *opaque;
-} QEDFindClusterCB;
-
-static void qed_find_cluster_cb(void *opaque, int ret)
-{
- QEDFindClusterCB *find_cluster_cb = opaque;
- BDRVQEDState *s = find_cluster_cb->s;
- QEDRequest *request = find_cluster_cb->request;
- uint64_t offset = 0;
- size_t len = 0;
- unsigned int index;
- unsigned int n;
-
- if (ret) {
- goto out;
- }
-
- index = qed_l2_index(s, find_cluster_cb->pos);
- n = qed_bytes_to_clusters(s,
- qed_offset_into_cluster(s, find_cluster_cb->pos) +
- find_cluster_cb->len);
- n = qed_count_contiguous_clusters(s, request->l2_table->table,
- index, n, &offset);
-
- if (qed_offset_is_unalloc_cluster(offset)) {
- ret = QED_CLUSTER_L2;
- } else if (qed_offset_is_zero_cluster(offset)) {
- ret = QED_CLUSTER_ZERO;
- } else if (qed_check_cluster_offset(s, offset)) {
- ret = QED_CLUSTER_FOUND;
- } else {
- ret = -EINVAL;
- }
-
- len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
- qed_offset_into_cluster(s, find_cluster_cb->pos));
-
-out:
- find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
- g_free(find_cluster_cb);
-}
-
-/**
- * Find the offset of a data cluster
- *
- * @s: QED state
- * @request: L2 cache entry
- * @pos: Byte position in device
- * @len: Number of bytes
- * @cb: Completion function
- * @opaque: User data for completion function
- *
- * This function translates a position in the block device to an offset in the
- * image file. It invokes the cb completion callback to report back the
- * translated offset or unallocated range in the image file.
- *
- * If the L2 table exists, request->l2_table points to the L2 table cache entry
- * and the caller must free the reference when they are finished. The cache
- * entry is exposed in this way to avoid callers having to read the L2 table
- * again later during request processing. If request->l2_table is non-NULL it
- * will be unreferenced before taking on the new cache entry.
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
- size_t len, QEDFindClusterFunc *cb, void *opaque)
-{
- QEDFindClusterCB *find_cluster_cb;
- uint64_t l2_offset;
-
- /* Limit length to L2 boundary. Requests are broken up at the L2 boundary
- * so that a request acts on one L2 table at a time.
- */
- len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
-
- l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
- if (qed_offset_is_unalloc_cluster(l2_offset)) {
- cb(opaque, QED_CLUSTER_L1, 0, len);
- return;
- }
- if (!qed_check_table_offset(s, l2_offset)) {
- cb(opaque, -EINVAL, 0, 0);
- return;
- }
-
- find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
- find_cluster_cb->s = s;
- find_cluster_cb->pos = pos;
- find_cluster_cb->len = len;
- find_cluster_cb->cb = cb;
- find_cluster_cb->opaque = opaque;
- find_cluster_cb->request = request;
-
- qed_read_l2_table(s, request, l2_offset,
- qed_find_cluster_cb, find_cluster_cb);
-}
diff --git a/contrib/qemu/block/qed-gencb.c b/contrib/qemu/block/qed-gencb.c
deleted file mode 100644
index 7d7ac1ffc8e..00000000000
--- a/contrib/qemu/block/qed-gencb.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qed.h"
-
-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
-{
- GenericCB *gencb = g_malloc(len);
- gencb->cb = cb;
- gencb->opaque = opaque;
- return gencb;
-}
-
-void gencb_complete(void *opaque, int ret)
-{
- GenericCB *gencb = opaque;
- BlockDriverCompletionFunc *cb = gencb->cb;
- void *user_opaque = gencb->opaque;
-
- g_free(gencb);
- cb(user_opaque, ret);
-}
diff --git a/contrib/qemu/block/qed-l2-cache.c b/contrib/qemu/block/qed-l2-cache.c
deleted file mode 100644
index e9b2aae44d9..00000000000
--- a/contrib/qemu/block/qed-l2-cache.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * QEMU Enhanced Disk Format L2 Cache
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-/*
- * L2 table cache usage is as follows:
- *
- * An open image has one L2 table cache that is used to avoid accessing the
- * image file for recently referenced L2 tables.
- *
- * Cluster offset lookup translates the logical offset within the block device
- * to a cluster offset within the image file. This is done by indexing into
- * the L1 and L2 tables which store cluster offsets. It is here where the L2
- * table cache serves up recently referenced L2 tables.
- *
- * If there is a cache miss, that L2 table is read from the image file and
- * committed to the cache. Subsequent accesses to that L2 table will be served
- * from the cache until the table is evicted from the cache.
- *
- * L2 tables are also committed to the cache when new L2 tables are allocated
- * in the image file. Since the L2 table cache is write-through, the new L2
- * table is first written out to the image file and then committed to the
- * cache.
- *
- * Multiple I/O requests may be using an L2 table cache entry at any given
- * time. That means an entry may be in use across several requests and
- * reference counting is needed to free the entry at the correct time. In
- * particular, an entry evicted from the cache will only be freed once all
- * references are dropped.
- *
- * An in-flight I/O request will hold a reference to a L2 table cache entry for
- * the period during which it needs to access the L2 table. This includes
- * cluster offset lookup, L2 table allocation, and L2 table update when a new
- * data cluster has been allocated.
- *
- * An interesting case occurs when two requests need to access an L2 table that
- * is not in the cache. Since the operation to read the table from the image
- * file takes some time to complete, both requests may see a cache miss and
- * start reading the L2 table from the image file. The first to finish will
- * commit its L2 table into the cache. When the second tries to commit its
- * table will be deleted in favor of the existing cache entry.
- */
-
-#include "trace.h"
-#include "qed.h"
-
-/* Each L2 holds 2GB so this let's us fully cache a 100GB disk */
-#define MAX_L2_CACHE_SIZE 50
-
-/**
- * Initialize the L2 cache
- */
-void qed_init_l2_cache(L2TableCache *l2_cache)
-{
- QTAILQ_INIT(&l2_cache->entries);
- l2_cache->n_entries = 0;
-}
-
-/**
- * Free the L2 cache
- */
-void qed_free_l2_cache(L2TableCache *l2_cache)
-{
- CachedL2Table *entry, *next_entry;
-
- QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) {
- qemu_vfree(entry->table);
- g_free(entry);
- }
-}
-
-/**
- * Allocate an uninitialized entry from the cache
- *
- * The returned entry has a reference count of 1 and is owned by the caller.
- * The caller must allocate the actual table field for this entry and it must
- * be freeable using qemu_vfree().
- */
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
-{
- CachedL2Table *entry;
-
- entry = g_malloc0(sizeof(*entry));
- entry->ref++;
-
- trace_qed_alloc_l2_cache_entry(l2_cache, entry);
-
- return entry;
-}
-
-/**
- * Decrease an entry's reference count and free if necessary when the reference
- * count drops to zero.
- */
-void qed_unref_l2_cache_entry(CachedL2Table *entry)
-{
- if (!entry) {
- return;
- }
-
- entry->ref--;
- trace_qed_unref_l2_cache_entry(entry, entry->ref);
- if (entry->ref == 0) {
- qemu_vfree(entry->table);
- g_free(entry);
- }
-}
-
-/**
- * Find an entry in the L2 cache. This may return NULL and it's up to the
- * caller to satisfy the cache miss.
- *
- * For a cached entry, this function increases the reference count and returns
- * the entry.
- */
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
-{
- CachedL2Table *entry;
-
- QTAILQ_FOREACH(entry, &l2_cache->entries, node) {
- if (entry->offset == offset) {
- trace_qed_find_l2_cache_entry(l2_cache, entry, offset, entry->ref);
- entry->ref++;
- return entry;
- }
- }
- return NULL;
-}
-
-/**
- * Commit an L2 cache entry into the cache. This is meant to be used as part of
- * the process to satisfy a cache miss. A caller would allocate an entry which
- * is not actually in the L2 cache and then once the entry was valid and
- * present on disk, the entry can be committed into the cache.
- *
- * Since the cache is write-through, it's important that this function is not
- * called until the entry is present on disk and the L1 has been updated to
- * point to the entry.
- *
- * N.B. This function steals a reference to the l2_table from the caller so the
- * caller must obtain a new reference by issuing a call to
- * qed_find_l2_cache_entry().
- */
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
-{
- CachedL2Table *entry;
-
- entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset);
- if (entry) {
- qed_unref_l2_cache_entry(entry);
- qed_unref_l2_cache_entry(l2_table);
- return;
- }
-
- /* Evict an unused cache entry so we have space. If all entries are in use
- * we can grow the cache temporarily and we try to shrink back down later.
- */
- if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
- CachedL2Table *next;
- QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
- if (entry->ref > 1) {
- continue;
- }
-
- QTAILQ_REMOVE(&l2_cache->entries, entry, node);
- l2_cache->n_entries--;
- qed_unref_l2_cache_entry(entry);
-
- /* Stop evicting when we've shrunk back to max size */
- if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
- break;
- }
- }
- }
-
- l2_cache->n_entries++;
- QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node);
-}
diff --git a/contrib/qemu/block/qed-table.c b/contrib/qemu/block/qed-table.c
deleted file mode 100644
index 76d2dcccf81..00000000000
--- a/contrib/qemu/block/qed-table.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Table I/O
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "trace.h"
-#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
-#include "qed.h"
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEDTable *table;
-
- struct iovec iov;
- QEMUIOVector qiov;
-} QEDReadTableCB;
-
-static void qed_read_table_cb(void *opaque, int ret)
-{
- QEDReadTableCB *read_table_cb = opaque;
- QEDTable *table = read_table_cb->table;
- int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
- int i;
-
- /* Handle I/O error */
- if (ret) {
- goto out;
- }
-
- /* Byteswap offsets */
- for (i = 0; i < noffsets; i++) {
- table->offsets[i] = le64_to_cpu(table->offsets[i]);
- }
-
-out:
- /* Completion */
- trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
- gencb_complete(&read_table_cb->gencb, ret);
-}
-
-static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
- cb, opaque);
- QEMUIOVector *qiov = &read_table_cb->qiov;
-
- trace_qed_read_table(s, offset, table);
-
- read_table_cb->s = s;
- read_table_cb->table = table;
- read_table_cb->iov.iov_base = table->offsets,
- read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
-
- qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
- bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
- qiov->size / BDRV_SECTOR_SIZE,
- qed_read_table_cb, read_table_cb);
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEDTable *orig_table;
- QEDTable *table;
- bool flush; /* flush after write? */
-
- struct iovec iov;
- QEMUIOVector qiov;
-} QEDWriteTableCB;
-
-static void qed_write_table_cb(void *opaque, int ret)
-{
- QEDWriteTableCB *write_table_cb = opaque;
-
- trace_qed_write_table_cb(write_table_cb->s,
- write_table_cb->orig_table,
- write_table_cb->flush,
- ret);
-
- if (ret) {
- goto out;
- }
-
- if (write_table_cb->flush) {
- /* We still need to flush first */
- write_table_cb->flush = false;
- bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
- write_table_cb);
- return;
- }
-
-out:
- qemu_vfree(write_table_cb->table);
- gencb_complete(&write_table_cb->gencb, ret);
-}
-
-/**
- * Write out an updated part or all of a table
- *
- * @s: QED state
- * @offset: Offset of table in image file, in bytes
- * @table: Table
- * @index: Index of first element
- * @n: Number of elements
- * @flush: Whether or not to sync to disk
- * @cb: Completion function
- * @opaque: Argument for completion function
- */
-static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
- unsigned int index, unsigned int n, bool flush,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- QEDWriteTableCB *write_table_cb;
- unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
- unsigned int start, end, i;
- size_t len_bytes;
-
- trace_qed_write_table(s, offset, table, index, n);
-
- /* Calculate indices of the first and one after last elements */
- start = index & ~sector_mask;
- end = (index + n + sector_mask) & ~sector_mask;
-
- len_bytes = (end - start) * sizeof(uint64_t);
-
- write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
- write_table_cb->s = s;
- write_table_cb->orig_table = table;
- write_table_cb->flush = flush;
- write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
- write_table_cb->iov.iov_base = write_table_cb->table->offsets;
- write_table_cb->iov.iov_len = len_bytes;
- qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
-
- /* Byteswap table */
- for (i = start; i < end; i++) {
- uint64_t le_offset = cpu_to_le64(table->offsets[i]);
- write_table_cb->table->offsets[i - start] = le_offset;
- }
-
- /* Adjust for offset into table */
- offset += start * sizeof(uint64_t);
-
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
- &write_table_cb->qiov,
- write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
- qed_write_table_cb, write_table_cb);
-}
-
-/**
- * Propagate return value from async callback
- */
-static void qed_sync_cb(void *opaque, int ret)
-{
- *(int *)opaque = ret;
-}
-
-int qed_read_l1_table_sync(BDRVQEDState *s)
-{
- int ret = -EINPROGRESS;
-
- qed_read_table(s, s->header.l1_table_offset,
- s->l1_table, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
-
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
- qed_write_table(s, s->header.l1_table_offset,
- s->l1_table, index, n, false, cb, opaque);
-}
-
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
- unsigned int n)
-{
- int ret = -EINPROGRESS;
-
- qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- uint64_t l2_offset;
- QEDRequest *request;
-} QEDReadL2TableCB;
-
-static void qed_read_l2_table_cb(void *opaque, int ret)
-{
- QEDReadL2TableCB *read_l2_table_cb = opaque;
- QEDRequest *request = read_l2_table_cb->request;
- BDRVQEDState *s = read_l2_table_cb->s;
- CachedL2Table *l2_table = request->l2_table;
- uint64_t l2_offset = read_l2_table_cb->l2_offset;
-
- if (ret) {
- /* can't trust loaded L2 table anymore */
- qed_unref_l2_cache_entry(l2_table);
- request->l2_table = NULL;
- } else {
- l2_table->offset = l2_offset;
-
- qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
- /* This is guaranteed to succeed because we just committed the entry
- * to the cache.
- */
- request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
- assert(request->l2_table != NULL);
- }
-
- gencb_complete(&read_l2_table_cb->gencb, ret);
-}
-
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- QEDReadL2TableCB *read_l2_table_cb;
-
- qed_unref_l2_cache_entry(request->l2_table);
-
- /* Check for cached L2 entry */
- request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
- if (request->l2_table) {
- cb(opaque, 0);
- return;
- }
-
- request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
- request->l2_table->table = qed_alloc_table(s);
-
- read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
- read_l2_table_cb->s = s;
- read_l2_table_cb->l2_offset = offset;
- read_l2_table_cb->request = request;
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
- qed_read_table(s, offset, request->l2_table->table,
- qed_read_l2_table_cb, read_l2_table_cb);
-}
-
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
-{
- int ret = -EINPROGRESS;
-
- qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
-
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
- qed_write_table(s, request->l2_table->offset,
- request->l2_table->table, index, n, flush, cb, opaque);
-}
-
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush)
-{
- int ret = -EINPROGRESS;
-
- qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
- while (ret == -EINPROGRESS) {
- qemu_aio_wait();
- }
-
- return ret;
-}
diff --git a/contrib/qemu/block/qed.c b/contrib/qemu/block/qed.c
deleted file mode 100644
index f767b0528ce..00000000000
--- a/contrib/qemu/block/qed.c
+++ /dev/null
@@ -1,1596 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/timer.h"
-#include "trace.h"
-#include "qed.h"
-#include "qapi/qmp/qerror.h"
-#include "migration/migration.h"
-
-static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- QEDAIOCB *acb = (QEDAIOCB *)blockacb;
- bool finished = false;
-
- /* Wait for the request to finish */
- acb->finished = &finished;
- while (!finished) {
- qemu_aio_wait();
- }
-}
-
-static const AIOCBInfo qed_aiocb_info = {
- .aiocb_size = sizeof(QEDAIOCB),
- .cancel = qed_aio_cancel,
-};
-
-static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
- const char *filename)
-{
- const QEDHeader *header = (const QEDHeader *)buf;
-
- if (buf_size < sizeof(*header)) {
- return 0;
- }
- if (le32_to_cpu(header->magic) != QED_MAGIC) {
- return 0;
- }
- return 100;
-}
-
-/**
- * Check whether an image format is raw
- *
- * @fmt: Backing file format, may be NULL
- */
-static bool qed_fmt_is_raw(const char *fmt)
-{
- return fmt && strcmp(fmt, "raw") == 0;
-}
-
-static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
-{
- cpu->magic = le32_to_cpu(le->magic);
- cpu->cluster_size = le32_to_cpu(le->cluster_size);
- cpu->table_size = le32_to_cpu(le->table_size);
- cpu->header_size = le32_to_cpu(le->header_size);
- cpu->features = le64_to_cpu(le->features);
- cpu->compat_features = le64_to_cpu(le->compat_features);
- cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
- cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
- cpu->image_size = le64_to_cpu(le->image_size);
- cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
- cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
-}
-
-static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
-{
- le->magic = cpu_to_le32(cpu->magic);
- le->cluster_size = cpu_to_le32(cpu->cluster_size);
- le->table_size = cpu_to_le32(cpu->table_size);
- le->header_size = cpu_to_le32(cpu->header_size);
- le->features = cpu_to_le64(cpu->features);
- le->compat_features = cpu_to_le64(cpu->compat_features);
- le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
- le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
- le->image_size = cpu_to_le64(cpu->image_size);
- le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
- le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
-}
-
-int qed_write_header_sync(BDRVQEDState *s)
-{
- QEDHeader le;
- int ret;
-
- qed_header_cpu_to_le(&s->header, &le);
- ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
- if (ret != sizeof(le)) {
- return ret;
- }
- return 0;
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- struct iovec iov;
- QEMUIOVector qiov;
- int nsectors;
- uint8_t *buf;
-} QEDWriteHeaderCB;
-
-static void qed_write_header_cb(void *opaque, int ret)
-{
- QEDWriteHeaderCB *write_header_cb = opaque;
-
- qemu_vfree(write_header_cb->buf);
- gencb_complete(write_header_cb, ret);
-}
-
-static void qed_write_header_read_cb(void *opaque, int ret)
-{
- QEDWriteHeaderCB *write_header_cb = opaque;
- BDRVQEDState *s = write_header_cb->s;
-
- if (ret) {
- qed_write_header_cb(write_header_cb, ret);
- return;
- }
-
- /* Update header */
- qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
-
- bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
- write_header_cb->nsectors, qed_write_header_cb,
- write_header_cb);
-}
-
-/**
- * Update header in-place (does not rewrite backing filename or other strings)
- *
- * This function only updates known header fields in-place and does not affect
- * extra data after the QED header.
- */
-static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
- void *opaque)
-{
- /* We must write full sectors for O_DIRECT but cannot necessarily generate
- * the data following the header if an unrecognized compat feature is
- * active. Therefore, first read the sectors containing the header, update
- * them, and write back.
- */
-
- int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
- BDRV_SECTOR_SIZE;
- size_t len = nsectors * BDRV_SECTOR_SIZE;
- QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
- cb, opaque);
-
- write_header_cb->s = s;
- write_header_cb->nsectors = nsectors;
- write_header_cb->buf = qemu_blockalign(s->bs, len);
- write_header_cb->iov.iov_base = write_header_cb->buf;
- write_header_cb->iov.iov_len = len;
- qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
-
- bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
- qed_write_header_read_cb, write_header_cb);
-}
-
-static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
-{
- uint64_t table_entries;
- uint64_t l2_size;
-
- table_entries = (table_size * cluster_size) / sizeof(uint64_t);
- l2_size = table_entries * cluster_size;
-
- return l2_size * table_entries;
-}
-
-static bool qed_is_cluster_size_valid(uint32_t cluster_size)
-{
- if (cluster_size < QED_MIN_CLUSTER_SIZE ||
- cluster_size > QED_MAX_CLUSTER_SIZE) {
- return false;
- }
- if (cluster_size & (cluster_size - 1)) {
- return false; /* not power of 2 */
- }
- return true;
-}
-
-static bool qed_is_table_size_valid(uint32_t table_size)
-{
- if (table_size < QED_MIN_TABLE_SIZE ||
- table_size > QED_MAX_TABLE_SIZE) {
- return false;
- }
- if (table_size & (table_size - 1)) {
- return false; /* not power of 2 */
- }
- return true;
-}
-
-static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
- uint32_t table_size)
-{
- if (image_size % BDRV_SECTOR_SIZE != 0) {
- return false; /* not multiple of sector size */
- }
- if (image_size > qed_max_image_size(cluster_size, table_size)) {
- return false; /* image is too large */
- }
- return true;
-}
-
-/**
- * Read a string of known length from the image file
- *
- * @file: Image file
- * @offset: File offset to start of string, in bytes
- * @n: String length in bytes
- * @buf: Destination buffer
- * @buflen: Destination buffer length in bytes
- * @ret: 0 on success, -errno on failure
- *
- * The string is NUL-terminated.
- */
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
- char *buf, size_t buflen)
-{
- int ret;
- if (n >= buflen) {
- return -EINVAL;
- }
- ret = bdrv_pread(file, offset, buf, n);
- if (ret < 0) {
- return ret;
- }
- buf[n] = '\0';
- return 0;
-}
-
-/**
- * Allocate new clusters
- *
- * @s: QED state
- * @n: Number of contiguous clusters to allocate
- * @ret: Offset of first allocated cluster
- *
- * This function only produces the offset where the new clusters should be
- * written. It updates BDRVQEDState but does not make any changes to the image
- * file.
- */
-static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
-{
- uint64_t offset = s->file_size;
- s->file_size += n * s->header.cluster_size;
- return offset;
-}
-
-QEDTable *qed_alloc_table(BDRVQEDState *s)
-{
- /* Honor O_DIRECT memory alignment requirements */
- return qemu_blockalign(s->bs,
- s->header.cluster_size * s->header.table_size);
-}
-
-/**
- * Allocate a new zeroed L2 table
- */
-static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
-{
- CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
-
- l2_table->table = qed_alloc_table(s);
- l2_table->offset = qed_alloc_clusters(s, s->header.table_size);
-
- memset(l2_table->table->offsets, 0,
- s->header.cluster_size * s->header.table_size);
- return l2_table;
-}
-
-static void qed_aio_next_io(void *opaque, int ret);
-
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
-{
- assert(!s->allocating_write_reqs_plugged);
-
- s->allocating_write_reqs_plugged = true;
-}
-
-static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
-{
- QEDAIOCB *acb;
-
- assert(s->allocating_write_reqs_plugged);
-
- s->allocating_write_reqs_plugged = false;
-
- acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
- if (acb) {
- qed_aio_next_io(acb, 0);
- }
-}
-
-static void qed_finish_clear_need_check(void *opaque, int ret)
-{
- /* Do nothing */
-}
-
-static void qed_flush_after_clear_need_check(void *opaque, int ret)
-{
- BDRVQEDState *s = opaque;
-
- bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
-
- /* No need to wait until flush completes */
- qed_unplug_allocating_write_reqs(s);
-}
-
-static void qed_clear_need_check(void *opaque, int ret)
-{
- BDRVQEDState *s = opaque;
-
- if (ret) {
- qed_unplug_allocating_write_reqs(s);
- return;
- }
-
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header(s, qed_flush_after_clear_need_check, s);
-}
-
-static void qed_need_check_timer_cb(void *opaque)
-{
- BDRVQEDState *s = opaque;
-
- /* The timer should only fire when allocating writes have drained */
- assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
-
- trace_qed_need_check_timer_cb(s);
-
- qed_plug_allocating_write_reqs(s);
-
- /* Ensure writes are on disk before clearing flag */
- bdrv_aio_flush(s->bs, qed_clear_need_check, s);
-}
-
-static void qed_start_need_check_timer(BDRVQEDState *s)
-{
- trace_qed_start_need_check_timer(s);
-
- /* Use vm_clock so we don't alter the image file while suspended for
- * migration.
- */
- qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
- get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
-}
-
-/* It's okay to call this multiple times or when no timer is started */
-static void qed_cancel_need_check_timer(BDRVQEDState *s)
-{
- trace_qed_cancel_need_check_timer(s);
- qemu_del_timer(s->need_check_timer);
-}
-
-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- s->bs = bs;
-}
-
-static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
-{
- BDRVQEDState *s = bs->opaque;
- QEDHeader le_header;
- int64_t file_size;
- int ret;
-
- s->bs = bs;
- QSIMPLEQ_INIT(&s->allocating_write_reqs);
-
- ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
- if (ret < 0) {
- return ret;
- }
- qed_header_le_to_cpu(&le_header, &s->header);
-
- if (s->header.magic != QED_MAGIC) {
- return -EMEDIUMTYPE;
- }
- if (s->header.features & ~QED_FEATURE_MASK) {
- /* image uses unsupported feature bits */
- char buf[64];
- snprintf(buf, sizeof(buf), "%" PRIx64,
- s->header.features & ~QED_FEATURE_MASK);
- qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bs->device_name, "QED", buf);
- return -ENOTSUP;
- }
- if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
- return -EINVAL;
- }
-
- /* Round down file size to the last cluster */
- file_size = bdrv_getlength(bs->file);
- if (file_size < 0) {
- return file_size;
- }
- s->file_size = qed_start_of_cluster(s, file_size);
-
- if (!qed_is_table_size_valid(s->header.table_size)) {
- return -EINVAL;
- }
- if (!qed_is_image_size_valid(s->header.image_size,
- s->header.cluster_size,
- s->header.table_size)) {
- return -EINVAL;
- }
- if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
- return -EINVAL;
- }
-
- s->table_nelems = (s->header.cluster_size * s->header.table_size) /
- sizeof(uint64_t);
- s->l2_shift = ffs(s->header.cluster_size) - 1;
- s->l2_mask = s->table_nelems - 1;
- s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
-
- if ((s->header.features & QED_F_BACKING_FILE)) {
- if ((uint64_t)s->header.backing_filename_offset +
- s->header.backing_filename_size >
- s->header.cluster_size * s->header.header_size) {
- return -EINVAL;
- }
-
- ret = qed_read_string(bs->file, s->header.backing_filename_offset,
- s->header.backing_filename_size, bs->backing_file,
- sizeof(bs->backing_file));
- if (ret < 0) {
- return ret;
- }
-
- if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
- pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
- }
- }
-
- /* Reset unknown autoclear feature bits. This is a backwards
- * compatibility mechanism that allows images to be opened by older
- * programs, which "knock out" unknown feature bits. When an image is
- * opened by a newer program again it can detect that the autoclear
- * feature is no longer valid.
- */
- if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
- s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
-
- ret = qed_write_header_sync(s);
- if (ret) {
- return ret;
- }
-
- /* From here on only known autoclear feature bits are valid */
- bdrv_flush(bs->file);
- }
-
- s->l1_table = qed_alloc_table(s);
- qed_init_l2_cache(&s->l2_cache);
-
- ret = qed_read_l1_table_sync(s);
- if (ret) {
- goto out;
- }
-
- /* If image was not closed cleanly, check consistency */
- if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
- /* Read-only images cannot be fixed. There is no risk of corruption
- * since write operations are not possible. Therefore, allow
- * potentially inconsistent images to be opened read-only. This can
- * aid data recovery from an otherwise inconsistent image.
- */
- if (!bdrv_is_read_only(bs->file) &&
- !(flags & BDRV_O_INCOMING)) {
- BdrvCheckResult result = {0};
-
- ret = qed_check(s, &result, true);
- if (ret) {
- goto out;
- }
- }
- }
-
- s->need_check_timer = qemu_new_timer_ns(vm_clock,
- qed_need_check_timer_cb, s);
-
-out:
- if (ret) {
- qed_free_l2_cache(&s->l2_cache);
- qemu_vfree(s->l1_table);
- }
- return ret;
-}
-
-/* We have nothing to do for QED reopen, stubs just return
- * success */
-static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
- BlockReopenQueue *queue, Error **errp)
-{
- return 0;
-}
-
-static void bdrv_qed_close(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
-
- qed_cancel_need_check_timer(s);
- qemu_free_timer(s->need_check_timer);
-
- /* Ensure writes reach stable storage */
- bdrv_flush(bs->file);
-
- /* Clean shutdown, no check required on next open */
- if (s->header.features & QED_F_NEED_CHECK) {
- s->header.features &= ~QED_F_NEED_CHECK;
- qed_write_header_sync(s);
- }
-
- qed_free_l2_cache(&s->l2_cache);
- qemu_vfree(s->l1_table);
-}
-
-static int qed_create(const char *filename, uint32_t cluster_size,
- uint64_t image_size, uint32_t table_size,
- const char *backing_file, const char *backing_fmt)
-{
- QEDHeader header = {
- .magic = QED_MAGIC,
- .cluster_size = cluster_size,
- .table_size = table_size,
- .header_size = 1,
- .features = 0,
- .compat_features = 0,
- .l1_table_offset = cluster_size,
- .image_size = image_size,
- };
- QEDHeader le_header;
- uint8_t *l1_table = NULL;
- size_t l1_size = header.cluster_size * header.table_size;
- int ret = 0;
- BlockDriverState *bs = NULL;
-
- ret = bdrv_create_file(filename, NULL);
- if (ret < 0) {
- return ret;
- }
-
- ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
- if (ret < 0) {
- return ret;
- }
-
- /* File must start empty and grow, check truncate is supported */
- ret = bdrv_truncate(bs, 0);
- if (ret < 0) {
- goto out;
- }
-
- if (backing_file) {
- header.features |= QED_F_BACKING_FILE;
- header.backing_filename_offset = sizeof(le_header);
- header.backing_filename_size = strlen(backing_file);
-
- if (qed_fmt_is_raw(backing_fmt)) {
- header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
- }
- }
-
- qed_header_cpu_to_le(&header, &le_header);
- ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
- if (ret < 0) {
- goto out;
- }
- ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
- header.backing_filename_size);
- if (ret < 0) {
- goto out;
- }
-
- l1_table = g_malloc0(l1_size);
- ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
- if (ret < 0) {
- goto out;
- }
-
- ret = 0; /* success */
-out:
- g_free(l1_table);
- bdrv_delete(bs);
- return ret;
-}
-
-static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
-{
- uint64_t image_size = 0;
- uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
- uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
-
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- image_size = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
- if (options->value.n) {
- table_size = options->value.n;
- }
- }
- options++;
- }
-
- if (!qed_is_cluster_size_valid(cluster_size)) {
- fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
- QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
- return -EINVAL;
- }
- if (!qed_is_table_size_valid(table_size)) {
- fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
- QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
- return -EINVAL;
- }
- if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
- fprintf(stderr, "QED image size must be a non-zero multiple of "
- "cluster size and less than %" PRIu64 " bytes\n",
- qed_max_image_size(cluster_size, table_size));
- return -EINVAL;
- }
-
- return qed_create(filename, cluster_size, image_size, table_size,
- backing_file, backing_fmt);
-}
-
-typedef struct {
- Coroutine *co;
- int is_allocated;
- int *pnum;
-} QEDIsAllocatedCB;
-
-static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
-{
- QEDIsAllocatedCB *cb = opaque;
- *cb->pnum = len / BDRV_SECTOR_SIZE;
- cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
- if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
- }
-}
-
-static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- BDRVQEDState *s = bs->opaque;
- uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
- size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
- QEDIsAllocatedCB cb = {
- .is_allocated = -1,
- .pnum = pnum,
- };
- QEDRequest request = { .l2_table = NULL };
-
- qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
-
- /* Now sleep if the callback wasn't invoked immediately */
- while (cb.is_allocated == -1) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
-
- qed_unref_l2_cache_entry(request.l2_table);
-
- return cb.is_allocated;
-}
-
-static int bdrv_qed_make_empty(BlockDriverState *bs)
-{
- return -ENOTSUP;
-}
-
-static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
-{
- return acb->common.bs->opaque;
-}
-
-/**
- * Read from the backing file or zero-fill if no backing file
- *
- * @s: QED state
- * @pos: Byte position in device
- * @qiov: Destination I/O vector
- * @cb: Completion function
- * @opaque: User data for completion function
- *
- * This function reads qiov->size bytes starting at pos from the backing file.
- * If there is no backing file then zeroes are read.
- */
-static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
- QEMUIOVector *qiov,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- uint64_t backing_length = 0;
- size_t size;
-
- /* If there is a backing file, get its length. Treat the absence of a
- * backing file like a zero length backing file.
- */
- if (s->bs->backing_hd) {
- int64_t l = bdrv_getlength(s->bs->backing_hd);
- if (l < 0) {
- cb(opaque, l);
- return;
- }
- backing_length = l;
- }
-
- /* Zero all sectors if reading beyond the end of the backing file */
- if (pos >= backing_length ||
- pos + qiov->size > backing_length) {
- qemu_iovec_memset(qiov, 0, 0, qiov->size);
- }
-
- /* Complete now if there are no backing file sectors to read */
- if (pos >= backing_length) {
- cb(opaque, 0);
- return;
- }
-
- /* If the read straddles the end of the backing file, shorten it */
- size = MIN((uint64_t)backing_length - pos, qiov->size);
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
- qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
-}
-
-typedef struct {
- GenericCB gencb;
- BDRVQEDState *s;
- QEMUIOVector qiov;
- struct iovec iov;
- uint64_t offset;
-} CopyFromBackingFileCB;
-
-static void qed_copy_from_backing_file_cb(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- qemu_vfree(copy_cb->iov.iov_base);
- gencb_complete(&copy_cb->gencb, ret);
-}
-
-static void qed_copy_from_backing_file_write(void *opaque, int ret)
-{
- CopyFromBackingFileCB *copy_cb = opaque;
- BDRVQEDState *s = copy_cb->s;
-
- if (ret) {
- qed_copy_from_backing_file_cb(copy_cb, ret);
- return;
- }
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
- bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
- &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
- qed_copy_from_backing_file_cb, copy_cb);
-}
-
-/**
- * Copy data from backing file into the image
- *
- * @s: QED state
- * @pos: Byte position in device
- * @len: Number of bytes
- * @offset: Byte offset in image file
- * @cb: Completion function
- * @opaque: User data for completion function
- */
-static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
- uint64_t len, uint64_t offset,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- CopyFromBackingFileCB *copy_cb;
-
- /* Skip copy entirely if there is no work to do */
- if (len == 0) {
- cb(opaque, 0);
- return;
- }
-
- copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
- copy_cb->s = s;
- copy_cb->offset = offset;
- copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
- copy_cb->iov.iov_len = len;
- qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
-
- qed_read_backing_file(s, pos, &copy_cb->qiov,
- qed_copy_from_backing_file_write, copy_cb);
-}
-
-/**
- * Link one or more contiguous clusters into a table
- *
- * @s: QED state
- * @table: L2 table
- * @index: First cluster index
- * @n: Number of contiguous clusters
- * @cluster: First cluster offset
- *
- * The cluster offset may be an allocated byte offset in the image file, the
- * zero cluster marker, or the unallocated cluster marker.
- */
-static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
- unsigned int n, uint64_t cluster)
-{
- int i;
- for (i = index; i < index + n; i++) {
- table->offsets[i] = cluster;
- if (!qed_offset_is_unalloc_cluster(cluster) &&
- !qed_offset_is_zero_cluster(cluster)) {
- cluster += s->header.cluster_size;
- }
- }
-}
-
-static void qed_aio_complete_bh(void *opaque)
-{
- QEDAIOCB *acb = opaque;
- BlockDriverCompletionFunc *cb = acb->common.cb;
- void *user_opaque = acb->common.opaque;
- int ret = acb->bh_ret;
- bool *finished = acb->finished;
-
- qemu_bh_delete(acb->bh);
- qemu_aio_release(acb);
-
- /* Invoke callback */
- cb(user_opaque, ret);
-
- /* Signal cancel completion */
- if (finished) {
- *finished = true;
- }
-}
-
-static void qed_aio_complete(QEDAIOCB *acb, int ret)
-{
- BDRVQEDState *s = acb_to_s(acb);
-
- trace_qed_aio_complete(s, acb, ret);
-
- /* Free resources */
- qemu_iovec_destroy(&acb->cur_qiov);
- qed_unref_l2_cache_entry(acb->request.l2_table);
-
- /* Free the buffer we may have allocated for zero writes */
- if (acb->flags & QED_AIOCB_ZERO) {
- qemu_vfree(acb->qiov->iov[0].iov_base);
- acb->qiov->iov[0].iov_base = NULL;
- }
-
- /* Arrange for a bh to invoke the completion function */
- acb->bh_ret = ret;
- acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
- qemu_bh_schedule(acb->bh);
-
- /* Start next allocating write request waiting behind this one. Note that
- * requests enqueue themselves when they first hit an unallocated cluster
- * but they wait until the entire request is finished before waking up the
- * next request in the queue. This ensures that we don't cycle through
- * requests multiple times but rather finish one at a time completely.
- */
- if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
- acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
- if (acb) {
- qed_aio_next_io(acb, 0);
- } else if (s->header.features & QED_F_NEED_CHECK) {
- qed_start_need_check_timer(s);
- }
- }
-}
-
-/**
- * Commit the current L2 table to the cache
- */
-static void qed_commit_l2_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- CachedL2Table *l2_table = acb->request.l2_table;
- uint64_t l2_offset = l2_table->offset;
-
- qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
- /* This is guaranteed to succeed because we just committed the entry to the
- * cache.
- */
- acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
- assert(acb->request.l2_table != NULL);
-
- qed_aio_next_io(opaque, ret);
-}
-
-/**
- * Update L1 table with new L2 table offset and write it out
- */
-static void qed_aio_write_l1_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- int index;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- index = qed_l1_index(s, acb->cur_pos);
- s->l1_table->offsets[index] = acb->request.l2_table->offset;
-
- qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
-}
-
-/**
- * Update L2 table with new cluster offsets and write them out
- */
-static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
-{
- BDRVQEDState *s = acb_to_s(acb);
- bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
- int index;
-
- if (ret) {
- goto err;
- }
-
- if (need_alloc) {
- qed_unref_l2_cache_entry(acb->request.l2_table);
- acb->request.l2_table = qed_new_l2_table(s);
- }
-
- index = qed_l2_index(s, acb->cur_pos);
- qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
- offset);
-
- if (need_alloc) {
- /* Write out the whole new L2 table */
- qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
- qed_aio_write_l1_update, acb);
- } else {
- /* Write out only the updated part of the L2 table */
- qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
- qed_aio_next_io, acb);
- }
- return;
-
-err:
- qed_aio_complete(acb, ret);
-}
-
-static void qed_aio_write_l2_update_cb(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
-}
-
-/**
- * Flush new data clusters before updating the L2 table
- *
- * This flush is necessary when a backing file is in use. A crash during an
- * allocating write could result in empty clusters in the image. If the write
- * only touched a subregion of the cluster, then backing image sectors have
- * been lost in the untouched region. The solution is to flush after writing a
- * new data cluster and before updating the L2 table.
- */
-static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
-
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
- qed_aio_complete(acb, -EIO);
- }
-}
-
-/**
- * Write data to the image file
- */
-static void qed_aio_write_main(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t offset = acb->cur_cluster +
- qed_offset_into_cluster(s, acb->cur_pos);
- BlockDriverCompletionFunc *next_fn;
-
- trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
- next_fn = qed_aio_next_io;
- } else {
- if (s->bs->backing_hd) {
- next_fn = qed_aio_write_flush_before_l2_update;
- } else {
- next_fn = qed_aio_write_l2_update_cb;
- }
- }
-
- BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- next_fn, acb);
-}
-
-/**
- * Populate back untouched region of new data cluster
- */
-static void qed_aio_write_postfill(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = acb->cur_pos + acb->cur_qiov.size;
- uint64_t len =
- qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
- uint64_t offset = acb->cur_cluster +
- qed_offset_into_cluster(s, acb->cur_pos) +
- acb->cur_qiov.size;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- trace_qed_aio_write_postfill(s, acb, start, len, offset);
- qed_copy_from_backing_file(s, start, len, offset,
- qed_aio_write_main, acb);
-}
-
-/**
- * Populate front untouched region of new data cluster
- */
-static void qed_aio_write_prefill(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
- uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
-
- trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
- qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
- qed_aio_write_postfill, acb);
-}
-
-/**
- * Check if the QED_F_NEED_CHECK bit should be set during allocating write
- */
-static bool qed_should_set_need_check(BDRVQEDState *s)
-{
- /* The flush before L2 update path ensures consistency */
- if (s->bs->backing_hd) {
- return false;
- }
-
- return !(s->header.features & QED_F_NEED_CHECK);
-}
-
-static void qed_aio_write_zero_cluster(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
-
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- qed_aio_write_l2_update(acb, 0, 1);
-}
-
-/**
- * Write new data cluster
- *
- * @acb: Write request
- * @len: Length in bytes
- *
- * This path is taken when writing to previously unallocated clusters.
- */
-static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
-{
- BDRVQEDState *s = acb_to_s(acb);
- BlockDriverCompletionFunc *cb;
-
- /* Cancel timer when the first allocating request comes in */
- if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
- qed_cancel_need_check_timer(s);
- }
-
- /* Freeze this request if another allocating write is in progress */
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
- QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
- }
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
- s->allocating_write_reqs_plugged) {
- return; /* wait for existing request to finish */
- }
-
- acb->cur_nclusters = qed_bytes_to_clusters(s,
- qed_offset_into_cluster(s, acb->cur_pos) + len);
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- if (acb->flags & QED_AIOCB_ZERO) {
- /* Skip ahead if the clusters are already zero */
- if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
- qed_aio_next_io(acb, 0);
- return;
- }
-
- cb = qed_aio_write_zero_cluster;
- } else {
- cb = qed_aio_write_prefill;
- acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
- }
-
- if (qed_should_set_need_check(s)) {
- s->header.features |= QED_F_NEED_CHECK;
- qed_write_header(s, cb, acb);
- } else {
- cb(acb, 0);
- }
-}
-
-/**
- * Write data cluster in place
- *
- * @acb: Write request
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * This path is taken when writing to already allocated clusters.
- */
-static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
-{
- /* Allocate buffer for zero writes */
- if (acb->flags & QED_AIOCB_ZERO) {
- struct iovec *iov = acb->qiov->iov;
-
- if (!iov->iov_base) {
- iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
- memset(iov->iov_base, 0, iov->iov_len);
- }
- }
-
- /* Calculate the I/O vector */
- acb->cur_cluster = offset;
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- /* Do the actual write */
- qed_aio_write_main(acb, 0);
-}
-
-/**
- * Write data cluster
- *
- * @opaque: Write request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_write_data(void *opaque, int ret,
- uint64_t offset, size_t len)
-{
- QEDAIOCB *acb = opaque;
-
- trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
-
- acb->find_cluster_ret = ret;
-
- switch (ret) {
- case QED_CLUSTER_FOUND:
- qed_aio_write_inplace(acb, offset, len);
- break;
-
- case QED_CLUSTER_L2:
- case QED_CLUSTER_L1:
- case QED_CLUSTER_ZERO:
- qed_aio_write_alloc(acb, len);
- break;
-
- default:
- qed_aio_complete(acb, ret);
- break;
- }
-}
-
-/**
- * Read data cluster
- *
- * @opaque: Read request
- * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- * or -errno
- * @offset: Cluster offset in bytes
- * @len: Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_read_data(void *opaque, int ret,
- uint64_t offset, size_t len)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- BlockDriverState *bs = acb->common.bs;
-
- /* Adjust offset into cluster */
- offset += qed_offset_into_cluster(s, acb->cur_pos);
-
- trace_qed_aio_read_data(s, acb, ret, offset, len);
-
- if (ret < 0) {
- goto err;
- }
-
- qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
- /* Handle zero cluster and backing file reads */
- if (ret == QED_CLUSTER_ZERO) {
- qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
- qed_aio_next_io(acb, 0);
- return;
- } else if (ret != QED_CLUSTER_FOUND) {
- qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
- qed_aio_next_io, acb);
- return;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
- &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
- qed_aio_next_io, acb);
- return;
-
-err:
- qed_aio_complete(acb, ret);
-}
-
-/**
- * Begin next I/O or complete the request
- */
-static void qed_aio_next_io(void *opaque, int ret)
-{
- QEDAIOCB *acb = opaque;
- BDRVQEDState *s = acb_to_s(acb);
- QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
- qed_aio_write_data : qed_aio_read_data;
-
- trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
-
- /* Handle I/O error */
- if (ret) {
- qed_aio_complete(acb, ret);
- return;
- }
-
- acb->qiov_offset += acb->cur_qiov.size;
- acb->cur_pos += acb->cur_qiov.size;
- qemu_iovec_reset(&acb->cur_qiov);
-
- /* Complete request */
- if (acb->cur_pos >= acb->end_pos) {
- qed_aio_complete(acb, 0);
- return;
- }
-
- /* Find next cluster and start I/O */
- qed_find_cluster(s, &acb->request,
- acb->cur_pos, acb->end_pos - acb->cur_pos,
- io_fn, acb);
-}
-
-static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque, int flags)
-{
- QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
-
- trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
- opaque, flags);
-
- acb->flags = flags;
- acb->finished = NULL;
- acb->qiov = qiov;
- acb->qiov_offset = 0;
- acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
- acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
- acb->request.l2_table = NULL;
- qemu_iovec_init(&acb->cur_qiov, qiov->niov);
-
- /* Start request */
- qed_aio_next_io(acb, 0);
- return &acb->common;
-}
-
-static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque)
-{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
- opaque, QED_AIOCB_WRITE);
-}
-
-typedef struct {
- Coroutine *co;
- int ret;
- bool done;
-} QEDWriteZeroesCB;
-
-static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
-{
- QEDWriteZeroesCB *cb = opaque;
-
- cb->done = true;
- cb->ret = ret;
- if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
- }
-}
-
-static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors)
-{
- BlockDriverAIOCB *blockacb;
- BDRVQEDState *s = bs->opaque;
- QEDWriteZeroesCB cb = { .done = false };
- QEMUIOVector qiov;
- struct iovec iov;
-
- /* Refuse if there are untouched backing file sectors */
- if (bs->backing_hd) {
- if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- }
-
- /* Zero writes start without an I/O buffer. If a buffer becomes necessary
- * then it will be allocated during request processing.
- */
- iov.iov_base = NULL,
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
- qed_co_write_zeroes_cb, &cb,
- QED_AIOCB_WRITE | QED_AIOCB_ZERO);
- if (!blockacb) {
- return -EIO;
- }
- if (!cb.done) {
- cb.co = qemu_coroutine_self();
- qemu_coroutine_yield();
- }
- assert(cb.done);
- return cb.ret;
-}
-
-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
-{
- BDRVQEDState *s = bs->opaque;
- uint64_t old_image_size;
- int ret;
-
- if (!qed_is_image_size_valid(offset, s->header.cluster_size,
- s->header.table_size)) {
- return -EINVAL;
- }
-
- /* Shrinking is currently not supported */
- if ((uint64_t)offset < s->header.image_size) {
- return -ENOTSUP;
- }
-
- old_image_size = s->header.image_size;
- s->header.image_size = offset;
- ret = qed_write_header_sync(s);
- if (ret < 0) {
- s->header.image_size = old_image_size;
- }
- return ret;
-}
-
-static int64_t bdrv_qed_getlength(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- return s->header.image_size;
-}
-
-static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
- BDRVQEDState *s = bs->opaque;
-
- memset(bdi, 0, sizeof(*bdi));
- bdi->cluster_size = s->header.cluster_size;
- bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
- return 0;
-}
-
-static int bdrv_qed_change_backing_file(BlockDriverState *bs,
- const char *backing_file,
- const char *backing_fmt)
-{
- BDRVQEDState *s = bs->opaque;
- QEDHeader new_header, le_header;
- void *buffer;
- size_t buffer_len, backing_file_len;
- int ret;
-
- /* Refuse to set backing filename if unknown compat feature bits are
- * active. If the image uses an unknown compat feature then we may not
- * know the layout of data following the header structure and cannot safely
- * add a new string.
- */
- if (backing_file && (s->header.compat_features &
- ~QED_COMPAT_FEATURE_MASK)) {
- return -ENOTSUP;
- }
-
- memcpy(&new_header, &s->header, sizeof(new_header));
-
- new_header.features &= ~(QED_F_BACKING_FILE |
- QED_F_BACKING_FORMAT_NO_PROBE);
-
- /* Adjust feature flags */
- if (backing_file) {
- new_header.features |= QED_F_BACKING_FILE;
-
- if (qed_fmt_is_raw(backing_fmt)) {
- new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
- }
- }
-
- /* Calculate new header size */
- backing_file_len = 0;
-
- if (backing_file) {
- backing_file_len = strlen(backing_file);
- }
-
- buffer_len = sizeof(new_header);
- new_header.backing_filename_offset = buffer_len;
- new_header.backing_filename_size = backing_file_len;
- buffer_len += backing_file_len;
-
- /* Make sure we can rewrite header without failing */
- if (buffer_len > new_header.header_size * new_header.cluster_size) {
- return -ENOSPC;
- }
-
- /* Prepare new header */
- buffer = g_malloc(buffer_len);
-
- qed_header_cpu_to_le(&new_header, &le_header);
- memcpy(buffer, &le_header, sizeof(le_header));
- buffer_len = sizeof(le_header);
-
- if (backing_file) {
- memcpy(buffer + buffer_len, backing_file, backing_file_len);
- buffer_len += backing_file_len;
- }
-
- /* Write new header */
- ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
- g_free(buffer);
- if (ret == 0) {
- memcpy(&s->header, &new_header, sizeof(new_header));
- }
- return ret;
-}
-
-static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
-
- bdrv_qed_close(bs);
- memset(s, 0, sizeof(BDRVQEDState));
- bdrv_qed_open(bs, NULL, bs->open_flags);
-}
-
-static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
- BdrvCheckMode fix)
-{
- BDRVQEDState *s = bs->opaque;
-
- return qed_check(s, result, !!fix);
-}
-
-static QEMUOptionParameter qed_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size (in bytes)"
- }, {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- }, {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- }, {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "Cluster size (in bytes)",
- .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
- }, {
- .name = BLOCK_OPT_TABLE_SIZE,
- .type = OPT_SIZE,
- .help = "L1/L2 table size (in clusters)"
- },
- { /* end of list */ }
-};
-
-static BlockDriver bdrv_qed = {
- .format_name = "qed",
- .instance_size = sizeof(BDRVQEDState),
- .create_options = qed_create_options,
-
- .bdrv_probe = bdrv_qed_probe,
- .bdrv_rebind = bdrv_qed_rebind,
- .bdrv_open = bdrv_qed_open,
- .bdrv_close = bdrv_qed_close,
- .bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
- .bdrv_create = bdrv_qed_create,
- .bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
- .bdrv_make_empty = bdrv_qed_make_empty,
- .bdrv_aio_readv = bdrv_qed_aio_readv,
- .bdrv_aio_writev = bdrv_qed_aio_writev,
- .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
- .bdrv_truncate = bdrv_qed_truncate,
- .bdrv_getlength = bdrv_qed_getlength,
- .bdrv_get_info = bdrv_qed_get_info,
- .bdrv_change_backing_file = bdrv_qed_change_backing_file,
- .bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
- .bdrv_check = bdrv_qed_check,
-};
-
-static void bdrv_qed_init(void)
-{
- bdrv_register(&bdrv_qed);
-}
-
-block_init(bdrv_qed_init);
diff --git a/contrib/qemu/block/qed.h b/contrib/qemu/block/qed.h
deleted file mode 100644
index 2b4ddedf313..00000000000
--- a/contrib/qemu/block/qed.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef BLOCK_QED_H
-#define BLOCK_QED_H
-
-#include "block/block_int.h"
-
-/* The layout of a QED file is as follows:
- *
- * +--------+----------+----------+----------+-----+
- * | header | L1 table | cluster0 | cluster1 | ... |
- * +--------+----------+----------+----------+-----+
- *
- * There is a 2-level pagetable for cluster allocation:
- *
- * +----------+
- * | L1 table |
- * +----------+
- * ,------' | '------.
- * +----------+ | +----------+
- * | L2 table | ... | L2 table |
- * +----------+ +----------+
- * ,------' | '------.
- * +----------+ | +----------+
- * | Data | ... | Data |
- * +----------+ +----------+
- *
- * The L1 table is fixed size and always present. L2 tables are allocated on
- * demand. The L1 table size determines the maximum possible image size; it
- * can be influenced using the cluster_size and table_size values.
- *
- * All fields are little-endian on disk.
- */
-
-enum {
- QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
-
- /* The image supports a backing file */
- QED_F_BACKING_FILE = 0x01,
-
- /* The image needs a consistency check before use */
- QED_F_NEED_CHECK = 0x02,
-
- /* The backing file format must not be probed, treat as raw image */
- QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
-
- /* Feature bits must be used when the on-disk format changes */
- QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
- QED_F_NEED_CHECK |
- QED_F_BACKING_FORMAT_NO_PROBE,
- QED_COMPAT_FEATURE_MASK = 0, /* supported compat feature bits */
- QED_AUTOCLEAR_FEATURE_MASK = 0, /* supported autoclear feature bits */
-
- /* Data is stored in groups of sectors called clusters. Cluster size must
- * be large to avoid keeping too much metadata. I/O requests that have
- * sub-cluster size will require read-modify-write.
- */
- QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
- QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
- QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
-
- /* Allocated clusters are tracked using a 2-level pagetable. Table size is
- * a multiple of clusters so large maximum image sizes can be supported
- * without jacking up the cluster size too much.
- */
- QED_MIN_TABLE_SIZE = 1, /* in clusters */
- QED_MAX_TABLE_SIZE = 16,
- QED_DEFAULT_TABLE_SIZE = 4,
-
- /* Delay to flush and clean image after last allocating write completes */
- QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */
-};
-
-typedef struct {
- uint32_t magic; /* QED\0 */
-
- uint32_t cluster_size; /* in bytes */
- uint32_t table_size; /* for L1 and L2 tables, in clusters */
- uint32_t header_size; /* in clusters */
-
- uint64_t features; /* format feature bits */
- uint64_t compat_features; /* compatible feature bits */
- uint64_t autoclear_features; /* self-resetting feature bits */
-
- uint64_t l1_table_offset; /* in bytes */
- uint64_t image_size; /* total logical image size, in bytes */
-
- /* if (features & QED_F_BACKING_FILE) */
- uint32_t backing_filename_offset; /* in bytes from start of header */
- uint32_t backing_filename_size; /* in bytes */
-} QEDHeader;
-
-typedef struct {
- uint64_t offsets[0]; /* in bytes */
-} QEDTable;
-
-/* The L2 cache is a simple write-through cache for L2 structures */
-typedef struct CachedL2Table {
- QEDTable *table;
- uint64_t offset; /* offset=0 indicates an invalidate entry */
- QTAILQ_ENTRY(CachedL2Table) node;
- int ref;
-} CachedL2Table;
-
-typedef struct {
- QTAILQ_HEAD(, CachedL2Table) entries;
- unsigned int n_entries;
-} L2TableCache;
-
-typedef struct QEDRequest {
- CachedL2Table *l2_table;
-} QEDRequest;
-
-enum {
- QED_AIOCB_WRITE = 0x0001, /* read or write? */
- QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */
-};
-
-typedef struct QEDAIOCB {
- BlockDriverAIOCB common;
- QEMUBH *bh;
- int bh_ret; /* final return status for completion bh */
- QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
- int flags; /* QED_AIOCB_* bits ORed together */
- bool *finished; /* signal for cancel completion */
- uint64_t end_pos; /* request end on block device, in bytes */
-
- /* User scatter-gather list */
- QEMUIOVector *qiov;
- size_t qiov_offset; /* byte count already processed */
-
- /* Current cluster scatter-gather list */
- QEMUIOVector cur_qiov;
- uint64_t cur_pos; /* position on block device, in bytes */
- uint64_t cur_cluster; /* cluster offset in image file */
- unsigned int cur_nclusters; /* number of clusters being accessed */
- int find_cluster_ret; /* used for L1/L2 update */
-
- QEDRequest request;
-} QEDAIOCB;
-
-typedef struct {
- BlockDriverState *bs; /* device */
- uint64_t file_size; /* length of image file, in bytes */
-
- QEDHeader header; /* always cpu-endian */
- QEDTable *l1_table;
- L2TableCache l2_cache; /* l2 table cache */
- uint32_t table_nelems;
- uint32_t l1_shift;
- uint32_t l2_shift;
- uint32_t l2_mask;
-
- /* Allocating write request queue */
- QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
- bool allocating_write_reqs_plugged;
-
- /* Periodic flush and clear need check flag */
- QEMUTimer *need_check_timer;
-} BDRVQEDState;
-
-enum {
- QED_CLUSTER_FOUND, /* cluster found */
- QED_CLUSTER_ZERO, /* zero cluster found */
- QED_CLUSTER_L2, /* cluster missing in L2 */
- QED_CLUSTER_L1, /* cluster missing in L1 */
-};
-
-/**
- * qed_find_cluster() completion callback
- *
- * @opaque: User data for completion callback
- * @ret: QED_CLUSTER_FOUND Success
- * QED_CLUSTER_L2 Data cluster unallocated in L2
- * QED_CLUSTER_L1 L2 unallocated in L1
- * -errno POSIX error occurred
- * @offset: Data cluster offset
- * @len: Contiguous bytes starting from cluster offset
- *
- * This function is invoked when qed_find_cluster() completes.
- *
- * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
- * in the image file.
- *
- * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
- * table offset, respectively. len is number of contiguous unallocated bytes.
- */
-typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
-
-/**
- * Generic callback for chaining async callbacks
- */
-typedef struct {
- BlockDriverCompletionFunc *cb;
- void *opaque;
-} GenericCB;
-
-void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
-void gencb_complete(void *opaque, int ret);
-
-/**
- * Header functions
- */
-int qed_write_header_sync(BDRVQEDState *s);
-
-/**
- * L2 cache functions
- */
-void qed_init_l2_cache(L2TableCache *l2_cache);
-void qed_free_l2_cache(L2TableCache *l2_cache);
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
-void qed_unref_l2_cache_entry(CachedL2Table *entry);
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
-
-/**
- * Table I/O functions
- */
-int qed_read_l1_table_sync(BDRVQEDState *s);
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
- BlockDriverCompletionFunc *cb, void *opaque);
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
- unsigned int n);
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- uint64_t offset);
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
- BlockDriverCompletionFunc *cb, void *opaque);
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush,
- BlockDriverCompletionFunc *cb, void *opaque);
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
- unsigned int index, unsigned int n, bool flush);
-
-/**
- * Cluster functions
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
- size_t len, QEDFindClusterFunc *cb, void *opaque);
-
-/**
- * Consistency check
- */
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
-
-QEDTable *qed_alloc_table(BDRVQEDState *s);
-
-/**
- * Round down to the start of a cluster
- */
-static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
-{
- return offset & ~(uint64_t)(s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
-{
- return offset & (s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
-{
- return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
- (s->header.cluster_size - 1);
-}
-
-static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
-{
- return pos >> s->l1_shift;
-}
-
-static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
-{
- return (pos >> s->l2_shift) & s->l2_mask;
-}
-
-/**
- * Test if a cluster offset is valid
- */
-static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
-{
- uint64_t header_size = (uint64_t)s->header.header_size *
- s->header.cluster_size;
-
- if (offset & (s->header.cluster_size - 1)) {
- return false;
- }
- return offset >= header_size && offset < s->file_size;
-}
-
-/**
- * Test if a table offset is valid
- */
-static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
-{
- uint64_t end_offset = offset + (s->header.table_size - 1) *
- s->header.cluster_size;
-
- /* Overflow check */
- if (end_offset <= offset) {
- return false;
- }
-
- return qed_check_cluster_offset(s, offset) &&
- qed_check_cluster_offset(s, end_offset);
-}
-
-static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
- uint64_t offset)
-{
- if (qed_offset_into_cluster(s, offset)) {
- return false;
- }
- return true;
-}
-
-static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
-{
- if (offset == 0) {
- return true;
- }
- return false;
-}
-
-static inline bool qed_offset_is_zero_cluster(uint64_t offset)
-{
- if (offset == 1) {
- return true;
- }
- return false;
-}
-
-#endif /* BLOCK_QED_H */
diff --git a/contrib/qemu/block/snapshot.c b/contrib/qemu/block/snapshot.c
deleted file mode 100644
index 6c6d9deea1f..00000000000
--- a/contrib/qemu/block/snapshot.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Block layer snapshot related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "block/snapshot.h"
-#include "block/block_int.h"
-
-int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
- const char *name)
-{
- QEMUSnapshotInfo *sn_tab, *sn;
- int nb_sns, i, ret;
-
- ret = -ENOENT;
- nb_sns = bdrv_snapshot_list(bs, &sn_tab);
- if (nb_sns < 0) {
- return ret;
- }
- for (i = 0; i < nb_sns; i++) {
- sn = &sn_tab[i];
- if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
- *sn_info = *sn;
- ret = 0;
- break;
- }
- }
- g_free(sn_tab);
- return ret;
-}
-
-int bdrv_can_snapshot(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
- return 0;
- }
-
- if (!drv->bdrv_snapshot_create) {
- if (bs->file != NULL) {
- return bdrv_can_snapshot(bs->file);
- }
- return 0;
- }
-
- return 1;
-}
-
-int bdrv_snapshot_create(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_create) {
- return drv->bdrv_snapshot_create(bs, sn_info);
- }
- if (bs->file) {
- return bdrv_snapshot_create(bs->file, sn_info);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_goto(BlockDriverState *bs,
- const char *snapshot_id)
-{
- BlockDriver *drv = bs->drv;
- int ret, open_ret;
-
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_goto) {
- return drv->bdrv_snapshot_goto(bs, snapshot_id);
- }
-
- if (bs->file) {
- drv->bdrv_close(bs);
- ret = bdrv_snapshot_goto(bs->file, snapshot_id);
- open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
- if (open_ret < 0) {
- bdrv_delete(bs->file);
- bs->drv = NULL;
- return open_ret;
- }
- return ret;
- }
-
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_delete) {
- return drv->bdrv_snapshot_delete(bs, snapshot_id);
- }
- if (bs->file) {
- return bdrv_snapshot_delete(bs->file, snapshot_id);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_list(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (drv->bdrv_snapshot_list) {
- return drv->bdrv_snapshot_list(bs, psn_info);
- }
- if (bs->file) {
- return bdrv_snapshot_list(bs->file, psn_info);
- }
- return -ENOTSUP;
-}
-
-int bdrv_snapshot_load_tmp(BlockDriverState *bs,
- const char *snapshot_name)
-{
- BlockDriver *drv = bs->drv;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (!bs->read_only) {
- return -EINVAL;
- }
- if (drv->bdrv_snapshot_load_tmp) {
- return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
- }
- return -ENOTSUP;
-}