diff options
Diffstat (limited to 'contrib/qemu/block')
| -rw-r--r-- | contrib/qemu/block/qcow.c | 914 | ||||
| -rw-r--r-- | contrib/qemu/block/qcow2-cache.c | 323 | ||||
| -rw-r--r-- | contrib/qemu/block/qcow2-cluster.c | 1478 | ||||
| -rw-r--r-- | contrib/qemu/block/qcow2-refcount.c | 1374 | ||||
| -rw-r--r-- | contrib/qemu/block/qcow2-snapshot.c | 660 | ||||
| -rw-r--r-- | contrib/qemu/block/qcow2.c | 1825 | ||||
| -rw-r--r-- | contrib/qemu/block/qcow2.h | 437 | ||||
| -rw-r--r-- | contrib/qemu/block/qed-check.c | 248 | ||||
| -rw-r--r-- | contrib/qemu/block/qed-cluster.c | 165 | ||||
| -rw-r--r-- | contrib/qemu/block/qed-gencb.c | 32 | ||||
| -rw-r--r-- | contrib/qemu/block/qed-l2-cache.c | 187 | ||||
| -rw-r--r-- | contrib/qemu/block/qed-table.c | 296 | ||||
| -rw-r--r-- | contrib/qemu/block/qed.c | 1596 | ||||
| -rw-r--r-- | contrib/qemu/block/qed.h | 344 | ||||
| -rw-r--r-- | contrib/qemu/block/snapshot.c | 157 | 
15 files changed, 0 insertions, 10036 deletions
diff --git a/contrib/qemu/block/qcow.c b/contrib/qemu/block/qcow.c deleted file mode 100644 index 5239bd68f1c..00000000000 --- a/contrib/qemu/block/qcow.c +++ /dev/null @@ -1,914 +0,0 @@ -/* - * Block driver for the QCOW format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block/block_int.h" -#include "qemu/module.h" -#include <zlib.h> -#include "qemu/aes.h" -#include "migration/migration.h" - -/**************************************************************/ -/* QEMU COW block driver with compression and encryption support */ - -#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) -#define QCOW_VERSION 1 - -#define QCOW_CRYPT_NONE 0 -#define QCOW_CRYPT_AES  1 - -#define QCOW_OFLAG_COMPRESSED (1LL << 63) - -typedef struct QCowHeader { -    uint32_t magic; -    uint32_t version; -    uint64_t backing_file_offset; -    uint32_t backing_file_size; -    uint32_t mtime; -    uint64_t size; /* in bytes */ -    uint8_t cluster_bits; -    uint8_t l2_bits; -    uint32_t crypt_method; -    uint64_t l1_table_offset; -} QCowHeader; - -#define L2_CACHE_SIZE 16 - -typedef struct BDRVQcowState { -    int cluster_bits; -    int cluster_size; -    int cluster_sectors; -    int l2_bits; -    int l2_size; -    int l1_size; -    uint64_t cluster_offset_mask; -    uint64_t l1_table_offset; -    uint64_t *l1_table; -    uint64_t *l2_cache; -    uint64_t l2_cache_offsets[L2_CACHE_SIZE]; -    uint32_t l2_cache_counts[L2_CACHE_SIZE]; -    uint8_t *cluster_cache; -    uint8_t *cluster_data; -    uint64_t cluster_cache_offset; -    uint32_t crypt_method; /* current crypt method, 0 if no key yet */ -    uint32_t crypt_method_header; -    AES_KEY aes_encrypt_key; -    AES_KEY aes_decrypt_key; -    CoMutex lock; -    Error *migration_blocker; -} BDRVQcowState; - -static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); - -static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) -{ -    const QCowHeader *cow_header = (const void *)buf; - -    if (buf_size >= sizeof(QCowHeader) && -        be32_to_cpu(cow_header->magic) == QCOW_MAGIC && -        be32_to_cpu(cow_header->version) == QCOW_VERSION) -        return 100; -    else -        return 0; -} - -static int qcow_open(BlockDriverState *bs, QDict *options, int flags) -{ -    BDRVQcowState *s = bs->opaque; -    int len, i, shift, ret; -    QCowHeader header; - -    ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); -    if (ret < 0) { -        goto fail; -    } -    be32_to_cpus(&header.magic); -    be32_to_cpus(&header.version); -    be64_to_cpus(&header.backing_file_offset); -    be32_to_cpus(&header.backing_file_size); -    be32_to_cpus(&header.mtime); -    be64_to_cpus(&header.size); -    be32_to_cpus(&header.crypt_method); -    be64_to_cpus(&header.l1_table_offset); - -    if (header.magic != QCOW_MAGIC) { -        ret = -EMEDIUMTYPE; -        goto fail; -    } -    if (header.version != QCOW_VERSION) { -        char version[64]; -        snprintf(version, sizeof(version), "QCOW version %d", header.version); -        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, -            bs->device_name, "qcow", version); -        ret = -ENOTSUP; -        goto fail; -    } - -    if (header.size <= 1 || header.cluster_bits < 9) { -        ret = -EINVAL; -        goto fail; -    } -    if (header.crypt_method > QCOW_CRYPT_AES) { -        ret = -EINVAL; -        goto fail; -    } -    s->crypt_method_header = header.crypt_method; -    if (s->crypt_method_header) { -        bs->encrypted = 1; -    } -    s->cluster_bits = header.cluster_bits; -    s->cluster_size = 1 << s->cluster_bits; -    s->cluster_sectors = 1 << (s->cluster_bits - 9); -    s->l2_bits = header.l2_bits; -    s->l2_size = 1 << s->l2_bits; -    bs->total_sectors = header.size / 512; -    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; - -    /* read the level 1 table */ -    shift = s->cluster_bits + s->l2_bits; -    s->l1_size = (header.size + (1LL << shift) - 1) >> shift; - -    s->l1_table_offset = header.l1_table_offset; -    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); - -    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, -               s->l1_size * sizeof(uint64_t)); -    if (ret < 0) { -        goto fail; -    } - -    for(i = 0;i < s->l1_size; i++) { -        be64_to_cpus(&s->l1_table[i]); -    } -    /* alloc L2 cache */ -    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); -    s->cluster_cache = g_malloc(s->cluster_size); -    s->cluster_data = g_malloc(s->cluster_size); -    s->cluster_cache_offset = -1; - -    /* read the backing file name */ -    if (header.backing_file_offset != 0) { -        len = header.backing_file_size; -        if (len > 1023) { -            len = 1023; -        } -        ret = bdrv_pread(bs->file, header.backing_file_offset, -                   bs->backing_file, len); -        if (ret < 0) { -            goto fail; -        } -        bs->backing_file[len] = '\0'; -    } - -    /* Disable migration when qcow images are used */ -    error_set(&s->migration_blocker, -              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED, -              "qcow", bs->device_name, "live migration"); -    migrate_add_blocker(s->migration_blocker); - -    qemu_co_mutex_init(&s->lock); -    return 0; - - fail: -    g_free(s->l1_table); -    g_free(s->l2_cache); -    g_free(s->cluster_cache); -    g_free(s->cluster_data); -    return ret; -} - - -/* We have nothing to do for QCOW reopen, stubs just return - * success */ -static int qcow_reopen_prepare(BDRVReopenState *state, -                               BlockReopenQueue *queue, Error **errp) -{ -    return 0; -} - -static int qcow_set_key(BlockDriverState *bs, const char *key) -{ -    BDRVQcowState *s = bs->opaque; -    uint8_t keybuf[16]; -    int len, i; - -    memset(keybuf, 0, 16); -    len = strlen(key); -    if (len > 16) -        len = 16; -    /* XXX: we could compress the chars to 7 bits to increase -       entropy */ -    for(i = 0;i < len;i++) { -        keybuf[i] = key[i]; -    } -    s->crypt_method = s->crypt_method_header; - -    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) -        return -1; -    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) -        return -1; -    return 0; -} - -/* The crypt function is compatible with the linux cryptoloop -   algorithm for < 4 GB images. NOTE: out_buf == in_buf is -   supported */ -static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, -                            uint8_t *out_buf, const uint8_t *in_buf, -                            int nb_sectors, int enc, -                            const AES_KEY *key) -{ -    union { -        uint64_t ll[2]; -        uint8_t b[16]; -    } ivec; -    int i; - -    for(i = 0; i < nb_sectors; i++) { -        ivec.ll[0] = cpu_to_le64(sector_num); -        ivec.ll[1] = 0; -        AES_cbc_encrypt(in_buf, out_buf, 512, key, -                        ivec.b, enc); -        sector_num++; -        in_buf += 512; -        out_buf += 512; -    } -} - -/* 'allocate' is: - * - * 0 to not allocate. - * - * 1 to allocate a normal cluster (for sector indexes 'n_start' to - * 'n_end') - * - * 2 to allocate a compressed cluster of size - * 'compressed_size'. 'compressed_size' must be > 0 and < - * cluster_size - * - * return 0 if not allocated. - */ -static uint64_t get_cluster_offset(BlockDriverState *bs, -                                   uint64_t offset, int allocate, -                                   int compressed_size, -                                   int n_start, int n_end) -{ -    BDRVQcowState *s = bs->opaque; -    int min_index, i, j, l1_index, l2_index; -    uint64_t l2_offset, *l2_table, cluster_offset, tmp; -    uint32_t min_count; -    int new_l2_table; - -    l1_index = offset >> (s->l2_bits + s->cluster_bits); -    l2_offset = s->l1_table[l1_index]; -    new_l2_table = 0; -    if (!l2_offset) { -        if (!allocate) -            return 0; -        /* allocate a new l2 entry */ -        l2_offset = bdrv_getlength(bs->file); -        /* round to cluster size */ -        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); -        /* update the L1 entry */ -        s->l1_table[l1_index] = l2_offset; -        tmp = cpu_to_be64(l2_offset); -        if (bdrv_pwrite_sync(bs->file, -                s->l1_table_offset + l1_index * sizeof(tmp), -                &tmp, sizeof(tmp)) < 0) -            return 0; -        new_l2_table = 1; -    } -    for(i = 0; i < L2_CACHE_SIZE; i++) { -        if (l2_offset == s->l2_cache_offsets[i]) { -            /* increment the hit count */ -            if (++s->l2_cache_counts[i] == 0xffffffff) { -                for(j = 0; j < L2_CACHE_SIZE; j++) { -                    s->l2_cache_counts[j] >>= 1; -                } -            } -            l2_table = s->l2_cache + (i << s->l2_bits); -            goto found; -        } -    } -    /* not found: load a new entry in the least used one */ -    min_index = 0; -    min_count = 0xffffffff; -    for(i = 0; i < L2_CACHE_SIZE; i++) { -        if (s->l2_cache_counts[i] < min_count) { -            min_count = s->l2_cache_counts[i]; -            min_index = i; -        } -    } -    l2_table = s->l2_cache + (min_index << s->l2_bits); -    if (new_l2_table) { -        memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); -        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table, -                s->l2_size * sizeof(uint64_t)) < 0) -            return 0; -    } else { -        if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != -            s->l2_size * sizeof(uint64_t)) -            return 0; -    } -    s->l2_cache_offsets[min_index] = l2_offset; -    s->l2_cache_counts[min_index] = 1; - found: -    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); -    cluster_offset = be64_to_cpu(l2_table[l2_index]); -    if (!cluster_offset || -        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { -        if (!allocate) -            return 0; -        /* allocate a new cluster */ -        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && -            (n_end - n_start) < s->cluster_sectors) { -            /* if the cluster is already compressed, we must -               decompress it in the case it is not completely -               overwritten */ -            if (decompress_cluster(bs, cluster_offset) < 0) -                return 0; -            cluster_offset = bdrv_getlength(bs->file); -            cluster_offset = (cluster_offset + s->cluster_size - 1) & -                ~(s->cluster_size - 1); -            /* write the cluster content */ -            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) != -                s->cluster_size) -                return -1; -        } else { -            cluster_offset = bdrv_getlength(bs->file); -            if (allocate == 1) { -                /* round to cluster size */ -                cluster_offset = (cluster_offset + s->cluster_size - 1) & -                    ~(s->cluster_size - 1); -                bdrv_truncate(bs->file, cluster_offset + s->cluster_size); -                /* if encrypted, we must initialize the cluster -                   content which won't be written */ -                if (s->crypt_method && -                    (n_end - n_start) < s->cluster_sectors) { -                    uint64_t start_sect; -                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9; -                    memset(s->cluster_data + 512, 0x00, 512); -                    for(i = 0; i < s->cluster_sectors; i++) { -                        if (i < n_start || i >= n_end) { -                            encrypt_sectors(s, start_sect + i, -                                            s->cluster_data, -                                            s->cluster_data + 512, 1, 1, -                                            &s->aes_encrypt_key); -                            if (bdrv_pwrite(bs->file, cluster_offset + i * 512, -                                            s->cluster_data, 512) != 512) -                                return -1; -                        } -                    } -                } -            } else if (allocate == 2) { -                cluster_offset |= QCOW_OFLAG_COMPRESSED | -                    (uint64_t)compressed_size << (63 - s->cluster_bits); -            } -        } -        /* update L2 table */ -        tmp = cpu_to_be64(cluster_offset); -        l2_table[l2_index] = tmp; -        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), -                &tmp, sizeof(tmp)) < 0) -            return 0; -    } -    return cluster_offset; -} - -static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs, -        int64_t sector_num, int nb_sectors, int *pnum) -{ -    BDRVQcowState *s = bs->opaque; -    int index_in_cluster, n; -    uint64_t cluster_offset; - -    qemu_co_mutex_lock(&s->lock); -    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); -    qemu_co_mutex_unlock(&s->lock); -    index_in_cluster = sector_num & (s->cluster_sectors - 1); -    n = s->cluster_sectors - index_in_cluster; -    if (n > nb_sectors) -        n = nb_sectors; -    *pnum = n; -    return (cluster_offset != 0); -} - -static int decompress_buffer(uint8_t *out_buf, int out_buf_size, -                             const uint8_t *buf, int buf_size) -{ -    z_stream strm1, *strm = &strm1; -    int ret, out_len; - -    memset(strm, 0, sizeof(*strm)); - -    strm->next_in = (uint8_t *)buf; -    strm->avail_in = buf_size; -    strm->next_out = out_buf; -    strm->avail_out = out_buf_size; - -    ret = inflateInit2(strm, -12); -    if (ret != Z_OK) -        return -1; -    ret = inflate(strm, Z_FINISH); -    out_len = strm->next_out - out_buf; -    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || -        out_len != out_buf_size) { -        inflateEnd(strm); -        return -1; -    } -    inflateEnd(strm); -    return 0; -} - -static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) -{ -    BDRVQcowState *s = bs->opaque; -    int ret, csize; -    uint64_t coffset; - -    coffset = cluster_offset & s->cluster_offset_mask; -    if (s->cluster_cache_offset != coffset) { -        csize = cluster_offset >> (63 - s->cluster_bits); -        csize &= (s->cluster_size - 1); -        ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize); -        if (ret != csize) -            return -1; -        if (decompress_buffer(s->cluster_cache, s->cluster_size, -                              s->cluster_data, csize) < 0) { -            return -1; -        } -        s->cluster_cache_offset = coffset; -    } -    return 0; -} - -static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, -                         int nb_sectors, QEMUIOVector *qiov) -{ -    BDRVQcowState *s = bs->opaque; -    int index_in_cluster; -    int ret = 0, n; -    uint64_t cluster_offset; -    struct iovec hd_iov; -    QEMUIOVector hd_qiov; -    uint8_t *buf; -    void *orig_buf; - -    if (qiov->niov > 1) { -        buf = orig_buf = qemu_blockalign(bs, qiov->size); -    } else { -        orig_buf = NULL; -        buf = (uint8_t *)qiov->iov->iov_base; -    } - -    qemu_co_mutex_lock(&s->lock); - -    while (nb_sectors != 0) { -        /* prepare next request */ -        cluster_offset = get_cluster_offset(bs, sector_num << 9, -                                                 0, 0, 0, 0); -        index_in_cluster = sector_num & (s->cluster_sectors - 1); -        n = s->cluster_sectors - index_in_cluster; -        if (n > nb_sectors) { -            n = nb_sectors; -        } - -        if (!cluster_offset) { -            if (bs->backing_hd) { -                /* read from the base image */ -                hd_iov.iov_base = (void *)buf; -                hd_iov.iov_len = n * 512; -                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); -                qemu_co_mutex_unlock(&s->lock); -                ret = bdrv_co_readv(bs->backing_hd, sector_num, -                                    n, &hd_qiov); -                qemu_co_mutex_lock(&s->lock); -                if (ret < 0) { -                    goto fail; -                } -            } else { -                /* Note: in this case, no need to wait */ -                memset(buf, 0, 512 * n); -            } -        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { -            /* add AIO support for compressed blocks ? */ -            if (decompress_cluster(bs, cluster_offset) < 0) { -                goto fail; -            } -            memcpy(buf, -                   s->cluster_cache + index_in_cluster * 512, 512 * n); -        } else { -            if ((cluster_offset & 511) != 0) { -                goto fail; -            } -            hd_iov.iov_base = (void *)buf; -            hd_iov.iov_len = n * 512; -            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); -            qemu_co_mutex_unlock(&s->lock); -            ret = bdrv_co_readv(bs->file, -                                (cluster_offset >> 9) + index_in_cluster, -                                n, &hd_qiov); -            qemu_co_mutex_lock(&s->lock); -            if (ret < 0) { -                break; -            } -            if (s->crypt_method) { -                encrypt_sectors(s, sector_num, buf, buf, -                                n, 0, -                                &s->aes_decrypt_key); -            } -        } -        ret = 0; - -        nb_sectors -= n; -        sector_num += n; -        buf += n * 512; -    } - -done: -    qemu_co_mutex_unlock(&s->lock); - -    if (qiov->niov > 1) { -        qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size); -        qemu_vfree(orig_buf); -    } - -    return ret; - -fail: -    ret = -EIO; -    goto done; -} - -static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, -                          int nb_sectors, QEMUIOVector *qiov) -{ -    BDRVQcowState *s = bs->opaque; -    int index_in_cluster; -    uint64_t cluster_offset; -    const uint8_t *src_buf; -    int ret = 0, n; -    uint8_t *cluster_data = NULL; -    struct iovec hd_iov; -    QEMUIOVector hd_qiov; -    uint8_t *buf; -    void *orig_buf; - -    s->cluster_cache_offset = -1; /* disable compressed cache */ - -    if (qiov->niov > 1) { -        buf = orig_buf = qemu_blockalign(bs, qiov->size); -        qemu_iovec_to_buf(qiov, 0, buf, qiov->size); -    } else { -        orig_buf = NULL; -        buf = (uint8_t *)qiov->iov->iov_base; -    } - -    qemu_co_mutex_lock(&s->lock); - -    while (nb_sectors != 0) { - -        index_in_cluster = sector_num & (s->cluster_sectors - 1); -        n = s->cluster_sectors - index_in_cluster; -        if (n > nb_sectors) { -            n = nb_sectors; -        } -        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0, -                                            index_in_cluster, -                                            index_in_cluster + n); -        if (!cluster_offset || (cluster_offset & 511) != 0) { -            ret = -EIO; -            break; -        } -        if (s->crypt_method) { -            if (!cluster_data) { -                cluster_data = g_malloc0(s->cluster_size); -            } -            encrypt_sectors(s, sector_num, cluster_data, buf, -                            n, 1, &s->aes_encrypt_key); -            src_buf = cluster_data; -        } else { -            src_buf = buf; -        } - -        hd_iov.iov_base = (void *)src_buf; -        hd_iov.iov_len = n * 512; -        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); -        qemu_co_mutex_unlock(&s->lock); -        ret = bdrv_co_writev(bs->file, -                             (cluster_offset >> 9) + index_in_cluster, -                             n, &hd_qiov); -        qemu_co_mutex_lock(&s->lock); -        if (ret < 0) { -            break; -        } -        ret = 0; - -        nb_sectors -= n; -        sector_num += n; -        buf += n * 512; -    } -    qemu_co_mutex_unlock(&s->lock); - -    if (qiov->niov > 1) { -        qemu_vfree(orig_buf); -    } -    g_free(cluster_data); - -    return ret; -} - -static void qcow_close(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; - -    g_free(s->l1_table); -    g_free(s->l2_cache); -    g_free(s->cluster_cache); -    g_free(s->cluster_data); - -    migrate_del_blocker(s->migration_blocker); -    error_free(s->migration_blocker); -} - -static int qcow_create(const char *filename, QEMUOptionParameter *options) -{ -    int header_size, backing_filename_len, l1_size, shift, i; -    QCowHeader header; -    uint8_t *tmp; -    int64_t total_size = 0; -    const char *backing_file = NULL; -    int flags = 0; -    int ret; -    BlockDriverState *qcow_bs; - -    /* Read out options */ -    while (options && options->name) { -        if (!strcmp(options->name, BLOCK_OPT_SIZE)) { -            total_size = options->value.n / 512; -        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { -            backing_file = options->value.s; -        } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) { -            flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0; -        } -        options++; -    } - -    ret = bdrv_create_file(filename, options); -    if (ret < 0) { -        return ret; -    } - -    ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR); -    if (ret < 0) { -        return ret; -    } - -    ret = bdrv_truncate(qcow_bs, 0); -    if (ret < 0) { -        goto exit; -    } - -    memset(&header, 0, sizeof(header)); -    header.magic = cpu_to_be32(QCOW_MAGIC); -    header.version = cpu_to_be32(QCOW_VERSION); -    header.size = cpu_to_be64(total_size * 512); -    header_size = sizeof(header); -    backing_filename_len = 0; -    if (backing_file) { -        if (strcmp(backing_file, "fat:")) { -            header.backing_file_offset = cpu_to_be64(header_size); -            backing_filename_len = strlen(backing_file); -            header.backing_file_size = cpu_to_be32(backing_filename_len); -            header_size += backing_filename_len; -        } else { -            /* special backing file for vvfat */ -            backing_file = NULL; -        } -        header.cluster_bits = 9; /* 512 byte cluster to avoid copying -                                    unmodifyed sectors */ -        header.l2_bits = 12; /* 32 KB L2 tables */ -    } else { -        header.cluster_bits = 12; /* 4 KB clusters */ -        header.l2_bits = 9; /* 4 KB L2 tables */ -    } -    header_size = (header_size + 7) & ~7; -    shift = header.cluster_bits + header.l2_bits; -    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift; - -    header.l1_table_offset = cpu_to_be64(header_size); -    if (flags & BLOCK_FLAG_ENCRYPT) { -        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); -    } else { -        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); -    } - -    /* write all the data */ -    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header)); -    if (ret != sizeof(header)) { -        goto exit; -    } - -    if (backing_file) { -        ret = bdrv_pwrite(qcow_bs, sizeof(header), -            backing_file, backing_filename_len); -        if (ret != backing_filename_len) { -            goto exit; -        } -    } - -    tmp = g_malloc0(BDRV_SECTOR_SIZE); -    for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/ -        BDRV_SECTOR_SIZE); i++) { -        ret = bdrv_pwrite(qcow_bs, header_size + -            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE); -        if (ret != BDRV_SECTOR_SIZE) { -            g_free(tmp); -            goto exit; -        } -    } - -    g_free(tmp); -    ret = 0; -exit: -    bdrv_delete(qcow_bs); -    return ret; -} - -static int qcow_make_empty(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    uint32_t l1_length = s->l1_size * sizeof(uint64_t); -    int ret; - -    memset(s->l1_table, 0, l1_length); -    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, -            l1_length) < 0) -        return -1; -    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); -    if (ret < 0) -        return ret; - -    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); -    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); -    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); - -    return 0; -} - -/* XXX: put compressed sectors first, then all the cluster aligned -   tables to avoid losing bytes in alignment */ -static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, -                                 const uint8_t *buf, int nb_sectors) -{ -    BDRVQcowState *s = bs->opaque; -    z_stream strm; -    int ret, out_len; -    uint8_t *out_buf; -    uint64_t cluster_offset; - -    if (nb_sectors != s->cluster_sectors) { -        ret = -EINVAL; - -        /* Zero-pad last write if image size is not cluster aligned */ -        if (sector_num + nb_sectors == bs->total_sectors && -            nb_sectors < s->cluster_sectors) { -            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); -            memset(pad_buf, 0, s->cluster_size); -            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); -            ret = qcow_write_compressed(bs, sector_num, -                                        pad_buf, s->cluster_sectors); -            qemu_vfree(pad_buf); -        } -        return ret; -    } - -    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); - -    /* best compression, small window, no zlib header */ -    memset(&strm, 0, sizeof(strm)); -    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, -                       Z_DEFLATED, -12, -                       9, Z_DEFAULT_STRATEGY); -    if (ret != 0) { -        ret = -EINVAL; -        goto fail; -    } - -    strm.avail_in = s->cluster_size; -    strm.next_in = (uint8_t *)buf; -    strm.avail_out = s->cluster_size; -    strm.next_out = out_buf; - -    ret = deflate(&strm, Z_FINISH); -    if (ret != Z_STREAM_END && ret != Z_OK) { -        deflateEnd(&strm); -        ret = -EINVAL; -        goto fail; -    } -    out_len = strm.next_out - out_buf; - -    deflateEnd(&strm); - -    if (ret != Z_STREAM_END || out_len >= s->cluster_size) { -        /* could not compress: write normal cluster */ -        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); -        if (ret < 0) { -            goto fail; -        } -    } else { -        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, -                                            out_len, 0, 0); -        if (cluster_offset == 0) { -            ret = -EIO; -            goto fail; -        } - -        cluster_offset &= s->cluster_offset_mask; -        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); -        if (ret < 0) { -            goto fail; -        } -    } - -    ret = 0; -fail: -    g_free(out_buf); -    return ret; -} - -static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) -{ -    BDRVQcowState *s = bs->opaque; -    bdi->cluster_size = s->cluster_size; -    return 0; -} - - -static QEMUOptionParameter qcow_create_options[] = { -    { -        .name = BLOCK_OPT_SIZE, -        .type = OPT_SIZE, -        .help = "Virtual disk size" -    }, -    { -        .name = BLOCK_OPT_BACKING_FILE, -        .type = OPT_STRING, -        .help = "File name of a base image" -    }, -    { -        .name = BLOCK_OPT_ENCRYPT, -        .type = OPT_FLAG, -        .help = "Encrypt the image" -    }, -    { NULL } -}; - -static BlockDriver bdrv_qcow = { -    .format_name	= "qcow", -    .instance_size	= sizeof(BDRVQcowState), -    .bdrv_probe		= qcow_probe, -    .bdrv_open		= qcow_open, -    .bdrv_close		= qcow_close, -    .bdrv_reopen_prepare = qcow_reopen_prepare, -    .bdrv_create	= qcow_create, -    .bdrv_has_zero_init     = bdrv_has_zero_init_1, - -    .bdrv_co_readv          = qcow_co_readv, -    .bdrv_co_writev         = qcow_co_writev, -    .bdrv_co_is_allocated   = qcow_co_is_allocated, - -    .bdrv_set_key           = qcow_set_key, -    .bdrv_make_empty        = qcow_make_empty, -    .bdrv_write_compressed  = qcow_write_compressed, -    .bdrv_get_info          = qcow_get_info, - -    .create_options = qcow_create_options, -}; - -static void bdrv_qcow_init(void) -{ -    bdrv_register(&bdrv_qcow); -} - -block_init(bdrv_qcow_init); diff --git a/contrib/qemu/block/qcow2-cache.c b/contrib/qemu/block/qcow2-cache.c deleted file mode 100644 index 2f3114ecc24..00000000000 --- a/contrib/qemu/block/qcow2-cache.c +++ /dev/null @@ -1,323 +0,0 @@ -/* - * L2/refcount table cache for the QCOW2 format - * - * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "block/block_int.h" -#include "qemu-common.h" -#include "qcow2.h" -#include "trace.h" - -typedef struct Qcow2CachedTable { -    void*   table; -    int64_t offset; -    bool    dirty; -    int     cache_hits; -    int     ref; -} Qcow2CachedTable; - -struct Qcow2Cache { -    Qcow2CachedTable*       entries; -    struct Qcow2Cache*      depends; -    int                     size; -    bool                    depends_on_flush; -}; - -Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) -{ -    BDRVQcowState *s = bs->opaque; -    Qcow2Cache *c; -    int i; - -    c = g_malloc0(sizeof(*c)); -    c->size = num_tables; -    c->entries = g_malloc0(sizeof(*c->entries) * num_tables); - -    for (i = 0; i < c->size; i++) { -        c->entries[i].table = qemu_blockalign(bs, s->cluster_size); -    } - -    return c; -} - -int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c) -{ -    int i; - -    for (i = 0; i < c->size; i++) { -        assert(c->entries[i].ref == 0); -        qemu_vfree(c->entries[i].table); -    } - -    g_free(c->entries); -    g_free(c); - -    return 0; -} - -static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) -{ -    int ret; - -    ret = qcow2_cache_flush(bs, c->depends); -    if (ret < 0) { -        return ret; -    } - -    c->depends = NULL; -    c->depends_on_flush = false; - -    return 0; -} - -static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) -{ -    BDRVQcowState *s = bs->opaque; -    int ret = 0; - -    if (!c->entries[i].dirty || !c->entries[i].offset) { -        return 0; -    } - -    trace_qcow2_cache_entry_flush(qemu_coroutine_self(), -                                  c == s->l2_table_cache, i); - -    if (c->depends) { -        ret = qcow2_cache_flush_dependency(bs, c); -    } else if (c->depends_on_flush) { -        ret = bdrv_flush(bs->file); -        if (ret >= 0) { -            c->depends_on_flush = false; -        } -    } - -    if (ret < 0) { -        return ret; -    } - -    if (c == s->refcount_block_cache) { -        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); -    } else if (c == s->l2_table_cache) { -        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); -    } - -    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table, -        s->cluster_size); -    if (ret < 0) { -        return ret; -    } - -    c->entries[i].dirty = false; - -    return 0; -} - -int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) -{ -    BDRVQcowState *s = bs->opaque; -    int result = 0; -    int ret; -    int i; - -    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache); - -    for (i = 0; i < c->size; i++) { -        ret = qcow2_cache_entry_flush(bs, c, i); -        if (ret < 0 && result != -ENOSPC) { -            result = ret; -        } -    } - -    if (result == 0) { -        ret = bdrv_flush(bs->file); -        if (ret < 0) { -            result = ret; -        } -    } - -    return result; -} - -int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, -    Qcow2Cache *dependency) -{ -    int ret; - -    if (dependency->depends) { -        ret = qcow2_cache_flush_dependency(bs, dependency); -        if (ret < 0) { -            return ret; -        } -    } - -    if (c->depends && (c->depends != dependency)) { -        ret = qcow2_cache_flush_dependency(bs, c); -        if (ret < 0) { -            return ret; -        } -    } - -    c->depends = dependency; -    return 0; -} - -void qcow2_cache_depends_on_flush(Qcow2Cache *c) -{ -    c->depends_on_flush = true; -} - -static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c) -{ -    int i; -    int min_count = INT_MAX; -    int min_index = -1; - - -    for (i = 0; i < c->size; i++) { -        if (c->entries[i].ref) { -            continue; -        } - -        if (c->entries[i].cache_hits < min_count) { -            min_index = i; -            min_count = c->entries[i].cache_hits; -        } - -        /* Give newer hits priority */ -        /* TODO Check how to optimize the replacement strategy */ -        c->entries[i].cache_hits /= 2; -    } - -    if (min_index == -1) { -        /* This can't happen in current synchronous code, but leave the check -         * here as a reminder for whoever starts using AIO with the cache */ -        abort(); -    } -    return min_index; -} - -static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, -    uint64_t offset, void **table, bool read_from_disk) -{ -    BDRVQcowState *s = bs->opaque; -    int i; -    int ret; - -    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache, -                          offset, read_from_disk); - -    /* Check if the table is already cached */ -    for (i = 0; i < c->size; i++) { -        if (c->entries[i].offset == offset) { -            goto found; -        } -    } - -    /* If not, write a table back and replace it */ -    i = qcow2_cache_find_entry_to_replace(c); -    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(), -                                        c == s->l2_table_cache, i); -    if (i < 0) { -        return i; -    } - -    ret = qcow2_cache_entry_flush(bs, c, i); -    if (ret < 0) { -        return ret; -    } - -    trace_qcow2_cache_get_read(qemu_coroutine_self(), -                               c == s->l2_table_cache, i); -    c->entries[i].offset = 0; -    if (read_from_disk) { -        if (c == s->l2_table_cache) { -            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); -        } - -        ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size); -        if (ret < 0) { -            return ret; -        } -    } - -    /* Give the table some hits for the start so that it won't be replaced -     * immediately. The number 32 is completely arbitrary. */ -    c->entries[i].cache_hits = 32; -    c->entries[i].offset = offset; - -    /* And return the right table */ -found: -    c->entries[i].cache_hits++; -    c->entries[i].ref++; -    *table = c->entries[i].table; - -    trace_qcow2_cache_get_done(qemu_coroutine_self(), -                               c == s->l2_table_cache, i); - -    return 0; -} - -int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, -    void **table) -{ -    return qcow2_cache_do_get(bs, c, offset, table, true); -} - -int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, -    void **table) -{ -    return qcow2_cache_do_get(bs, c, offset, table, false); -} - -int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table) -{ -    int i; - -    for (i = 0; i < c->size; i++) { -        if (c->entries[i].table == *table) { -            goto found; -        } -    } -    return -ENOENT; - -found: -    c->entries[i].ref--; -    *table = NULL; - -    assert(c->entries[i].ref >= 0); -    return 0; -} - -void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table) -{ -    int i; - -    for (i = 0; i < c->size; i++) { -        if (c->entries[i].table == table) { -            goto found; -        } -    } -    abort(); - -found: -    c->entries[i].dirty = true; -} diff --git a/contrib/qemu/block/qcow2-cluster.c b/contrib/qemu/block/qcow2-cluster.c deleted file mode 100644 index cca76d4fcdd..00000000000 --- a/contrib/qemu/block/qcow2-cluster.c +++ /dev/null @@ -1,1478 +0,0 @@ -/* - * Block driver for the QCOW version 2 format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include <zlib.h> - -#include "qemu-common.h" -#include "block/block_int.h" -#include "block/qcow2.h" -#include "trace.h" - -int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, -                        bool exact_size) -{ -    BDRVQcowState *s = bs->opaque; -    int new_l1_size2, ret, i; -    uint64_t *new_l1_table; -    int64_t new_l1_table_offset, new_l1_size; -    uint8_t data[12]; - -    if (min_size <= s->l1_size) -        return 0; - -    if (exact_size) { -        new_l1_size = min_size; -    } else { -        /* Bump size up to reduce the number of times we have to grow */ -        new_l1_size = s->l1_size; -        if (new_l1_size == 0) { -            new_l1_size = 1; -        } -        while (min_size > new_l1_size) { -            new_l1_size = (new_l1_size * 3 + 1) / 2; -        } -    } - -    if (new_l1_size > INT_MAX) { -        return -EFBIG; -    } - -#ifdef DEBUG_ALLOC2 -    fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", -            s->l1_size, new_l1_size); -#endif - -    new_l1_size2 = sizeof(uint64_t) * new_l1_size; -    new_l1_table = g_malloc0(align_offset(new_l1_size2, 512)); -    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); - -    /* write new table (align to cluster) */ -    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); -    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); -    if (new_l1_table_offset < 0) { -        g_free(new_l1_table); -        return new_l1_table_offset; -    } - -    ret = qcow2_cache_flush(bs, s->refcount_block_cache); -    if (ret < 0) { -        goto fail; -    } - -    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); -    for(i = 0; i < s->l1_size; i++) -        new_l1_table[i] = cpu_to_be64(new_l1_table[i]); -    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2); -    if (ret < 0) -        goto fail; -    for(i = 0; i < s->l1_size; i++) -        new_l1_table[i] = be64_to_cpu(new_l1_table[i]); - -    /* set new table */ -    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); -    cpu_to_be32w((uint32_t*)data, new_l1_size); -    cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset); -    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data)); -    if (ret < 0) { -        goto fail; -    } -    g_free(s->l1_table); -    qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t), -                        QCOW2_DISCARD_OTHER); -    s->l1_table_offset = new_l1_table_offset; -    s->l1_table = new_l1_table; -    s->l1_size = new_l1_size; -    return 0; - fail: -    g_free(new_l1_table); -    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, -                        QCOW2_DISCARD_OTHER); -    return ret; -} - -/* - * l2_load - * - * Loads a L2 table into memory. If the table is in the cache, the cache - * is used; otherwise the L2 table is loaded from the image file. - * - * Returns a pointer to the L2 table on success, or NULL if the read from - * the image file failed. - */ - -static int l2_load(BlockDriverState *bs, uint64_t l2_offset, -    uint64_t **l2_table) -{ -    BDRVQcowState *s = bs->opaque; -    int ret; - -    ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table); - -    return ret; -} - -/* - * Writes one sector of the L1 table to the disk (can't update single entries - * and we really don't want bdrv_pread to perform a read-modify-write) - */ -#define L1_ENTRIES_PER_SECTOR (512 / 8) -static int write_l1_entry(BlockDriverState *bs, int l1_index) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t buf[L1_ENTRIES_PER_SECTOR]; -    int l1_start_index; -    int i, ret; - -    l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1); -    for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) { -        buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); -    } - -    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); -    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index, -        buf, sizeof(buf)); -    if (ret < 0) { -        return ret; -    } - -    return 0; -} - -/* - * l2_allocate - * - * Allocate a new l2 entry in the file. If l1_index points to an already - * used entry in the L2 table (i.e. we are doing a copy on write for the L2 - * table) copy the contents of the old L2 table into the newly allocated one. - * Otherwise the new table is initialized with zeros. - * - */ - -static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t old_l2_offset; -    uint64_t *l2_table; -    int64_t l2_offset; -    int ret; - -    old_l2_offset = s->l1_table[l1_index]; - -    trace_qcow2_l2_allocate(bs, l1_index); - -    /* allocate a new l2 entry */ - -    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); -    if (l2_offset < 0) { -        return l2_offset; -    } - -    ret = qcow2_cache_flush(bs, s->refcount_block_cache); -    if (ret < 0) { -        goto fail; -    } - -    /* allocate a new entry in the l2 cache */ - -    trace_qcow2_l2_allocate_get_empty(bs, l1_index); -    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); -    if (ret < 0) { -        return ret; -    } - -    l2_table = *table; - -    if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { -        /* if there was no old l2 table, clear the new table */ -        memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); -    } else { -        uint64_t* old_table; - -        /* if there was an old l2 table, read it from the disk */ -        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); -        ret = qcow2_cache_get(bs, s->l2_table_cache, -            old_l2_offset & L1E_OFFSET_MASK, -            (void**) &old_table); -        if (ret < 0) { -            goto fail; -        } - -        memcpy(l2_table, old_table, s->cluster_size); - -        ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table); -        if (ret < 0) { -            goto fail; -        } -    } - -    /* write the l2 table to the file */ -    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); - -    trace_qcow2_l2_allocate_write_l2(bs, l1_index); -    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); -    ret = qcow2_cache_flush(bs, s->l2_table_cache); -    if (ret < 0) { -        goto fail; -    } - -    /* update the L1 entry */ -    trace_qcow2_l2_allocate_write_l1(bs, l1_index); -    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; -    ret = write_l1_entry(bs, l1_index); -    if (ret < 0) { -        goto fail; -    } - -    *table = l2_table; -    trace_qcow2_l2_allocate_done(bs, l1_index, 0); -    return 0; - -fail: -    trace_qcow2_l2_allocate_done(bs, l1_index, ret); -    qcow2_cache_put(bs, s->l2_table_cache, (void**) table); -    s->l1_table[l1_index] = old_l2_offset; -    return ret; -} - -/* - * Checks how many clusters in a given L2 table are contiguous in the image - * file. As soon as one of the flags in the bitmask stop_flags changes compared - * to the first cluster, the search is stopped and the cluster is not counted - * as contiguous. (This allows it, for example, to stop at the first compressed - * cluster which may require a different handling) - */ -static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size, -        uint64_t *l2_table, uint64_t start, uint64_t stop_flags) -{ -    int i; -    uint64_t mask = stop_flags | L2E_OFFSET_MASK; -    uint64_t offset = be64_to_cpu(l2_table[0]) & mask; - -    if (!offset) -        return 0; - -    for (i = start; i < start + nb_clusters; i++) { -        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; -        if (offset + (uint64_t) i * cluster_size != l2_entry) { -            break; -        } -    } - -	return (i - start); -} - -static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table) -{ -    int i; - -    for (i = 0; i < nb_clusters; i++) { -        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i])); - -        if (type != QCOW2_CLUSTER_UNALLOCATED) { -            break; -        } -    } - -    return i; -} - -/* The crypt function is compatible with the linux cryptoloop -   algorithm for < 4 GB images. NOTE: out_buf == in_buf is -   supported */ -void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, -                           uint8_t *out_buf, const uint8_t *in_buf, -                           int nb_sectors, int enc, -                           const AES_KEY *key) -{ -    union { -        uint64_t ll[2]; -        uint8_t b[16]; -    } ivec; -    int i; - -    for(i = 0; i < nb_sectors; i++) { -        ivec.ll[0] = cpu_to_le64(sector_num); -        ivec.ll[1] = 0; -        AES_cbc_encrypt(in_buf, out_buf, 512, key, -                        ivec.b, enc); -        sector_num++; -        in_buf += 512; -        out_buf += 512; -    } -} - -static int coroutine_fn copy_sectors(BlockDriverState *bs, -                                     uint64_t start_sect, -                                     uint64_t cluster_offset, -                                     int n_start, int n_end) -{ -    BDRVQcowState *s = bs->opaque; -    QEMUIOVector qiov; -    struct iovec iov; -    int n, ret; - -    /* -     * If this is the last cluster and it is only partially used, we must only -     * copy until the end of the image, or bdrv_check_request will fail for the -     * bdrv_read/write calls below. -     */ -    if (start_sect + n_end > bs->total_sectors) { -        n_end = bs->total_sectors - start_sect; -    } - -    n = n_end - n_start; -    if (n <= 0) { -        return 0; -    } - -    iov.iov_len = n * BDRV_SECTOR_SIZE; -    iov.iov_base = qemu_blockalign(bs, iov.iov_len); - -    qemu_iovec_init_external(&qiov, &iov, 1); - -    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); - -    /* Call .bdrv_co_readv() directly instead of using the public block-layer -     * interface.  This avoids double I/O throttling and request tracking, -     * which can lead to deadlock when block layer copy-on-read is enabled. -     */ -    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov); -    if (ret < 0) { -        goto out; -    } - -    if (s->crypt_method) { -        qcow2_encrypt_sectors(s, start_sect + n_start, -                        iov.iov_base, iov.iov_base, n, 1, -                        &s->aes_encrypt_key); -    } - -    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); -    ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov); -    if (ret < 0) { -        goto out; -    } - -    ret = 0; -out: -    qemu_vfree(iov.iov_base); -    return ret; -} - - -/* - * get_cluster_offset - * - * For a given offset of the disk image, find the cluster offset in - * qcow2 file. The offset is stored in *cluster_offset. - * - * on entry, *num is the number of contiguous sectors we'd like to - * access following offset. - * - * on exit, *num is the number of contiguous sectors we can read. - * - * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error - * cases. - */ -int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, -    int *num, uint64_t *cluster_offset) -{ -    BDRVQcowState *s = bs->opaque; -    unsigned int l2_index; -    uint64_t l1_index, l2_offset, *l2_table; -    int l1_bits, c; -    unsigned int index_in_cluster, nb_clusters; -    uint64_t nb_available, nb_needed; -    int ret; - -    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1); -    nb_needed = *num + index_in_cluster; - -    l1_bits = s->l2_bits + s->cluster_bits; - -    /* compute how many bytes there are between the offset and -     * the end of the l1 entry -     */ - -    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1)); - -    /* compute the number of available sectors */ - -    nb_available = (nb_available >> 9) + index_in_cluster; - -    if (nb_needed > nb_available) { -        nb_needed = nb_available; -    } - -    *cluster_offset = 0; - -    /* seek the the l2 offset in the l1 table */ - -    l1_index = offset >> l1_bits; -    if (l1_index >= s->l1_size) { -        ret = QCOW2_CLUSTER_UNALLOCATED; -        goto out; -    } - -    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; -    if (!l2_offset) { -        ret = QCOW2_CLUSTER_UNALLOCATED; -        goto out; -    } - -    /* load the l2 table in memory */ - -    ret = l2_load(bs, l2_offset, &l2_table); -    if (ret < 0) { -        return ret; -    } - -    /* find the cluster offset for the given disk offset */ - -    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); -    *cluster_offset = be64_to_cpu(l2_table[l2_index]); -    nb_clusters = size_to_clusters(s, nb_needed << 9); - -    ret = qcow2_get_cluster_type(*cluster_offset); -    switch (ret) { -    case QCOW2_CLUSTER_COMPRESSED: -        /* Compressed clusters can only be processed one by one */ -        c = 1; -        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; -        break; -    case QCOW2_CLUSTER_ZERO: -        if (s->qcow_version < 3) { -            return -EIO; -        } -        c = count_contiguous_clusters(nb_clusters, s->cluster_size, -                &l2_table[l2_index], 0, -                QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO); -        *cluster_offset = 0; -        break; -    case QCOW2_CLUSTER_UNALLOCATED: -        /* how many empty clusters ? */ -        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); -        *cluster_offset = 0; -        break; -    case QCOW2_CLUSTER_NORMAL: -        /* how many allocated clusters ? */ -        c = count_contiguous_clusters(nb_clusters, s->cluster_size, -                &l2_table[l2_index], 0, -                QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO); -        *cluster_offset &= L2E_OFFSET_MASK; -        break; -    default: -        abort(); -    } - -    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); - -    nb_available = (c * s->cluster_sectors); - -out: -    if (nb_available > nb_needed) -        nb_available = nb_needed; - -    *num = nb_available - index_in_cluster; - -    return ret; -} - -/* - * get_cluster_table - * - * for a given disk offset, load (and allocate if needed) - * the l2 table. - * - * the l2 table offset in the qcow2 file and the cluster index - * in the l2 table are given to the caller. - * - * Returns 0 on success, -errno in failure case - */ -static int get_cluster_table(BlockDriverState *bs, uint64_t offset, -                             uint64_t **new_l2_table, -                             int *new_l2_index) -{ -    BDRVQcowState *s = bs->opaque; -    unsigned int l2_index; -    uint64_t l1_index, l2_offset; -    uint64_t *l2_table = NULL; -    int ret; - -    /* seek the the l2 offset in the l1 table */ - -    l1_index = offset >> (s->l2_bits + s->cluster_bits); -    if (l1_index >= s->l1_size) { -        ret = qcow2_grow_l1_table(bs, l1_index + 1, false); -        if (ret < 0) { -            return ret; -        } -    } - -    assert(l1_index < s->l1_size); -    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; - -    /* seek the l2 table of the given l2 offset */ - -    if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) { -        /* load the l2 table in memory */ -        ret = l2_load(bs, l2_offset, &l2_table); -        if (ret < 0) { -            return ret; -        } -    } else { -        /* First allocate a new L2 table (and do COW if needed) */ -        ret = l2_allocate(bs, l1_index, &l2_table); -        if (ret < 0) { -            return ret; -        } - -        /* Then decrease the refcount of the old table */ -        if (l2_offset) { -            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), -                                QCOW2_DISCARD_OTHER); -        } -    } - -    /* find the cluster offset for the given disk offset */ - -    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); - -    *new_l2_table = l2_table; -    *new_l2_index = l2_index; - -    return 0; -} - -/* - * alloc_compressed_cluster_offset - * - * For a given offset of the disk image, return cluster offset in - * qcow2 file. - * - * If the offset is not found, allocate a new compressed cluster. - * - * Return the cluster offset if successful, - * Return 0, otherwise. - * - */ - -uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, -                                               uint64_t offset, -                                               int compressed_size) -{ -    BDRVQcowState *s = bs->opaque; -    int l2_index, ret; -    uint64_t *l2_table; -    int64_t cluster_offset; -    int nb_csectors; - -    ret = get_cluster_table(bs, offset, &l2_table, &l2_index); -    if (ret < 0) { -        return 0; -    } - -    /* Compression can't overwrite anything. Fail if the cluster was already -     * allocated. */ -    cluster_offset = be64_to_cpu(l2_table[l2_index]); -    if (cluster_offset & L2E_OFFSET_MASK) { -        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -        return 0; -    } - -    cluster_offset = qcow2_alloc_bytes(bs, compressed_size); -    if (cluster_offset < 0) { -        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -        return 0; -    } - -    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - -                  (cluster_offset >> 9); - -    cluster_offset |= QCOW_OFLAG_COMPRESSED | -                      ((uint64_t)nb_csectors << s->csize_shift); - -    /* update L2 table */ - -    /* compressed clusters never have the copied flag */ - -    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); -    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); -    l2_table[l2_index] = cpu_to_be64(cluster_offset); -    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -    if (ret < 0) { -        return 0; -    } - -    return cluster_offset; -} - -static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) -{ -    BDRVQcowState *s = bs->opaque; -    int ret; - -    if (r->nb_sectors == 0) { -        return 0; -    } - -    qemu_co_mutex_unlock(&s->lock); -    ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, -                       r->offset / BDRV_SECTOR_SIZE, -                       r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); -    qemu_co_mutex_lock(&s->lock); - -    if (ret < 0) { -        return ret; -    } - -    /* -     * Before we update the L2 table to actually point to the new cluster, we -     * need to be sure that the refcounts have been increased and COW was -     * handled. -     */ -    qcow2_cache_depends_on_flush(s->l2_table_cache); - -    return 0; -} - -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) -{ -    BDRVQcowState *s = bs->opaque; -    int i, j = 0, l2_index, ret; -    uint64_t *old_cluster, *l2_table; -    uint64_t cluster_offset = m->alloc_offset; - -    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); -    assert(m->nb_clusters > 0); - -    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); - -    /* copy content of unmodified sectors */ -    ret = perform_cow(bs, m, &m->cow_start); -    if (ret < 0) { -        goto err; -    } - -    ret = perform_cow(bs, m, &m->cow_end); -    if (ret < 0) { -        goto err; -    } - -    /* Update L2 table. */ -    if (s->use_lazy_refcounts) { -        qcow2_mark_dirty(bs); -    } -    if (qcow2_need_accurate_refcounts(s)) { -        qcow2_cache_set_dependency(bs, s->l2_table_cache, -                                   s->refcount_block_cache); -    } - -    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); -    if (ret < 0) { -        goto err; -    } -    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); - -    for (i = 0; i < m->nb_clusters; i++) { -        /* if two concurrent writes happen to the same unallocated cluster -	 * each write allocates separate cluster and writes data concurrently. -	 * The first one to complete updates l2 table with pointer to its -	 * cluster the second one has to do RMW (which is done above by -	 * copy_sectors()), update l2 table with its cluster pointer and free -	 * old cluster. This is what this loop does */ -        if(l2_table[l2_index + i] != 0) -            old_cluster[j++] = l2_table[l2_index + i]; - -        l2_table[l2_index + i] = cpu_to_be64((cluster_offset + -                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); -     } - - -    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -    if (ret < 0) { -        goto err; -    } - -    /* -     * If this was a COW, we need to decrease the refcount of the old cluster. -     * Also flush bs->file to get the right order for L2 and refcount update. -     * -     * Don't discard clusters that reach a refcount of 0 (e.g. compressed -     * clusters), the next write will reuse them anyway. -     */ -    if (j != 0) { -        for (i = 0; i < j; i++) { -            qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1, -                                    QCOW2_DISCARD_NEVER); -        } -    } - -    ret = 0; -err: -    g_free(old_cluster); -    return ret; - } - -/* - * Returns the number of contiguous clusters that can be used for an allocating - * write, but require COW to be performed (this includes yet unallocated space, - * which must copy from the backing file) - */ -static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, -    uint64_t *l2_table, int l2_index) -{ -    int i; - -    for (i = 0; i < nb_clusters; i++) { -        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]); -        int cluster_type = qcow2_get_cluster_type(l2_entry); - -        switch(cluster_type) { -        case QCOW2_CLUSTER_NORMAL: -            if (l2_entry & QCOW_OFLAG_COPIED) { -                goto out; -            } -            break; -        case QCOW2_CLUSTER_UNALLOCATED: -        case QCOW2_CLUSTER_COMPRESSED: -        case QCOW2_CLUSTER_ZERO: -            break; -        default: -            abort(); -        } -    } - -out: -    assert(i <= nb_clusters); -    return i; -} - -/* - * Check if there already is an AIO write request in flight which allocates - * the same cluster. In this case we need to wait until the previous - * request has completed and updated the L2 table accordingly. - * - * Returns: - *   0       if there was no dependency. *cur_bytes indicates the number of - *           bytes from guest_offset that can be read before the next - *           dependency must be processed (or the request is complete) - * - *   -EAGAIN if we had to wait for another request, previously gathered - *           information on cluster allocation may be invalid now. The caller - *           must start over anyway, so consider *cur_bytes undefined. - */ -static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, -    uint64_t *cur_bytes, QCowL2Meta **m) -{ -    BDRVQcowState *s = bs->opaque; -    QCowL2Meta *old_alloc; -    uint64_t bytes = *cur_bytes; - -    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { - -        uint64_t start = guest_offset; -        uint64_t end = start + bytes; -        uint64_t old_start = l2meta_cow_start(old_alloc); -        uint64_t old_end = l2meta_cow_end(old_alloc); - -        if (end <= old_start || start >= old_end) { -            /* No intersection */ -        } else { -            if (start < old_start) { -                /* Stop at the start of a running allocation */ -                bytes = old_start - start; -            } else { -                bytes = 0; -            } - -            /* Stop if already an l2meta exists. After yielding, it wouldn't -             * be valid any more, so we'd have to clean up the old L2Metas -             * and deal with requests depending on them before starting to -             * gather new ones. Not worth the trouble. */ -            if (bytes == 0 && *m) { -                *cur_bytes = 0; -                return 0; -            } - -            if (bytes == 0) { -                /* Wait for the dependency to complete. We need to recheck -                 * the free/allocated clusters when we continue. */ -                qemu_co_mutex_unlock(&s->lock); -                qemu_co_queue_wait(&old_alloc->dependent_requests); -                qemu_co_mutex_lock(&s->lock); -                return -EAGAIN; -            } -        } -    } - -    /* Make sure that existing clusters and new allocations are only used up to -     * the next dependency if we shortened the request above */ -    *cur_bytes = bytes; - -    return 0; -} - -/* - * Checks how many already allocated clusters that don't require a copy on - * write there are at the given guest_offset (up to *bytes). If - * *host_offset is not zero, only physically contiguous clusters beginning at - * this host offset are counted. - * - * Note that guest_offset may not be cluster aligned. In this case, the - * returned *host_offset points to exact byte referenced by guest_offset and - * therefore isn't cluster aligned as well. - * - * Returns: - *   0:     if no allocated clusters are available at the given offset. - *          *bytes is normally unchanged. It is set to 0 if the cluster - *          is allocated and doesn't need COW, but doesn't have the right - *          physical offset. - * - *   1:     if allocated clusters that don't require a COW are available at - *          the requested offset. *bytes may have decreased and describes - *          the length of the area that can be written to. - * - *  -errno: in error cases - */ -static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, -    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) -{ -    BDRVQcowState *s = bs->opaque; -    int l2_index; -    uint64_t cluster_offset; -    uint64_t *l2_table; -    unsigned int nb_clusters; -    unsigned int keep_clusters; -    int ret, pret; - -    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset, -                              *bytes); - -    assert(*host_offset == 0 ||    offset_into_cluster(s, guest_offset) -                                == offset_into_cluster(s, *host_offset)); - -    /* -     * Calculate the number of clusters to look for. We stop at L2 table -     * boundaries to keep things simple. -     */ -    nb_clusters = -        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); - -    l2_index = offset_to_l2_index(s, guest_offset); -    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - -    /* Find L2 entry for the first involved cluster */ -    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); -    if (ret < 0) { -        return ret; -    } - -    cluster_offset = be64_to_cpu(l2_table[l2_index]); - -    /* Check how many clusters are already allocated and don't need COW */ -    if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL -        && (cluster_offset & QCOW_OFLAG_COPIED)) -    { -        /* If a specific host_offset is required, check it */ -        bool offset_matches = -            (cluster_offset & L2E_OFFSET_MASK) == *host_offset; - -        if (*host_offset != 0 && !offset_matches) { -            *bytes = 0; -            ret = 0; -            goto out; -        } - -        /* We keep all QCOW_OFLAG_COPIED clusters */ -        keep_clusters = -            count_contiguous_clusters(nb_clusters, s->cluster_size, -                                      &l2_table[l2_index], 0, -                                      QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); -        assert(keep_clusters <= nb_clusters); - -        *bytes = MIN(*bytes, -                 keep_clusters * s->cluster_size -                 - offset_into_cluster(s, guest_offset)); - -        ret = 1; -    } else { -        ret = 0; -    } - -    /* Cleanup */ -out: -    pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -    if (pret < 0) { -        return pret; -    } - -    /* Only return a host offset if we actually made progress. Otherwise we -     * would make requirements for handle_alloc() that it can't fulfill */ -    if (ret) { -        *host_offset = (cluster_offset & L2E_OFFSET_MASK) -                     + offset_into_cluster(s, guest_offset); -    } - -    return ret; -} - -/* - * Allocates new clusters for the given guest_offset. - * - * At most *nb_clusters are allocated, and on return *nb_clusters is updated to - * contain the number of clusters that have been allocated and are contiguous - * in the image file. - * - * If *host_offset is non-zero, it specifies the offset in the image file at - * which the new clusters must start. *nb_clusters can be 0 on return in this - * case if the cluster at host_offset is already in use. If *host_offset is - * zero, the clusters can be allocated anywhere in the image file. - * - * *host_offset is updated to contain the offset into the image file at which - * the first allocated cluster starts. - * - * Return 0 on success and -errno in error cases. -EAGAIN means that the - * function has been waiting for another request and the allocation must be - * restarted, but the whole request should not be failed. - */ -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, -    uint64_t *host_offset, unsigned int *nb_clusters) -{ -    BDRVQcowState *s = bs->opaque; - -    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, -                                         *host_offset, *nb_clusters); - -    /* Allocate new clusters */ -    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); -    if (*host_offset == 0) { -        int64_t cluster_offset = -            qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); -        if (cluster_offset < 0) { -            return cluster_offset; -        } -        *host_offset = cluster_offset; -        return 0; -    } else { -        int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); -        if (ret < 0) { -            return ret; -        } -        *nb_clusters = ret; -        return 0; -    } -} - -/* - * Allocates new clusters for an area that either is yet unallocated or needs a - * copy on write. If *host_offset is non-zero, clusters are only allocated if - * the new allocation can match the specified host offset. - * - * Note that guest_offset may not be cluster aligned. In this case, the - * returned *host_offset points to exact byte referenced by guest_offset and - * therefore isn't cluster aligned as well. - * - * Returns: - *   0:     if no clusters could be allocated. *bytes is set to 0, - *          *host_offset is left unchanged. - * - *   1:     if new clusters were allocated. *bytes may be decreased if the - *          new allocation doesn't cover all of the requested area. - *          *host_offset is updated to contain the host offset of the first - *          newly allocated cluster. - * - *  -errno: in error cases - */ -static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, -    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) -{ -    BDRVQcowState *s = bs->opaque; -    int l2_index; -    uint64_t *l2_table; -    uint64_t entry; -    unsigned int nb_clusters; -    int ret; - -    uint64_t alloc_cluster_offset; - -    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, -                             *bytes); -    assert(*bytes > 0); - -    /* -     * Calculate the number of clusters to look for. We stop at L2 table -     * boundaries to keep things simple. -     */ -    nb_clusters = -        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); - -    l2_index = offset_to_l2_index(s, guest_offset); -    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - -    /* Find L2 entry for the first involved cluster */ -    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); -    if (ret < 0) { -        return ret; -    } - -    entry = be64_to_cpu(l2_table[l2_index]); - -    /* For the moment, overwrite compressed clusters one by one */ -    if (entry & QCOW_OFLAG_COMPRESSED) { -        nb_clusters = 1; -    } else { -        nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); -    } - -    /* This function is only called when there were no non-COW clusters, so if -     * we can't find any unallocated or COW clusters either, something is -     * wrong with our code. */ -    assert(nb_clusters > 0); - -    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -    if (ret < 0) { -        return ret; -    } - -    /* Allocate, if necessary at a given offset in the image file */ -    alloc_cluster_offset = start_of_cluster(s, *host_offset); -    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, -                                  &nb_clusters); -    if (ret < 0) { -        goto fail; -    } - -    /* Can't extend contiguous allocation */ -    if (nb_clusters == 0) { -        *bytes = 0; -        return 0; -    } - -    /* -     * Save info needed for meta data update. -     * -     * requested_sectors: Number of sectors from the start of the first -     * newly allocated cluster to the end of the (possibly shortened -     * before) write request. -     * -     * avail_sectors: Number of sectors from the start of the first -     * newly allocated to the end of the last newly allocated cluster. -     * -     * nb_sectors: The number of sectors from the start of the first -     * newly allocated cluster to the end of the area that the write -     * request actually writes to (excluding COW at the end) -     */ -    int requested_sectors = -        (*bytes + offset_into_cluster(s, guest_offset)) -        >> BDRV_SECTOR_BITS; -    int avail_sectors = nb_clusters -                        << (s->cluster_bits - BDRV_SECTOR_BITS); -    int alloc_n_start = offset_into_cluster(s, guest_offset) -                        >> BDRV_SECTOR_BITS; -    int nb_sectors = MIN(requested_sectors, avail_sectors); -    QCowL2Meta *old_m = *m; - -    *m = g_malloc0(sizeof(**m)); - -    **m = (QCowL2Meta) { -        .next           = old_m, - -        .alloc_offset   = alloc_cluster_offset, -        .offset         = start_of_cluster(s, guest_offset), -        .nb_clusters    = nb_clusters, -        .nb_available   = nb_sectors, - -        .cow_start = { -            .offset     = 0, -            .nb_sectors = alloc_n_start, -        }, -        .cow_end = { -            .offset     = nb_sectors * BDRV_SECTOR_SIZE, -            .nb_sectors = avail_sectors - nb_sectors, -        }, -    }; -    qemu_co_queue_init(&(*m)->dependent_requests); -    QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); - -    *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); -    *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE) -                         - offset_into_cluster(s, guest_offset)); -    assert(*bytes != 0); - -    return 1; - -fail: -    if (*m && (*m)->nb_clusters > 0) { -        QLIST_REMOVE(*m, next_in_flight); -    } -    return ret; -} - -/* - * alloc_cluster_offset - * - * For a given offset on the virtual disk, find the cluster offset in qcow2 - * file. If the offset is not found, allocate a new cluster. - * - * If the cluster was already allocated, m->nb_clusters is set to 0 and - * other fields in m are meaningless. - * - * If the cluster is newly allocated, m->nb_clusters is set to the number of - * contiguous clusters that have been allocated. In this case, the other - * fields of m are valid and contain information about the first allocated - * cluster. - * - * If the request conflicts with another write request in flight, the coroutine - * is queued and will be reentered when the dependency has completed. - * - * Return 0 on success and -errno in error cases - */ -int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, -    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t start, remaining; -    uint64_t cluster_offset; -    uint64_t cur_bytes; -    int ret; - -    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, -                                      n_start, n_end); - -    assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset)); -    offset = start_of_cluster(s, offset); - -again: -    start = offset + (n_start << BDRV_SECTOR_BITS); -    remaining = (n_end - n_start) << BDRV_SECTOR_BITS; -    cluster_offset = 0; -    *host_offset = 0; -    cur_bytes = 0; -    *m = NULL; - -    while (true) { - -        if (!*host_offset) { -            *host_offset = start_of_cluster(s, cluster_offset); -        } - -        assert(remaining >= cur_bytes); - -        start           += cur_bytes; -        remaining       -= cur_bytes; -        cluster_offset  += cur_bytes; - -        if (remaining == 0) { -            break; -        } - -        cur_bytes = remaining; - -        /* -         * Now start gathering as many contiguous clusters as possible: -         * -         * 1. Check for overlaps with in-flight allocations -         * -         *      a) Overlap not in the first cluster -> shorten this request and -         *         let the caller handle the rest in its next loop iteration. -         * -         *      b) Real overlaps of two requests. Yield and restart the search -         *         for contiguous clusters (the situation could have changed -         *         while we were sleeping) -         * -         *      c) TODO: Request starts in the same cluster as the in-flight -         *         allocation ends. Shorten the COW of the in-fight allocation, -         *         set cluster_offset to write to the same cluster and set up -         *         the right synchronisation between the in-flight request and -         *         the new one. -         */ -        ret = handle_dependencies(bs, start, &cur_bytes, m); -        if (ret == -EAGAIN) { -            /* Currently handle_dependencies() doesn't yield if we already had -             * an allocation. If it did, we would have to clean up the L2Meta -             * structs before starting over. */ -            assert(*m == NULL); -            goto again; -        } else if (ret < 0) { -            return ret; -        } else if (cur_bytes == 0) { -            break; -        } else { -            /* handle_dependencies() may have decreased cur_bytes (shortened -             * the allocations below) so that the next dependency is processed -             * correctly during the next loop iteration. */ -        } - -        /* -         * 2. Count contiguous COPIED clusters. -         */ -        ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); -        if (ret < 0) { -            return ret; -        } else if (ret) { -            continue; -        } else if (cur_bytes == 0) { -            break; -        } - -        /* -         * 3. If the request still hasn't completed, allocate new clusters, -         *    considering any cluster_offset of steps 1c or 2. -         */ -        ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); -        if (ret < 0) { -            return ret; -        } else if (ret) { -            continue; -        } else { -            assert(cur_bytes == 0); -            break; -        } -    } - -    *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS); -    assert(*num > 0); -    assert(*host_offset != 0); - -    return 0; -} - -static int decompress_buffer(uint8_t *out_buf, int out_buf_size, -                             const uint8_t *buf, int buf_size) -{ -    z_stream strm1, *strm = &strm1; -    int ret, out_len; - -    memset(strm, 0, sizeof(*strm)); - -    strm->next_in = (uint8_t *)buf; -    strm->avail_in = buf_size; -    strm->next_out = out_buf; -    strm->avail_out = out_buf_size; - -    ret = inflateInit2(strm, -12); -    if (ret != Z_OK) -        return -1; -    ret = inflate(strm, Z_FINISH); -    out_len = strm->next_out - out_buf; -    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || -        out_len != out_buf_size) { -        inflateEnd(strm); -        return -1; -    } -    inflateEnd(strm); -    return 0; -} - -int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) -{ -    BDRVQcowState *s = bs->opaque; -    int ret, csize, nb_csectors, sector_offset; -    uint64_t coffset; - -    coffset = cluster_offset & s->cluster_offset_mask; -    if (s->cluster_cache_offset != coffset) { -        nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; -        sector_offset = coffset & 511; -        csize = nb_csectors * 512 - sector_offset; -        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); -        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors); -        if (ret < 0) { -            return ret; -        } -        if (decompress_buffer(s->cluster_cache, s->cluster_size, -                              s->cluster_data + sector_offset, csize) < 0) { -            return -EIO; -        } -        s->cluster_cache_offset = coffset; -    } -    return 0; -} - -/* - * This discards as many clusters of nb_clusters as possible at once (i.e. - * all clusters in the same L2 table) and returns the number of discarded - * clusters. - */ -static int discard_single_l2(BlockDriverState *bs, uint64_t offset, -    unsigned int nb_clusters) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t *l2_table; -    int l2_index; -    int ret; -    int i; - -    ret = get_cluster_table(bs, offset, &l2_table, &l2_index); -    if (ret < 0) { -        return ret; -    } - -    /* Limit nb_clusters to one L2 table */ -    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - -    for (i = 0; i < nb_clusters; i++) { -        uint64_t old_offset; - -        old_offset = be64_to_cpu(l2_table[l2_index + i]); -        if ((old_offset & L2E_OFFSET_MASK) == 0) { -            continue; -        } - -        /* First remove L2 entries */ -        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); -        l2_table[l2_index + i] = cpu_to_be64(0); - -        /* Then decrease the refcount */ -        qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); -    } - -    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -    if (ret < 0) { -        return ret; -    } - -    return nb_clusters; -} - -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, -    int nb_sectors) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t end_offset; -    unsigned int nb_clusters; -    int ret; - -    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); - -    /* Round start up and end down */ -    offset = align_offset(offset, s->cluster_size); -    end_offset &= ~(s->cluster_size - 1); - -    if (offset > end_offset) { -        return 0; -    } - -    nb_clusters = size_to_clusters(s, end_offset - offset); - -    s->cache_discards = true; - -    /* Each L2 table is handled by its own loop iteration */ -    while (nb_clusters > 0) { -        ret = discard_single_l2(bs, offset, nb_clusters); -        if (ret < 0) { -            goto fail; -        } - -        nb_clusters -= ret; -        offset += (ret * s->cluster_size); -    } - -    ret = 0; -fail: -    s->cache_discards = false; -    qcow2_process_discards(bs, ret); - -    return ret; -} - -/* - * This zeroes as many clusters of nb_clusters as possible at once (i.e. - * all clusters in the same L2 table) and returns the number of zeroed - * clusters. - */ -static int zero_single_l2(BlockDriverState *bs, uint64_t offset, -    unsigned int nb_clusters) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t *l2_table; -    int l2_index; -    int ret; -    int i; - -    ret = get_cluster_table(bs, offset, &l2_table, &l2_index); -    if (ret < 0) { -        return ret; -    } - -    /* Limit nb_clusters to one L2 table */ -    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - -    for (i = 0; i < nb_clusters; i++) { -        uint64_t old_offset; - -        old_offset = be64_to_cpu(l2_table[l2_index + i]); - -        /* Update L2 entries */ -        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); -        if (old_offset & QCOW_OFLAG_COMPRESSED) { -            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); -            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); -        } else { -            l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); -        } -    } - -    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -    if (ret < 0) { -        return ret; -    } - -    return nb_clusters; -} - -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) -{ -    BDRVQcowState *s = bs->opaque; -    unsigned int nb_clusters; -    int ret; - -    /* The zero flag is only supported by version 3 and newer */ -    if (s->qcow_version < 3) { -        return -ENOTSUP; -    } - -    /* Each L2 table is handled by its own loop iteration */ -    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); - -    s->cache_discards = true; - -    while (nb_clusters > 0) { -        ret = zero_single_l2(bs, offset, nb_clusters); -        if (ret < 0) { -            goto fail; -        } - -        nb_clusters -= ret; -        offset += (ret * s->cluster_size); -    } - -    ret = 0; -fail: -    s->cache_discards = false; -    qcow2_process_discards(bs, ret); - -    return ret; -} diff --git a/contrib/qemu/block/qcow2-refcount.c b/contrib/qemu/block/qcow2-refcount.c deleted file mode 100644 index 1244693f39e..00000000000 --- a/contrib/qemu/block/qcow2-refcount.c +++ /dev/null @@ -1,1374 +0,0 @@ -/* - * Block driver for the QCOW version 2 format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu-common.h" -#include "block/block_int.h" -#include "block/qcow2.h" - -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size); -static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, -                            int64_t offset, int64_t length, -                            int addend, enum qcow2_discard_type type); - - -/*********************************************************/ -/* refcount handling */ - -int qcow2_refcount_init(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    int ret, refcount_table_size2, i; - -    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); -    s->refcount_table = g_malloc(refcount_table_size2); -    if (s->refcount_table_size > 0) { -        BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD); -        ret = bdrv_pread(bs->file, s->refcount_table_offset, -                         s->refcount_table, refcount_table_size2); -        if (ret != refcount_table_size2) -            goto fail; -        for(i = 0; i < s->refcount_table_size; i++) -            be64_to_cpus(&s->refcount_table[i]); -    } -    return 0; - fail: -    return -ENOMEM; -} - -void qcow2_refcount_close(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    g_free(s->refcount_table); -} - - -static int load_refcount_block(BlockDriverState *bs, -                               int64_t refcount_block_offset, -                               void **refcount_block) -{ -    BDRVQcowState *s = bs->opaque; -    int ret; - -    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD); -    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, -        refcount_block); - -    return ret; -} - -/* - * Returns the refcount of the cluster given by its index. Any non-negative - * return value is the refcount of the cluster, negative values are -errno - * and indicate an error. - */ -static int get_refcount(BlockDriverState *bs, int64_t cluster_index) -{ -    BDRVQcowState *s = bs->opaque; -    int refcount_table_index, block_index; -    int64_t refcount_block_offset; -    int ret; -    uint16_t *refcount_block; -    uint16_t refcount; - -    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); -    if (refcount_table_index >= s->refcount_table_size) -        return 0; -    refcount_block_offset = s->refcount_table[refcount_table_index]; -    if (!refcount_block_offset) -        return 0; - -    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, -        (void**) &refcount_block); -    if (ret < 0) { -        return ret; -    } - -    block_index = cluster_index & -        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); -    refcount = be16_to_cpu(refcount_block[block_index]); - -    ret = qcow2_cache_put(bs, s->refcount_block_cache, -        (void**) &refcount_block); -    if (ret < 0) { -        return ret; -    } - -    return refcount; -} - -/* - * Rounds the refcount table size up to avoid growing the table for each single - * refcount block that is allocated. - */ -static unsigned int next_refcount_table_size(BDRVQcowState *s, -    unsigned int min_size) -{ -    unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1; -    unsigned int refcount_table_clusters = -        MAX(1, s->refcount_table_size >> (s->cluster_bits - 3)); - -    while (min_clusters > refcount_table_clusters) { -        refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2; -    } - -    return refcount_table_clusters << (s->cluster_bits - 3); -} - - -/* Checks if two offsets are described by the same refcount block */ -static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a, -    uint64_t offset_b) -{ -    uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT); -    uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT); - -    return (block_a == block_b); -} - -/* - * Loads a refcount block. If it doesn't exist yet, it is allocated first - * (including growing the refcount table if needed). - * - * Returns 0 on success or -errno in error case - */ -static int alloc_refcount_block(BlockDriverState *bs, -    int64_t cluster_index, uint16_t **refcount_block) -{ -    BDRVQcowState *s = bs->opaque; -    unsigned int refcount_table_index; -    int ret; - -    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); - -    /* Find the refcount block for the given cluster */ -    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); - -    if (refcount_table_index < s->refcount_table_size) { - -        uint64_t refcount_block_offset = -            s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; - -        /* If it's already there, we're done */ -        if (refcount_block_offset) { -             return load_refcount_block(bs, refcount_block_offset, -                 (void**) refcount_block); -        } -    } - -    /* -     * If we came here, we need to allocate something. Something is at least -     * a cluster for the new refcount block. It may also include a new refcount -     * table if the old refcount table is too small. -     * -     * Note that allocating clusters here needs some special care: -     * -     * - We can't use the normal qcow2_alloc_clusters(), it would try to -     *   increase the refcount and very likely we would end up with an endless -     *   recursion. Instead we must place the refcount blocks in a way that -     *   they can describe them themselves. -     * -     * - We need to consider that at this point we are inside update_refcounts -     *   and doing the initial refcount increase. This means that some clusters -     *   have already been allocated by the caller, but their refcount isn't -     *   accurate yet. free_cluster_index tells us where this allocation ends -     *   as long as we don't overwrite it by freeing clusters. -     * -     * - alloc_clusters_noref and qcow2_free_clusters may load a different -     *   refcount block into the cache -     */ - -    *refcount_block = NULL; - -    /* We write to the refcount table, so we might depend on L2 tables */ -    ret = qcow2_cache_flush(bs, s->l2_table_cache); -    if (ret < 0) { -        return ret; -    } - -    /* Allocate the refcount block itself and mark it as used */ -    int64_t new_block = alloc_clusters_noref(bs, s->cluster_size); -    if (new_block < 0) { -        return new_block; -    } - -#ifdef DEBUG_ALLOC2 -    fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64 -        " at %" PRIx64 "\n", -        refcount_table_index, cluster_index << s->cluster_bits, new_block); -#endif - -    if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) { -        /* Zero the new refcount block before updating it */ -        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, -            (void**) refcount_block); -        if (ret < 0) { -            goto fail_block; -        } - -        memset(*refcount_block, 0, s->cluster_size); - -        /* The block describes itself, need to update the cache */ -        int block_index = (new_block >> s->cluster_bits) & -            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); -        (*refcount_block)[block_index] = cpu_to_be16(1); -    } else { -        /* Described somewhere else. This can recurse at most twice before we -         * arrive at a block that describes itself. */ -        ret = update_refcount(bs, new_block, s->cluster_size, 1, -                              QCOW2_DISCARD_NEVER); -        if (ret < 0) { -            goto fail_block; -        } - -        ret = qcow2_cache_flush(bs, s->refcount_block_cache); -        if (ret < 0) { -            goto fail_block; -        } - -        /* Initialize the new refcount block only after updating its refcount, -         * update_refcount uses the refcount cache itself */ -        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, -            (void**) refcount_block); -        if (ret < 0) { -            goto fail_block; -        } - -        memset(*refcount_block, 0, s->cluster_size); -    } - -    /* Now the new refcount block needs to be written to disk */ -    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); -    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block); -    ret = qcow2_cache_flush(bs, s->refcount_block_cache); -    if (ret < 0) { -        goto fail_block; -    } - -    /* If the refcount table is big enough, just hook the block up there */ -    if (refcount_table_index < s->refcount_table_size) { -        uint64_t data64 = cpu_to_be64(new_block); -        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP); -        ret = bdrv_pwrite_sync(bs->file, -            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t), -            &data64, sizeof(data64)); -        if (ret < 0) { -            goto fail_block; -        } - -        s->refcount_table[refcount_table_index] = new_block; -        return 0; -    } - -    ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block); -    if (ret < 0) { -        goto fail_block; -    } - -    /* -     * If we come here, we need to grow the refcount table. Again, a new -     * refcount table needs some space and we can't simply allocate to avoid -     * endless recursion. -     * -     * Therefore let's grab new refcount blocks at the end of the image, which -     * will describe themselves and the new refcount table. This way we can -     * reference them only in the new table and do the switch to the new -     * refcount table at once without producing an inconsistent state in -     * between. -     */ -    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW); - -    /* Calculate the number of refcount blocks needed so far */ -    uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT); -    uint64_t blocks_used = (s->free_cluster_index + -        refcount_block_clusters - 1) / refcount_block_clusters; - -    /* And now we need at least one block more for the new metadata */ -    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1); -    uint64_t last_table_size; -    uint64_t blocks_clusters; -    do { -        uint64_t table_clusters = -            size_to_clusters(s, table_size * sizeof(uint64_t)); -        blocks_clusters = 1 + -            ((table_clusters + refcount_block_clusters - 1) -            / refcount_block_clusters); -        uint64_t meta_clusters = table_clusters + blocks_clusters; - -        last_table_size = table_size; -        table_size = next_refcount_table_size(s, blocks_used + -            ((meta_clusters + refcount_block_clusters - 1) -            / refcount_block_clusters)); - -    } while (last_table_size != table_size); - -#ifdef DEBUG_ALLOC2 -    fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n", -        s->refcount_table_size, table_size); -#endif - -    /* Create the new refcount table and blocks */ -    uint64_t meta_offset = (blocks_used * refcount_block_clusters) * -        s->cluster_size; -    uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size; -    uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size); -    uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t)); - -    assert(meta_offset >= (s->free_cluster_index * s->cluster_size)); - -    /* Fill the new refcount table */ -    memcpy(new_table, s->refcount_table, -        s->refcount_table_size * sizeof(uint64_t)); -    new_table[refcount_table_index] = new_block; - -    int i; -    for (i = 0; i < blocks_clusters; i++) { -        new_table[blocks_used + i] = meta_offset + (i * s->cluster_size); -    } - -    /* Fill the refcount blocks */ -    uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t)); -    int block = 0; -    for (i = 0; i < table_clusters + blocks_clusters; i++) { -        new_blocks[block++] = cpu_to_be16(1); -    } - -    /* Write refcount blocks to disk */ -    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS); -    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks, -        blocks_clusters * s->cluster_size); -    g_free(new_blocks); -    if (ret < 0) { -        goto fail_table; -    } - -    /* Write refcount table to disk */ -    for(i = 0; i < table_size; i++) { -        cpu_to_be64s(&new_table[i]); -    } - -    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE); -    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table, -        table_size * sizeof(uint64_t)); -    if (ret < 0) { -        goto fail_table; -    } - -    for(i = 0; i < table_size; i++) { -        be64_to_cpus(&new_table[i]); -    } - -    /* Hook up the new refcount table in the qcow2 header */ -    uint8_t data[12]; -    cpu_to_be64w((uint64_t*)data, table_offset); -    cpu_to_be32w((uint32_t*)(data + 8), table_clusters); -    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE); -    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset), -        data, sizeof(data)); -    if (ret < 0) { -        goto fail_table; -    } - -    /* And switch it in memory */ -    uint64_t old_table_offset = s->refcount_table_offset; -    uint64_t old_table_size = s->refcount_table_size; - -    g_free(s->refcount_table); -    s->refcount_table = new_table; -    s->refcount_table_size = table_size; -    s->refcount_table_offset = table_offset; - -    /* Free old table. Remember, we must not change free_cluster_index */ -    uint64_t old_free_cluster_index = s->free_cluster_index; -    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t), -                        QCOW2_DISCARD_OTHER); -    s->free_cluster_index = old_free_cluster_index; - -    ret = load_refcount_block(bs, new_block, (void**) refcount_block); -    if (ret < 0) { -        return ret; -    } - -    return 0; - -fail_table: -    g_free(new_table); -fail_block: -    if (*refcount_block != NULL) { -        qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block); -    } -    return ret; -} - -void qcow2_process_discards(BlockDriverState *bs, int ret) -{ -    BDRVQcowState *s = bs->opaque; -    Qcow2DiscardRegion *d, *next; - -    QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) { -        QTAILQ_REMOVE(&s->discards, d, next); - -        /* Discard is optional, ignore the return value */ -        if (ret >= 0) { -            bdrv_discard(bs->file, -                         d->offset >> BDRV_SECTOR_BITS, -                         d->bytes >> BDRV_SECTOR_BITS); -        } - -        g_free(d); -    } -} - -static void update_refcount_discard(BlockDriverState *bs, -                                    uint64_t offset, uint64_t length) -{ -    BDRVQcowState *s = bs->opaque; -    Qcow2DiscardRegion *d, *p, *next; - -    QTAILQ_FOREACH(d, &s->discards, next) { -        uint64_t new_start = MIN(offset, d->offset); -        uint64_t new_end = MAX(offset + length, d->offset + d->bytes); - -        if (new_end - new_start <= length + d->bytes) { -            /* There can't be any overlap, areas ending up here have no -             * references any more and therefore shouldn't get freed another -             * time. */ -            assert(d->bytes + length == new_end - new_start); -            d->offset = new_start; -            d->bytes = new_end - new_start; -            goto found; -        } -    } - -    d = g_malloc(sizeof(*d)); -    *d = (Qcow2DiscardRegion) { -        .bs     = bs, -        .offset = offset, -        .bytes  = length, -    }; -    QTAILQ_INSERT_TAIL(&s->discards, d, next); - -found: -    /* Merge discard requests if they are adjacent now */ -    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) { -        if (p == d -            || p->offset > d->offset + d->bytes -            || d->offset > p->offset + p->bytes) -        { -            continue; -        } - -        /* Still no overlap possible */ -        assert(p->offset == d->offset + d->bytes -            || d->offset == p->offset + p->bytes); - -        QTAILQ_REMOVE(&s->discards, p, next); -        d->offset = MIN(d->offset, p->offset); -        d->bytes += p->bytes; -    } -} - -/* XXX: cache several refcount block clusters ? */ -static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, -    int64_t offset, int64_t length, int addend, enum qcow2_discard_type type) -{ -    BDRVQcowState *s = bs->opaque; -    int64_t start, last, cluster_offset; -    uint16_t *refcount_block = NULL; -    int64_t old_table_index = -1; -    int ret; - -#ifdef DEBUG_ALLOC2 -    fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n", -           offset, length, addend); -#endif -    if (length < 0) { -        return -EINVAL; -    } else if (length == 0) { -        return 0; -    } - -    if (addend < 0) { -        qcow2_cache_set_dependency(bs, s->refcount_block_cache, -            s->l2_table_cache); -    } - -    start = offset & ~(s->cluster_size - 1); -    last = (offset + length - 1) & ~(s->cluster_size - 1); -    for(cluster_offset = start; cluster_offset <= last; -        cluster_offset += s->cluster_size) -    { -        int block_index, refcount; -        int64_t cluster_index = cluster_offset >> s->cluster_bits; -        int64_t table_index = -            cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); - -        /* Load the refcount block and allocate it if needed */ -        if (table_index != old_table_index) { -            if (refcount_block) { -                ret = qcow2_cache_put(bs, s->refcount_block_cache, -                    (void**) &refcount_block); -                if (ret < 0) { -                    goto fail; -                } -            } - -            ret = alloc_refcount_block(bs, cluster_index, &refcount_block); -            if (ret < 0) { -                goto fail; -            } -        } -        old_table_index = table_index; - -        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block); - -        /* we can update the count and save it */ -        block_index = cluster_index & -            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); - -        refcount = be16_to_cpu(refcount_block[block_index]); -        refcount += addend; -        if (refcount < 0 || refcount > 0xffff) { -            ret = -EINVAL; -            goto fail; -        } -        if (refcount == 0 && cluster_index < s->free_cluster_index) { -            s->free_cluster_index = cluster_index; -        } -        refcount_block[block_index] = cpu_to_be16(refcount); - -        if (refcount == 0 && s->discard_passthrough[type]) { -            update_refcount_discard(bs, cluster_offset, s->cluster_size); -        } -    } - -    ret = 0; -fail: -    if (!s->cache_discards) { -        qcow2_process_discards(bs, ret); -    } - -    /* Write last changed block to disk */ -    if (refcount_block) { -        int wret; -        wret = qcow2_cache_put(bs, s->refcount_block_cache, -            (void**) &refcount_block); -        if (wret < 0) { -            return ret < 0 ? ret : wret; -        } -    } - -    /* -     * Try do undo any updates if an error is returned (This may succeed in -     * some cases like ENOSPC for allocating a new refcount block) -     */ -    if (ret < 0) { -        int dummy; -        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend, -                                QCOW2_DISCARD_NEVER); -        (void)dummy; -    } - -    return ret; -} - -/* - * Increases or decreases the refcount of a given cluster by one. - * addend must be 1 or -1. - * - * If the return value is non-negative, it is the new refcount of the cluster. - * If it is negative, it is -errno and indicates an error. - */ -static int update_cluster_refcount(BlockDriverState *bs, -                                   int64_t cluster_index, -                                   int addend, -                                   enum qcow2_discard_type type) -{ -    BDRVQcowState *s = bs->opaque; -    int ret; - -    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend, -                          type); -    if (ret < 0) { -        return ret; -    } - -    return get_refcount(bs, cluster_index); -} - - - -/*********************************************************/ -/* cluster allocation functions */ - - - -/* return < 0 if error */ -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size) -{ -    BDRVQcowState *s = bs->opaque; -    int i, nb_clusters, refcount; - -    nb_clusters = size_to_clusters(s, size); -retry: -    for(i = 0; i < nb_clusters; i++) { -        int64_t next_cluster_index = s->free_cluster_index++; -        refcount = get_refcount(bs, next_cluster_index); - -        if (refcount < 0) { -            return refcount; -        } else if (refcount != 0) { -            goto retry; -        } -    } -#ifdef DEBUG_ALLOC2 -    fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n", -            size, -            (s->free_cluster_index - nb_clusters) << s->cluster_bits); -#endif -    return (s->free_cluster_index - nb_clusters) << s->cluster_bits; -} - -int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) -{ -    int64_t offset; -    int ret; - -    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); -    offset = alloc_clusters_noref(bs, size); -    if (offset < 0) { -        return offset; -    } - -    ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER); -    if (ret < 0) { -        return ret; -    } - -    return offset; -} - -int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, -    int nb_clusters) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t cluster_index; -    uint64_t old_free_cluster_index; -    int i, refcount, ret; - -    /* Check how many clusters there are free */ -    cluster_index = offset >> s->cluster_bits; -    for(i = 0; i < nb_clusters; i++) { -        refcount = get_refcount(bs, cluster_index++); - -        if (refcount < 0) { -            return refcount; -        } else if (refcount != 0) { -            break; -        } -    } - -    /* And then allocate them */ -    old_free_cluster_index = s->free_cluster_index; -    s->free_cluster_index = cluster_index + i; - -    ret = update_refcount(bs, offset, i << s->cluster_bits, 1, -                          QCOW2_DISCARD_NEVER); -    if (ret < 0) { -        return ret; -    } - -    s->free_cluster_index = old_free_cluster_index; - -    return i; -} - -/* only used to allocate compressed sectors. We try to allocate -   contiguous sectors. size must be <= cluster_size */ -int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) -{ -    BDRVQcowState *s = bs->opaque; -    int64_t offset, cluster_offset; -    int free_in_cluster; - -    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES); -    assert(size > 0 && size <= s->cluster_size); -    if (s->free_byte_offset == 0) { -        offset = qcow2_alloc_clusters(bs, s->cluster_size); -        if (offset < 0) { -            return offset; -        } -        s->free_byte_offset = offset; -    } - redo: -    free_in_cluster = s->cluster_size - -        (s->free_byte_offset & (s->cluster_size - 1)); -    if (size <= free_in_cluster) { -        /* enough space in current cluster */ -        offset = s->free_byte_offset; -        s->free_byte_offset += size; -        free_in_cluster -= size; -        if (free_in_cluster == 0) -            s->free_byte_offset = 0; -        if ((offset & (s->cluster_size - 1)) != 0) -            update_cluster_refcount(bs, offset >> s->cluster_bits, 1, -                                    QCOW2_DISCARD_NEVER); -    } else { -        offset = qcow2_alloc_clusters(bs, s->cluster_size); -        if (offset < 0) { -            return offset; -        } -        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1); -        if ((cluster_offset + s->cluster_size) == offset) { -            /* we are lucky: contiguous data */ -            offset = s->free_byte_offset; -            update_cluster_refcount(bs, offset >> s->cluster_bits, 1, -                                    QCOW2_DISCARD_NEVER); -            s->free_byte_offset += size; -        } else { -            s->free_byte_offset = offset; -            goto redo; -        } -    } - -    /* The cluster refcount was incremented, either by qcow2_alloc_clusters() -     * or explicitly by update_cluster_refcount().  Refcount blocks must be -     * flushed before the caller's L2 table updates. -     */ -    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); -    return offset; -} - -void qcow2_free_clusters(BlockDriverState *bs, -                          int64_t offset, int64_t size, -                          enum qcow2_discard_type type) -{ -    int ret; - -    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE); -    ret = update_refcount(bs, offset, size, -1, type); -    if (ret < 0) { -        fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret)); -        /* TODO Remember the clusters to free them later and avoid leaking */ -    } -} - -/* - * Free a cluster using its L2 entry (handles clusters of all types, e.g. - * normal cluster, compressed cluster, etc.) - */ -void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, -                             int nb_clusters, enum qcow2_discard_type type) -{ -    BDRVQcowState *s = bs->opaque; - -    switch (qcow2_get_cluster_type(l2_entry)) { -    case QCOW2_CLUSTER_COMPRESSED: -        { -            int nb_csectors; -            nb_csectors = ((l2_entry >> s->csize_shift) & -                           s->csize_mask) + 1; -            qcow2_free_clusters(bs, -                (l2_entry & s->cluster_offset_mask) & ~511, -                nb_csectors * 512, type); -        } -        break; -    case QCOW2_CLUSTER_NORMAL: -        qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, -                            nb_clusters << s->cluster_bits, type); -        break; -    case QCOW2_CLUSTER_UNALLOCATED: -    case QCOW2_CLUSTER_ZERO: -        break; -    default: -        abort(); -    } -} - - - -/*********************************************************/ -/* snapshots and image creation */ - - - -/* update the refcounts of snapshots and the copied flag */ -int qcow2_update_snapshot_refcount(BlockDriverState *bs, -    int64_t l1_table_offset, int l1_size, int addend) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; -    int64_t old_offset, old_l2_offset; -    int i, j, l1_modified = 0, nb_csectors, refcount; -    int ret; - -    l2_table = NULL; -    l1_table = NULL; -    l1_size2 = l1_size * sizeof(uint64_t); - -    s->cache_discards = true; - -    /* WARNING: qcow2_snapshot_goto relies on this function not using the -     * l1_table_offset when it is the current s->l1_table_offset! Be careful -     * when changing this! */ -    if (l1_table_offset != s->l1_table_offset) { -        l1_table = g_malloc0(align_offset(l1_size2, 512)); -        l1_allocated = 1; - -        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); -        if (ret < 0) { -            goto fail; -        } - -        for(i = 0;i < l1_size; i++) -            be64_to_cpus(&l1_table[i]); -    } else { -        assert(l1_size == s->l1_size); -        l1_table = s->l1_table; -        l1_allocated = 0; -    } - -    for(i = 0; i < l1_size; i++) { -        l2_offset = l1_table[i]; -        if (l2_offset) { -            old_l2_offset = l2_offset; -            l2_offset &= L1E_OFFSET_MASK; - -            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, -                (void**) &l2_table); -            if (ret < 0) { -                goto fail; -            } - -            for(j = 0; j < s->l2_size; j++) { -                offset = be64_to_cpu(l2_table[j]); -                if (offset != 0) { -                    old_offset = offset; -                    offset &= ~QCOW_OFLAG_COPIED; -                    if (offset & QCOW_OFLAG_COMPRESSED) { -                        nb_csectors = ((offset >> s->csize_shift) & -                                       s->csize_mask) + 1; -                        if (addend != 0) { -                            int ret; -                            ret = update_refcount(bs, -                                (offset & s->cluster_offset_mask) & ~511, -                                nb_csectors * 512, addend, -                                QCOW2_DISCARD_SNAPSHOT); -                            if (ret < 0) { -                                goto fail; -                            } -                        } -                        /* compressed clusters are never modified */ -                        refcount = 2; -                    } else { -                        uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits; -                        if (addend != 0) { -                            refcount = update_cluster_refcount(bs, cluster_index, addend, -                                                               QCOW2_DISCARD_SNAPSHOT); -                        } else { -                            refcount = get_refcount(bs, cluster_index); -                        } - -                        if (refcount < 0) { -                            ret = refcount; -                            goto fail; -                        } -                    } - -                    if (refcount == 1) { -                        offset |= QCOW_OFLAG_COPIED; -                    } -                    if (offset != old_offset) { -                        if (addend > 0) { -                            qcow2_cache_set_dependency(bs, s->l2_table_cache, -                                s->refcount_block_cache); -                        } -                        l2_table[j] = cpu_to_be64(offset); -                        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); -                    } -                } -            } - -            ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -            if (ret < 0) { -                goto fail; -            } - - -            if (addend != 0) { -                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend, -                                                   QCOW2_DISCARD_SNAPSHOT); -            } else { -                refcount = get_refcount(bs, l2_offset >> s->cluster_bits); -            } -            if (refcount < 0) { -                ret = refcount; -                goto fail; -            } else if (refcount == 1) { -                l2_offset |= QCOW_OFLAG_COPIED; -            } -            if (l2_offset != old_l2_offset) { -                l1_table[i] = l2_offset; -                l1_modified = 1; -            } -        } -    } - -    ret = bdrv_flush(bs); -fail: -    if (l2_table) { -        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -    } - -    s->cache_discards = false; -    qcow2_process_discards(bs, ret); - -    /* Update L1 only if it isn't deleted anyway (addend = -1) */ -    if (ret == 0 && addend >= 0 && l1_modified) { -        for (i = 0; i < l1_size; i++) { -            cpu_to_be64s(&l1_table[i]); -        } - -        ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2); - -        for (i = 0; i < l1_size; i++) { -            be64_to_cpus(&l1_table[i]); -        } -    } -    if (l1_allocated) -        g_free(l1_table); -    return ret; -} - - - - -/*********************************************************/ -/* refcount checking functions */ - - - -/* - * Increases the refcount for a range of clusters in a given refcount table. - * This is used to construct a temporary refcount table out of L1 and L2 tables - * which can be compared the the refcount table saved in the image. - * - * Modifies the number of errors in res. - */ -static void inc_refcounts(BlockDriverState *bs, -                          BdrvCheckResult *res, -                          uint16_t *refcount_table, -                          int refcount_table_size, -                          int64_t offset, int64_t size) -{ -    BDRVQcowState *s = bs->opaque; -    int64_t start, last, cluster_offset; -    int k; - -    if (size <= 0) -        return; - -    start = offset & ~(s->cluster_size - 1); -    last = (offset + size - 1) & ~(s->cluster_size - 1); -    for(cluster_offset = start; cluster_offset <= last; -        cluster_offset += s->cluster_size) { -        k = cluster_offset >> s->cluster_bits; -        if (k < 0) { -            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n", -                cluster_offset); -            res->corruptions++; -        } else if (k >= refcount_table_size) { -            fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after " -                "the end of the image file, can't properly check refcounts.\n", -                cluster_offset); -            res->check_errors++; -        } else { -            if (++refcount_table[k] == 0) { -                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 -                    "\n", cluster_offset); -                res->corruptions++; -            } -        } -    } -} - -/* Flags for check_refcounts_l1() and check_refcounts_l2() */ -enum { -    CHECK_OFLAG_COPIED = 0x1,   /* check QCOW_OFLAG_COPIED matches refcount */ -    CHECK_FRAG_INFO = 0x2,      /* update BlockFragInfo counters */ -}; - -/* - * Increases the refcount in the given refcount table for the all clusters - * referenced in the L2 table. While doing so, performs some checks on L2 - * entries. - * - * Returns the number of errors found by the checks or -errno if an internal - * error occurred. - */ -static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, -    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset, -    int flags) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t *l2_table, l2_entry; -    uint64_t next_contiguous_offset = 0; -    int i, l2_size, nb_csectors, refcount; - -    /* Read L2 table from disk */ -    l2_size = s->l2_size * sizeof(uint64_t); -    l2_table = g_malloc(l2_size); - -    if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size) -        goto fail; - -    /* Do the actual checks */ -    for(i = 0; i < s->l2_size; i++) { -        l2_entry = be64_to_cpu(l2_table[i]); - -        switch (qcow2_get_cluster_type(l2_entry)) { -        case QCOW2_CLUSTER_COMPRESSED: -            /* Compressed clusters don't have QCOW_OFLAG_COPIED */ -            if (l2_entry & QCOW_OFLAG_COPIED) { -                fprintf(stderr, "ERROR: cluster %" PRId64 ": " -                    "copied flag must never be set for compressed " -                    "clusters\n", l2_entry >> s->cluster_bits); -                l2_entry &= ~QCOW_OFLAG_COPIED; -                res->corruptions++; -            } - -            /* Mark cluster as used */ -            nb_csectors = ((l2_entry >> s->csize_shift) & -                           s->csize_mask) + 1; -            l2_entry &= s->cluster_offset_mask; -            inc_refcounts(bs, res, refcount_table, refcount_table_size, -                l2_entry & ~511, nb_csectors * 512); - -            if (flags & CHECK_FRAG_INFO) { -                res->bfi.allocated_clusters++; -                res->bfi.compressed_clusters++; - -                /* Compressed clusters are fragmented by nature.  Since they -                 * take up sub-sector space but we only have sector granularity -                 * I/O we need to re-read the same sectors even for adjacent -                 * compressed clusters. -                 */ -                res->bfi.fragmented_clusters++; -            } -            break; - -        case QCOW2_CLUSTER_ZERO: -            if ((l2_entry & L2E_OFFSET_MASK) == 0) { -                break; -            } -            /* fall through */ - -        case QCOW2_CLUSTER_NORMAL: -        { -            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ -            uint64_t offset = l2_entry & L2E_OFFSET_MASK; - -            if (flags & CHECK_OFLAG_COPIED) { -                refcount = get_refcount(bs, offset >> s->cluster_bits); -                if (refcount < 0) { -                    fprintf(stderr, "Can't get refcount for offset %" -                        PRIx64 ": %s\n", l2_entry, strerror(-refcount)); -                    goto fail; -                } -                if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { -                    fprintf(stderr, "ERROR OFLAG_COPIED: offset=%" -                        PRIx64 " refcount=%d\n", l2_entry, refcount); -                    res->corruptions++; -                } -            } - -            if (flags & CHECK_FRAG_INFO) { -                res->bfi.allocated_clusters++; -                if (next_contiguous_offset && -                    offset != next_contiguous_offset) { -                    res->bfi.fragmented_clusters++; -                } -                next_contiguous_offset = offset + s->cluster_size; -            } - -            /* Mark cluster as used */ -            inc_refcounts(bs, res, refcount_table,refcount_table_size, -                offset, s->cluster_size); - -            /* Correct offsets are cluster aligned */ -            if (offset & (s->cluster_size - 1)) { -                fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not " -                    "properly aligned; L2 entry corrupted.\n", offset); -                res->corruptions++; -            } -            break; -        } - -        case QCOW2_CLUSTER_UNALLOCATED: -            break; - -        default: -            abort(); -        } -    } - -    g_free(l2_table); -    return 0; - -fail: -    fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n"); -    g_free(l2_table); -    return -EIO; -} - -/* - * Increases the refcount for the L1 table, its L2 tables and all referenced - * clusters in the given refcount table. While doing so, performs some checks - * on L1 and L2 entries. - * - * Returns the number of errors found by the checks or -errno if an internal - * error occurred. - */ -static int check_refcounts_l1(BlockDriverState *bs, -                              BdrvCheckResult *res, -                              uint16_t *refcount_table, -                              int refcount_table_size, -                              int64_t l1_table_offset, int l1_size, -                              int flags) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t *l1_table, l2_offset, l1_size2; -    int i, refcount, ret; - -    l1_size2 = l1_size * sizeof(uint64_t); - -    /* Mark L1 table as used */ -    inc_refcounts(bs, res, refcount_table, refcount_table_size, -        l1_table_offset, l1_size2); - -    /* Read L1 table entries from disk */ -    if (l1_size2 == 0) { -        l1_table = NULL; -    } else { -        l1_table = g_malloc(l1_size2); -        if (bdrv_pread(bs->file, l1_table_offset, -                       l1_table, l1_size2) != l1_size2) -            goto fail; -        for(i = 0;i < l1_size; i++) -            be64_to_cpus(&l1_table[i]); -    } - -    /* Do the actual checks */ -    for(i = 0; i < l1_size; i++) { -        l2_offset = l1_table[i]; -        if (l2_offset) { -            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ -            if (flags & CHECK_OFLAG_COPIED) { -                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) -                    >> s->cluster_bits); -                if (refcount < 0) { -                    fprintf(stderr, "Can't get refcount for l2_offset %" -                        PRIx64 ": %s\n", l2_offset, strerror(-refcount)); -                    goto fail; -                } -                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) { -                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64 -                        " refcount=%d\n", l2_offset, refcount); -                    res->corruptions++; -                } -            } - -            /* Mark L2 table as used */ -            l2_offset &= L1E_OFFSET_MASK; -            inc_refcounts(bs, res, refcount_table, refcount_table_size, -                l2_offset, s->cluster_size); - -            /* L2 tables are cluster aligned */ -            if (l2_offset & (s->cluster_size - 1)) { -                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " -                    "cluster aligned; L1 entry corrupted\n", l2_offset); -                res->corruptions++; -            } - -            /* Process and check L2 entries */ -            ret = check_refcounts_l2(bs, res, refcount_table, -                                     refcount_table_size, l2_offset, flags); -            if (ret < 0) { -                goto fail; -            } -        } -    } -    g_free(l1_table); -    return 0; - -fail: -    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); -    res->check_errors++; -    g_free(l1_table); -    return -EIO; -} - -/* - * Checks an image for refcount consistency. - * - * Returns 0 if no errors are found, the number of errors in case the image is - * detected as corrupted, and -errno when an internal error occurred. - */ -int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, -                          BdrvCheckMode fix) -{ -    BDRVQcowState *s = bs->opaque; -    int64_t size, i, highest_cluster; -    int nb_clusters, refcount1, refcount2; -    QCowSnapshot *sn; -    uint16_t *refcount_table; -    int ret; - -    size = bdrv_getlength(bs->file); -    nb_clusters = size_to_clusters(s, size); -    refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t)); - -    res->bfi.total_clusters = -        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE); - -    /* header */ -    inc_refcounts(bs, res, refcount_table, nb_clusters, -        0, s->cluster_size); - -    /* current L1 table */ -    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, -                             s->l1_table_offset, s->l1_size, -                             CHECK_OFLAG_COPIED | CHECK_FRAG_INFO); -    if (ret < 0) { -        goto fail; -    } - -    /* snapshots */ -    for(i = 0; i < s->nb_snapshots; i++) { -        sn = s->snapshots + i; -        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, -            sn->l1_table_offset, sn->l1_size, 0); -        if (ret < 0) { -            goto fail; -        } -    } -    inc_refcounts(bs, res, refcount_table, nb_clusters, -        s->snapshots_offset, s->snapshots_size); - -    /* refcount data */ -    inc_refcounts(bs, res, refcount_table, nb_clusters, -        s->refcount_table_offset, -        s->refcount_table_size * sizeof(uint64_t)); - -    for(i = 0; i < s->refcount_table_size; i++) { -        uint64_t offset, cluster; -        offset = s->refcount_table[i]; -        cluster = offset >> s->cluster_bits; - -        /* Refcount blocks are cluster aligned */ -        if (offset & (s->cluster_size - 1)) { -            fprintf(stderr, "ERROR refcount block %" PRId64 " is not " -                "cluster aligned; refcount table entry corrupted\n", i); -            res->corruptions++; -            continue; -        } - -        if (cluster >= nb_clusters) { -            fprintf(stderr, "ERROR refcount block %" PRId64 -                    " is outside image\n", i); -            res->corruptions++; -            continue; -        } - -        if (offset != 0) { -            inc_refcounts(bs, res, refcount_table, nb_clusters, -                offset, s->cluster_size); -            if (refcount_table[cluster] != 1) { -                fprintf(stderr, "ERROR refcount block %" PRId64 -                    " refcount=%d\n", -                    i, refcount_table[cluster]); -                res->corruptions++; -            } -        } -    } - -    /* compare ref counts */ -    for (i = 0, highest_cluster = 0; i < nb_clusters; i++) { -        refcount1 = get_refcount(bs, i); -        if (refcount1 < 0) { -            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", -                i, strerror(-refcount1)); -            res->check_errors++; -            continue; -        } - -        refcount2 = refcount_table[i]; - -        if (refcount1 > 0 || refcount2 > 0) { -            highest_cluster = i; -        } - -        if (refcount1 != refcount2) { - -            /* Check if we're allowed to fix the mismatch */ -            int *num_fixed = NULL; -            if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) { -                num_fixed = &res->leaks_fixed; -            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) { -                num_fixed = &res->corruptions_fixed; -            } - -            fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n", -                   num_fixed != NULL     ? "Repairing" : -                   refcount1 < refcount2 ? "ERROR" : -                                           "Leaked", -                   i, refcount1, refcount2); - -            if (num_fixed) { -                ret = update_refcount(bs, i << s->cluster_bits, 1, -                                      refcount2 - refcount1, -                                      QCOW2_DISCARD_ALWAYS); -                if (ret >= 0) { -                    (*num_fixed)++; -                    continue; -                } -            } - -            /* And if we couldn't, print an error */ -            if (refcount1 < refcount2) { -                res->corruptions++; -            } else { -                res->leaks++; -            } -        } -    } - -    res->image_end_offset = (highest_cluster + 1) * s->cluster_size; -    ret = 0; - -fail: -    g_free(refcount_table); - -    return ret; -} - diff --git a/contrib/qemu/block/qcow2-snapshot.c b/contrib/qemu/block/qcow2-snapshot.c deleted file mode 100644 index 0caac9055f8..00000000000 --- a/contrib/qemu/block/qcow2-snapshot.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * Block driver for the QCOW version 2 format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu-common.h" -#include "block/block_int.h" -#include "block/qcow2.h" - -typedef struct QEMU_PACKED QCowSnapshotHeader { -    /* header is 8 byte aligned */ -    uint64_t l1_table_offset; - -    uint32_t l1_size; -    uint16_t id_str_size; -    uint16_t name_size; - -    uint32_t date_sec; -    uint32_t date_nsec; - -    uint64_t vm_clock_nsec; - -    uint32_t vm_state_size; -    uint32_t extra_data_size; /* for extension */ -    /* extra data follows */ -    /* id_str follows */ -    /* name follows  */ -} QCowSnapshotHeader; - -typedef struct QEMU_PACKED QCowSnapshotExtraData { -    uint64_t vm_state_size_large; -    uint64_t disk_size; -} QCowSnapshotExtraData; - -void qcow2_free_snapshots(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    int i; - -    for(i = 0; i < s->nb_snapshots; i++) { -        g_free(s->snapshots[i].name); -        g_free(s->snapshots[i].id_str); -    } -    g_free(s->snapshots); -    s->snapshots = NULL; -    s->nb_snapshots = 0; -} - -int qcow2_read_snapshots(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    QCowSnapshotHeader h; -    QCowSnapshotExtraData extra; -    QCowSnapshot *sn; -    int i, id_str_size, name_size; -    int64_t offset; -    uint32_t extra_data_size; -    int ret; - -    if (!s->nb_snapshots) { -        s->snapshots = NULL; -        s->snapshots_size = 0; -        return 0; -    } - -    offset = s->snapshots_offset; -    s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot)); - -    for(i = 0; i < s->nb_snapshots; i++) { -        /* Read statically sized part of the snapshot header */ -        offset = align_offset(offset, 8); -        ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); -        if (ret < 0) { -            goto fail; -        } - -        offset += sizeof(h); -        sn = s->snapshots + i; -        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); -        sn->l1_size = be32_to_cpu(h.l1_size); -        sn->vm_state_size = be32_to_cpu(h.vm_state_size); -        sn->date_sec = be32_to_cpu(h.date_sec); -        sn->date_nsec = be32_to_cpu(h.date_nsec); -        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec); -        extra_data_size = be32_to_cpu(h.extra_data_size); - -        id_str_size = be16_to_cpu(h.id_str_size); -        name_size = be16_to_cpu(h.name_size); - -        /* Read extra data */ -        ret = bdrv_pread(bs->file, offset, &extra, -                         MIN(sizeof(extra), extra_data_size)); -        if (ret < 0) { -            goto fail; -        } -        offset += extra_data_size; - -        if (extra_data_size >= 8) { -            sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large); -        } - -        if (extra_data_size >= 16) { -            sn->disk_size = be64_to_cpu(extra.disk_size); -        } else { -            sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; -        } - -        /* Read snapshot ID */ -        sn->id_str = g_malloc(id_str_size + 1); -        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size); -        if (ret < 0) { -            goto fail; -        } -        offset += id_str_size; -        sn->id_str[id_str_size] = '\0'; - -        /* Read snapshot name */ -        sn->name = g_malloc(name_size + 1); -        ret = bdrv_pread(bs->file, offset, sn->name, name_size); -        if (ret < 0) { -            goto fail; -        } -        offset += name_size; -        sn->name[name_size] = '\0'; -    } - -    s->snapshots_size = offset - s->snapshots_offset; -    return 0; - -fail: -    qcow2_free_snapshots(bs); -    return ret; -} - -/* add at the end of the file a new list of snapshots */ -static int qcow2_write_snapshots(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    QCowSnapshot *sn; -    QCowSnapshotHeader h; -    QCowSnapshotExtraData extra; -    int i, name_size, id_str_size, snapshots_size; -    struct { -        uint32_t nb_snapshots; -        uint64_t snapshots_offset; -    } QEMU_PACKED header_data; -    int64_t offset, snapshots_offset; -    int ret; - -    /* compute the size of the snapshots */ -    offset = 0; -    for(i = 0; i < s->nb_snapshots; i++) { -        sn = s->snapshots + i; -        offset = align_offset(offset, 8); -        offset += sizeof(h); -        offset += sizeof(extra); -        offset += strlen(sn->id_str); -        offset += strlen(sn->name); -    } -    snapshots_size = offset; - -    /* Allocate space for the new snapshot list */ -    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size); -    offset = snapshots_offset; -    if (offset < 0) { -        return offset; -    } -    ret = bdrv_flush(bs); -    if (ret < 0) { -        return ret; -    } - -    /* Write all snapshots to the new list */ -    for(i = 0; i < s->nb_snapshots; i++) { -        sn = s->snapshots + i; -        memset(&h, 0, sizeof(h)); -        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset); -        h.l1_size = cpu_to_be32(sn->l1_size); -        /* If it doesn't fit in 32 bit, older implementations should treat it -         * as a disk-only snapshot rather than truncate the VM state */ -        if (sn->vm_state_size <= 0xffffffff) { -            h.vm_state_size = cpu_to_be32(sn->vm_state_size); -        } -        h.date_sec = cpu_to_be32(sn->date_sec); -        h.date_nsec = cpu_to_be32(sn->date_nsec); -        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec); -        h.extra_data_size = cpu_to_be32(sizeof(extra)); - -        memset(&extra, 0, sizeof(extra)); -        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size); -        extra.disk_size = cpu_to_be64(sn->disk_size); - -        id_str_size = strlen(sn->id_str); -        name_size = strlen(sn->name); -        h.id_str_size = cpu_to_be16(id_str_size); -        h.name_size = cpu_to_be16(name_size); -        offset = align_offset(offset, 8); - -        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h)); -        if (ret < 0) { -            goto fail; -        } -        offset += sizeof(h); - -        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra)); -        if (ret < 0) { -            goto fail; -        } -        offset += sizeof(extra); - -        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size); -        if (ret < 0) { -            goto fail; -        } -        offset += id_str_size; - -        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size); -        if (ret < 0) { -            goto fail; -        } -        offset += name_size; -    } - -    /* -     * Update the header to point to the new snapshot table. This requires the -     * new table and its refcounts to be stable on disk. -     */ -    ret = bdrv_flush(bs); -    if (ret < 0) { -        goto fail; -    } - -    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) != -        offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots)); - -    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots); -    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset); - -    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), -                           &header_data, sizeof(header_data)); -    if (ret < 0) { -        goto fail; -    } - -    /* free the old snapshot table */ -    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size, -                        QCOW2_DISCARD_SNAPSHOT); -    s->snapshots_offset = snapshots_offset; -    s->snapshots_size = snapshots_size; -    return 0; - -fail: -    return ret; -} - -static void find_new_snapshot_id(BlockDriverState *bs, -                                 char *id_str, int id_str_size) -{ -    BDRVQcowState *s = bs->opaque; -    QCowSnapshot *sn; -    int i, id, id_max = 0; - -    for(i = 0; i < s->nb_snapshots; i++) { -        sn = s->snapshots + i; -        id = strtoul(sn->id_str, NULL, 10); -        if (id > id_max) -            id_max = id; -    } -    snprintf(id_str, id_str_size, "%d", id_max + 1); -} - -static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str) -{ -    BDRVQcowState *s = bs->opaque; -    int i; - -    for(i = 0; i < s->nb_snapshots; i++) { -        if (!strcmp(s->snapshots[i].id_str, id_str)) -            return i; -    } -    return -1; -} - -static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name) -{ -    BDRVQcowState *s = bs->opaque; -    int i, ret; - -    ret = find_snapshot_by_id(bs, name); -    if (ret >= 0) -        return ret; -    for(i = 0; i < s->nb_snapshots; i++) { -        if (!strcmp(s->snapshots[i].name, name)) -            return i; -    } -    return -1; -} - -/* if no id is provided, a new one is constructed */ -int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) -{ -    BDRVQcowState *s = bs->opaque; -    QCowSnapshot *new_snapshot_list = NULL; -    QCowSnapshot *old_snapshot_list = NULL; -    QCowSnapshot sn1, *sn = &sn1; -    int i, ret; -    uint64_t *l1_table = NULL; -    int64_t l1_table_offset; - -    memset(sn, 0, sizeof(*sn)); - -    /* Generate an ID if it wasn't passed */ -    if (sn_info->id_str[0] == '\0') { -        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str)); -    } - -    /* Check that the ID is unique */ -    if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) { -        return -EEXIST; -    } - -    /* Populate sn with passed data */ -    sn->id_str = g_strdup(sn_info->id_str); -    sn->name = g_strdup(sn_info->name); - -    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; -    sn->vm_state_size = sn_info->vm_state_size; -    sn->date_sec = sn_info->date_sec; -    sn->date_nsec = sn_info->date_nsec; -    sn->vm_clock_nsec = sn_info->vm_clock_nsec; - -    /* Allocate the L1 table of the snapshot and copy the current one there. */ -    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); -    if (l1_table_offset < 0) { -        ret = l1_table_offset; -        goto fail; -    } - -    sn->l1_table_offset = l1_table_offset; -    sn->l1_size = s->l1_size; - -    l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); -    for(i = 0; i < s->l1_size; i++) { -        l1_table[i] = cpu_to_be64(s->l1_table[i]); -    } - -    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table, -                      s->l1_size * sizeof(uint64_t)); -    if (ret < 0) { -        goto fail; -    } - -    g_free(l1_table); -    l1_table = NULL; - -    /* -     * Increase the refcounts of all clusters and make sure everything is -     * stable on disk before updating the snapshot table to contain a pointer -     * to the new L1 table. -     */ -    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); -    if (ret < 0) { -        goto fail; -    } - -    /* Append the new snapshot to the snapshot list */ -    new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot)); -    if (s->snapshots) { -        memcpy(new_snapshot_list, s->snapshots, -               s->nb_snapshots * sizeof(QCowSnapshot)); -        old_snapshot_list = s->snapshots; -    } -    s->snapshots = new_snapshot_list; -    s->snapshots[s->nb_snapshots++] = *sn; - -    ret = qcow2_write_snapshots(bs); -    if (ret < 0) { -        g_free(s->snapshots); -        s->snapshots = old_snapshot_list; -        goto fail; -    } - -    g_free(old_snapshot_list); - -#ifdef DEBUG_ALLOC -    { -      BdrvCheckResult result = {0}; -      qcow2_check_refcounts(bs, &result, 0); -    } -#endif -    return 0; - -fail: -    g_free(sn->id_str); -    g_free(sn->name); -    g_free(l1_table); - -    return ret; -} - -/* copy the snapshot 'snapshot_name' into the current disk image */ -int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) -{ -    BDRVQcowState *s = bs->opaque; -    QCowSnapshot *sn; -    int i, snapshot_index; -    int cur_l1_bytes, sn_l1_bytes; -    int ret; -    uint64_t *sn_l1_table = NULL; - -    /* Search the snapshot */ -    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); -    if (snapshot_index < 0) { -        return -ENOENT; -    } -    sn = &s->snapshots[snapshot_index]; - -    if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) { -        error_report("qcow2: Loading snapshots with different disk " -            "size is not implemented"); -        ret = -ENOTSUP; -        goto fail; -    } - -    /* -     * Make sure that the current L1 table is big enough to contain the whole -     * L1 table of the snapshot. If the snapshot L1 table is smaller, the -     * current one must be padded with zeros. -     */ -    ret = qcow2_grow_l1_table(bs, sn->l1_size, true); -    if (ret < 0) { -        goto fail; -    } - -    cur_l1_bytes = s->l1_size * sizeof(uint64_t); -    sn_l1_bytes = sn->l1_size * sizeof(uint64_t); - -    /* -     * Copy the snapshot L1 table to the current L1 table. -     * -     * Before overwriting the old current L1 table on disk, make sure to -     * increase all refcounts for the clusters referenced by the new one. -     * Decrease the refcount referenced by the old one only when the L1 -     * table is overwritten. -     */ -    sn_l1_table = g_malloc0(cur_l1_bytes); - -    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes); -    if (ret < 0) { -        goto fail; -    } - -    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, -                                         sn->l1_size, 1); -    if (ret < 0) { -        goto fail; -    } - -    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table, -                           cur_l1_bytes); -    if (ret < 0) { -        goto fail; -    } - -    /* -     * Decrease refcount of clusters of current L1 table. -     * -     * At this point, the in-memory s->l1_table points to the old L1 table, -     * whereas on disk we already have the new one. -     * -     * qcow2_update_snapshot_refcount special cases the current L1 table to use -     * the in-memory data instead of really using the offset to load a new one, -     * which is why this works. -     */ -    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, -                                         s->l1_size, -1); - -    /* -     * Now update the in-memory L1 table to be in sync with the on-disk one. We -     * need to do this even if updating refcounts failed. -     */ -    for(i = 0;i < s->l1_size; i++) { -        s->l1_table[i] = be64_to_cpu(sn_l1_table[i]); -    } - -    if (ret < 0) { -        goto fail; -    } - -    g_free(sn_l1_table); -    sn_l1_table = NULL; - -    /* -     * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed -     * when we decreased the refcount of the old snapshot. -     */ -    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); -    if (ret < 0) { -        goto fail; -    } - -#ifdef DEBUG_ALLOC -    { -        BdrvCheckResult result = {0}; -        qcow2_check_refcounts(bs, &result, 0); -    } -#endif -    return 0; - -fail: -    g_free(sn_l1_table); -    return ret; -} - -int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) -{ -    BDRVQcowState *s = bs->opaque; -    QCowSnapshot sn; -    int snapshot_index, ret; - -    /* Search the snapshot */ -    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); -    if (snapshot_index < 0) { -        return -ENOENT; -    } -    sn = s->snapshots[snapshot_index]; - -    /* Remove it from the snapshot list */ -    memmove(s->snapshots + snapshot_index, -            s->snapshots + snapshot_index + 1, -            (s->nb_snapshots - snapshot_index - 1) * sizeof(sn)); -    s->nb_snapshots--; -    ret = qcow2_write_snapshots(bs); -    if (ret < 0) { -        return ret; -    } - -    /* -     * The snapshot is now unused, clean up. If we fail after this point, we -     * won't recover but just leak clusters. -     */ -    g_free(sn.id_str); -    g_free(sn.name); - -    /* -     * Now decrease the refcounts of clusters referenced by the snapshot and -     * free the L1 table. -     */ -    ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset, -                                         sn.l1_size, -1); -    if (ret < 0) { -        return ret; -    } -    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t), -                        QCOW2_DISCARD_SNAPSHOT); - -    /* must update the copied flag on the current cluster offsets */ -    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); -    if (ret < 0) { -        return ret; -    } - -#ifdef DEBUG_ALLOC -    { -        BdrvCheckResult result = {0}; -        qcow2_check_refcounts(bs, &result, 0); -    } -#endif -    return 0; -} - -int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) -{ -    BDRVQcowState *s = bs->opaque; -    QEMUSnapshotInfo *sn_tab, *sn_info; -    QCowSnapshot *sn; -    int i; - -    if (!s->nb_snapshots) { -        *psn_tab = NULL; -        return s->nb_snapshots; -    } - -    sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo)); -    for(i = 0; i < s->nb_snapshots; i++) { -        sn_info = sn_tab + i; -        sn = s->snapshots + i; -        pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), -                sn->id_str); -        pstrcpy(sn_info->name, sizeof(sn_info->name), -                sn->name); -        sn_info->vm_state_size = sn->vm_state_size; -        sn_info->date_sec = sn->date_sec; -        sn_info->date_nsec = sn->date_nsec; -        sn_info->vm_clock_nsec = sn->vm_clock_nsec; -    } -    *psn_tab = sn_tab; -    return s->nb_snapshots; -} - -int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) -{ -    int i, snapshot_index; -    BDRVQcowState *s = bs->opaque; -    QCowSnapshot *sn; -    uint64_t *new_l1_table; -    int new_l1_bytes; -    int ret; - -    assert(bs->read_only); - -    /* Search the snapshot */ -    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name); -    if (snapshot_index < 0) { -        return -ENOENT; -    } -    sn = &s->snapshots[snapshot_index]; - -    /* Allocate and read in the snapshot's L1 table */ -    new_l1_bytes = s->l1_size * sizeof(uint64_t); -    new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512)); - -    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes); -    if (ret < 0) { -        g_free(new_l1_table); -        return ret; -    } - -    /* Switch the L1 table */ -    g_free(s->l1_table); - -    s->l1_size = sn->l1_size; -    s->l1_table_offset = sn->l1_table_offset; -    s->l1_table = new_l1_table; - -    for(i = 0;i < s->l1_size; i++) { -        be64_to_cpus(&s->l1_table[i]); -    } - -    return 0; -} diff --git a/contrib/qemu/block/qcow2.c b/contrib/qemu/block/qcow2.c deleted file mode 100644 index 0eceefe2cd9..00000000000 --- a/contrib/qemu/block/qcow2.c +++ /dev/null @@ -1,1825 +0,0 @@ -/* - * Block driver for the QCOW version 2 format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block/block_int.h" -#include "qemu/module.h" -#include <zlib.h> -#include "qemu/aes.h" -#include "block/qcow2.h" -#include "qemu/error-report.h" -#include "qapi/qmp/qerror.h" -#include "qapi/qmp/qbool.h" -#include "trace.h" - -/* -  Differences with QCOW: - -  - Support for multiple incremental snapshots. -  - Memory management by reference counts. -  - Clusters which have a reference count of one have the bit -    QCOW_OFLAG_COPIED to optimize write performance. -  - Size of compressed clusters is stored in sectors to reduce bit usage -    in the cluster offsets. -  - Support for storing additional data (such as the VM state) in the -    snapshots. -  - If a backing store is used, the cluster size is not constrained -    (could be backported to QCOW). -  - L2 tables have always a size of one cluster. -*/ - - -typedef struct { -    uint32_t magic; -    uint32_t len; -} QCowExtension; - -#define  QCOW2_EXT_MAGIC_END 0 -#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA -#define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 - -static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) -{ -    const QCowHeader *cow_header = (const void *)buf; - -    if (buf_size >= sizeof(QCowHeader) && -        be32_to_cpu(cow_header->magic) == QCOW_MAGIC && -        be32_to_cpu(cow_header->version) >= 2) -        return 100; -    else -        return 0; -} - - -/*  - * read qcow2 extension and fill bs - * start reading from start_offset - * finish reading upon magic of value 0 or when end_offset reached - * unknown magic is skipped (future extension this version knows nothing about) - * return 0 upon success, non-0 otherwise - */ -static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, -                                 uint64_t end_offset, void **p_feature_table) -{ -    BDRVQcowState *s = bs->opaque; -    QCowExtension ext; -    uint64_t offset; -    int ret; - -#ifdef DEBUG_EXT -    printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); -#endif -    offset = start_offset; -    while (offset < end_offset) { - -#ifdef DEBUG_EXT -        /* Sanity check */ -        if (offset > s->cluster_size) -            printf("qcow2_read_extension: suspicious offset %lu\n", offset); - -        printf("attempting to read extended header in offset %lu\n", offset); -#endif - -        if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) { -            fprintf(stderr, "qcow2_read_extension: ERROR: " -                    "pread fail from offset %" PRIu64 "\n", -                    offset); -            return 1; -        } -        be32_to_cpus(&ext.magic); -        be32_to_cpus(&ext.len); -        offset += sizeof(ext); -#ifdef DEBUG_EXT -        printf("ext.magic = 0x%x\n", ext.magic); -#endif -        if (ext.len > end_offset - offset) { -            error_report("Header extension too large"); -            return -EINVAL; -        } - -        switch (ext.magic) { -        case QCOW2_EXT_MAGIC_END: -            return 0; - -        case QCOW2_EXT_MAGIC_BACKING_FORMAT: -            if (ext.len >= sizeof(bs->backing_format)) { -                fprintf(stderr, "ERROR: ext_backing_format: len=%u too large" -                        " (>=%zu)\n", -                        ext.len, sizeof(bs->backing_format)); -                return 2; -            } -            if (bdrv_pread(bs->file, offset , bs->backing_format, -                           ext.len) != ext.len) -                return 3; -            bs->backing_format[ext.len] = '\0'; -#ifdef DEBUG_EXT -            printf("Qcow2: Got format extension %s\n", bs->backing_format); -#endif -            break; - -        case QCOW2_EXT_MAGIC_FEATURE_TABLE: -            if (p_feature_table != NULL) { -                void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); -                ret = bdrv_pread(bs->file, offset , feature_table, ext.len); -                if (ret < 0) { -                    return ret; -                } - -                *p_feature_table = feature_table; -            } -            break; - -        default: -            /* unknown magic - save it in case we need to rewrite the header */ -            { -                Qcow2UnknownHeaderExtension *uext; - -                uext = g_malloc0(sizeof(*uext)  + ext.len); -                uext->magic = ext.magic; -                uext->len = ext.len; -                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); - -                ret = bdrv_pread(bs->file, offset , uext->data, uext->len); -                if (ret < 0) { -                    return ret; -                } -            } -            break; -        } - -        offset += ((ext.len + 7) & ~7); -    } - -    return 0; -} - -static void cleanup_unknown_header_ext(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    Qcow2UnknownHeaderExtension *uext, *next; - -    QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { -        QLIST_REMOVE(uext, next); -        g_free(uext); -    } -} - -static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs, -    const char *fmt, ...) -{ -    char msg[64]; -    va_list ap; - -    va_start(ap, fmt); -    vsnprintf(msg, sizeof(msg), fmt, ap); -    va_end(ap); - -    qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, -        bs->device_name, "qcow2", msg); -} - -static void report_unsupported_feature(BlockDriverState *bs, -    Qcow2Feature *table, uint64_t mask) -{ -    while (table && table->name[0] != '\0') { -        if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { -            if (mask & (1 << table->bit)) { -                report_unsupported(bs, "%.46s",table->name); -                mask &= ~(1 << table->bit); -            } -        } -        table++; -    } - -    if (mask) { -        report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask); -    } -} - -/* - * Sets the dirty bit and flushes afterwards if necessary. - * - * The incompatible_features bit is only set if the image file header was - * updated successfully.  Therefore it is not required to check the return - * value of this function. - */ -int qcow2_mark_dirty(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t val; -    int ret; - -    assert(s->qcow_version >= 3); - -    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { -        return 0; /* already dirty */ -    } - -    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); -    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), -                      &val, sizeof(val)); -    if (ret < 0) { -        return ret; -    } -    ret = bdrv_flush(bs->file); -    if (ret < 0) { -        return ret; -    } - -    /* Only treat image as dirty if the header was updated successfully */ -    s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; -    return 0; -} - -/* - * Clears the dirty bit and flushes before if necessary.  Only call this - * function when there are no pending requests, it does not guard against - * concurrent requests dirtying the image. - */ -static int qcow2_mark_clean(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; - -    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { -        int ret = bdrv_flush(bs); -        if (ret < 0) { -            return ret; -        } - -        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; -        return qcow2_update_header(bs); -    } -    return 0; -} - -static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, -                       BdrvCheckMode fix) -{ -    int ret = qcow2_check_refcounts(bs, result, fix); -    if (ret < 0) { -        return ret; -    } - -    if (fix && result->check_errors == 0 && result->corruptions == 0) { -        return qcow2_mark_clean(bs); -    } -    return ret; -} - -static QemuOptsList qcow2_runtime_opts = { -    .name = "qcow2", -    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), -    .desc = { -        { -            .name = "lazy_refcounts", -            .type = QEMU_OPT_BOOL, -            .help = "Postpone refcount updates", -        }, -        { -            .name = QCOW2_OPT_DISCARD_REQUEST, -            .type = QEMU_OPT_BOOL, -            .help = "Pass guest discard requests to the layer below", -        }, -        { -            .name = QCOW2_OPT_DISCARD_SNAPSHOT, -            .type = QEMU_OPT_BOOL, -            .help = "Generate discard requests when snapshot related space " -                    "is freed", -        }, -        { -            .name = QCOW2_OPT_DISCARD_OTHER, -            .type = QEMU_OPT_BOOL, -            .help = "Generate discard requests when other clusters are freed", -        }, -        { /* end of list */ } -    }, -}; - -static int qcow2_open(BlockDriverState *bs, QDict *options, int flags) -{ -    BDRVQcowState *s = bs->opaque; -    int len, i, ret = 0; -    QCowHeader header; -    QemuOpts *opts; -    Error *local_err = NULL; -    uint64_t ext_end; -    uint64_t l1_vm_state_index; - -    ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); -    if (ret < 0) { -        goto fail; -    } -    be32_to_cpus(&header.magic); -    be32_to_cpus(&header.version); -    be64_to_cpus(&header.backing_file_offset); -    be32_to_cpus(&header.backing_file_size); -    be64_to_cpus(&header.size); -    be32_to_cpus(&header.cluster_bits); -    be32_to_cpus(&header.crypt_method); -    be64_to_cpus(&header.l1_table_offset); -    be32_to_cpus(&header.l1_size); -    be64_to_cpus(&header.refcount_table_offset); -    be32_to_cpus(&header.refcount_table_clusters); -    be64_to_cpus(&header.snapshots_offset); -    be32_to_cpus(&header.nb_snapshots); - -    if (header.magic != QCOW_MAGIC) { -        ret = -EMEDIUMTYPE; -        goto fail; -    } -    if (header.version < 2 || header.version > 3) { -        report_unsupported(bs, "QCOW version %d", header.version); -        ret = -ENOTSUP; -        goto fail; -    } - -    s->qcow_version = header.version; - -    /* Initialise version 3 header fields */ -    if (header.version == 2) { -        header.incompatible_features    = 0; -        header.compatible_features      = 0; -        header.autoclear_features       = 0; -        header.refcount_order           = 4; -        header.header_length            = 72; -    } else { -        be64_to_cpus(&header.incompatible_features); -        be64_to_cpus(&header.compatible_features); -        be64_to_cpus(&header.autoclear_features); -        be32_to_cpus(&header.refcount_order); -        be32_to_cpus(&header.header_length); -    } - -    if (header.header_length > sizeof(header)) { -        s->unknown_header_fields_size = header.header_length - sizeof(header); -        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); -        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, -                         s->unknown_header_fields_size); -        if (ret < 0) { -            goto fail; -        } -    } - -    if (header.backing_file_offset) { -        ext_end = header.backing_file_offset; -    } else { -        ext_end = 1 << header.cluster_bits; -    } - -    /* Handle feature bits */ -    s->incompatible_features    = header.incompatible_features; -    s->compatible_features      = header.compatible_features; -    s->autoclear_features       = header.autoclear_features; - -    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { -        void *feature_table = NULL; -        qcow2_read_extensions(bs, header.header_length, ext_end, -                              &feature_table); -        report_unsupported_feature(bs, feature_table, -                                   s->incompatible_features & -                                   ~QCOW2_INCOMPAT_MASK); -        ret = -ENOTSUP; -        goto fail; -    } - -    /* Check support for various header values */ -    if (header.refcount_order != 4) { -        report_unsupported(bs, "%d bit reference counts", -                           1 << header.refcount_order); -        ret = -ENOTSUP; -        goto fail; -    } - -    if (header.cluster_bits < MIN_CLUSTER_BITS || -        header.cluster_bits > MAX_CLUSTER_BITS) { -        ret = -EINVAL; -        goto fail; -    } -    if (header.crypt_method > QCOW_CRYPT_AES) { -        ret = -EINVAL; -        goto fail; -    } -    s->crypt_method_header = header.crypt_method; -    if (s->crypt_method_header) { -        bs->encrypted = 1; -    } -    s->cluster_bits = header.cluster_bits; -    s->cluster_size = 1 << s->cluster_bits; -    s->cluster_sectors = 1 << (s->cluster_bits - 9); -    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ -    s->l2_size = 1 << s->l2_bits; -    bs->total_sectors = header.size / 512; -    s->csize_shift = (62 - (s->cluster_bits - 8)); -    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; -    s->cluster_offset_mask = (1LL << s->csize_shift) - 1; -    s->refcount_table_offset = header.refcount_table_offset; -    s->refcount_table_size = -        header.refcount_table_clusters << (s->cluster_bits - 3); - -    s->snapshots_offset = header.snapshots_offset; -    s->nb_snapshots = header.nb_snapshots; - -    /* read the level 1 table */ -    s->l1_size = header.l1_size; - -    l1_vm_state_index = size_to_l1(s, header.size); -    if (l1_vm_state_index > INT_MAX) { -        ret = -EFBIG; -        goto fail; -    } -    s->l1_vm_state_index = l1_vm_state_index; - -    /* the L1 table must contain at least enough entries to put -       header.size bytes */ -    if (s->l1_size < s->l1_vm_state_index) { -        ret = -EINVAL; -        goto fail; -    } -    s->l1_table_offset = header.l1_table_offset; -    if (s->l1_size > 0) { -        s->l1_table = g_malloc0( -            align_offset(s->l1_size * sizeof(uint64_t), 512)); -        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, -                         s->l1_size * sizeof(uint64_t)); -        if (ret < 0) { -            goto fail; -        } -        for(i = 0;i < s->l1_size; i++) { -            be64_to_cpus(&s->l1_table[i]); -        } -    } - -    /* alloc L2 table/refcount block cache */ -    s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE); -    s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE); - -    s->cluster_cache = g_malloc(s->cluster_size); -    /* one more sector for decompressed data alignment */ -    s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size -                                  + 512); -    s->cluster_cache_offset = -1; -    s->flags = flags; - -    ret = qcow2_refcount_init(bs); -    if (ret != 0) { -        goto fail; -    } - -    QLIST_INIT(&s->cluster_allocs); -    QTAILQ_INIT(&s->discards); - -    /* read qcow2 extensions */ -    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) { -        ret = -EINVAL; -        goto fail; -    } - -    /* read the backing file name */ -    if (header.backing_file_offset != 0) { -        len = header.backing_file_size; -        if (len > 1023) { -            len = 1023; -        } -        ret = bdrv_pread(bs->file, header.backing_file_offset, -                         bs->backing_file, len); -        if (ret < 0) { -            goto fail; -        } -        bs->backing_file[len] = '\0'; -    } - -    ret = qcow2_read_snapshots(bs); -    if (ret < 0) { -        goto fail; -    } - -    /* Clear unknown autoclear feature bits */ -    if (!bs->read_only && s->autoclear_features != 0) { -        s->autoclear_features = 0; -        ret = qcow2_update_header(bs); -        if (ret < 0) { -            goto fail; -        } -    } - -    /* Initialise locks */ -    qemu_co_mutex_init(&s->lock); - -    /* Repair image if dirty */ -    if (!(flags & BDRV_O_CHECK) && !bs->read_only && -        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { -        BdrvCheckResult result = {0}; - -        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS); -        if (ret < 0) { -            goto fail; -        } -    } - -    /* Enable lazy_refcounts according to image and command line options */ -    opts = qemu_opts_create_nofail(&qcow2_runtime_opts); -    qemu_opts_absorb_qdict(opts, options, &local_err); -    if (error_is_set(&local_err)) { -        qerror_report_err(local_err); -        error_free(local_err); -        ret = -EINVAL; -        goto fail; -    } - -    s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, -        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); - -    s->discard_passthrough[QCOW2_DISCARD_NEVER] = false; -    s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; -    s->discard_passthrough[QCOW2_DISCARD_REQUEST] = -        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, -                          flags & BDRV_O_UNMAP); -    s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = -        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); -    s->discard_passthrough[QCOW2_DISCARD_OTHER] = -        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); - -    qemu_opts_del(opts); - -    if (s->use_lazy_refcounts && s->qcow_version < 3) { -        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require " -            "a qcow2 image with at least qemu 1.1 compatibility level"); -        ret = -EINVAL; -        goto fail; -    } - -#ifdef DEBUG_ALLOC -    { -        BdrvCheckResult result = {0}; -        qcow2_check_refcounts(bs, &result, 0); -    } -#endif -    return ret; - - fail: -    g_free(s->unknown_header_fields); -    cleanup_unknown_header_ext(bs); -    qcow2_free_snapshots(bs); -    qcow2_refcount_close(bs); -    g_free(s->l1_table); -    if (s->l2_table_cache) { -        qcow2_cache_destroy(bs, s->l2_table_cache); -    } -    g_free(s->cluster_cache); -    qemu_vfree(s->cluster_data); -    return ret; -} - -static int qcow2_set_key(BlockDriverState *bs, const char *key) -{ -    BDRVQcowState *s = bs->opaque; -    uint8_t keybuf[16]; -    int len, i; - -    memset(keybuf, 0, 16); -    len = strlen(key); -    if (len > 16) -        len = 16; -    /* XXX: we could compress the chars to 7 bits to increase -       entropy */ -    for(i = 0;i < len;i++) { -        keybuf[i] = key[i]; -    } -    s->crypt_method = s->crypt_method_header; - -    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) -        return -1; -    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) -        return -1; -#if 0 -    /* test */ -    { -        uint8_t in[16]; -        uint8_t out[16]; -        uint8_t tmp[16]; -        for(i=0;i<16;i++) -            in[i] = i; -        AES_encrypt(in, tmp, &s->aes_encrypt_key); -        AES_decrypt(tmp, out, &s->aes_decrypt_key); -        for(i = 0; i < 16; i++) -            printf(" %02x", tmp[i]); -        printf("\n"); -        for(i = 0; i < 16; i++) -            printf(" %02x", out[i]); -        printf("\n"); -    } -#endif -    return 0; -} - -/* We have nothing to do for QCOW2 reopen, stubs just return - * success */ -static int qcow2_reopen_prepare(BDRVReopenState *state, -                                BlockReopenQueue *queue, Error **errp) -{ -    return 0; -} - -static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs, -        int64_t sector_num, int nb_sectors, int *pnum) -{ -    BDRVQcowState *s = bs->opaque; -    uint64_t cluster_offset; -    int ret; - -    *pnum = nb_sectors; -    /* FIXME We can get errors here, but the bdrv_co_is_allocated interface -     * can't pass them on today */ -    qemu_co_mutex_lock(&s->lock); -    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); -    qemu_co_mutex_unlock(&s->lock); -    if (ret < 0) { -        *pnum = 0; -    } - -    return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO); -} - -/* handle reading after the end of the backing file */ -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, -                  int64_t sector_num, int nb_sectors) -{ -    int n1; -    if ((sector_num + nb_sectors) <= bs->total_sectors) -        return nb_sectors; -    if (sector_num >= bs->total_sectors) -        n1 = 0; -    else -        n1 = bs->total_sectors - sector_num; - -    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1)); - -    return n1; -} - -static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, -                          int remaining_sectors, QEMUIOVector *qiov) -{ -    BDRVQcowState *s = bs->opaque; -    int index_in_cluster, n1; -    int ret; -    int cur_nr_sectors; /* number of sectors in current iteration */ -    uint64_t cluster_offset = 0; -    uint64_t bytes_done = 0; -    QEMUIOVector hd_qiov; -    uint8_t *cluster_data = NULL; - -    qemu_iovec_init(&hd_qiov, qiov->niov); - -    qemu_co_mutex_lock(&s->lock); - -    while (remaining_sectors != 0) { - -        /* prepare next request */ -        cur_nr_sectors = remaining_sectors; -        if (s->crypt_method) { -            cur_nr_sectors = MIN(cur_nr_sectors, -                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); -        } - -        ret = qcow2_get_cluster_offset(bs, sector_num << 9, -            &cur_nr_sectors, &cluster_offset); -        if (ret < 0) { -            goto fail; -        } - -        index_in_cluster = sector_num & (s->cluster_sectors - 1); - -        qemu_iovec_reset(&hd_qiov); -        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, -            cur_nr_sectors * 512); - -        switch (ret) { -        case QCOW2_CLUSTER_UNALLOCATED: - -            if (bs->backing_hd) { -                /* read from the base image */ -                n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov, -                    sector_num, cur_nr_sectors); -                if (n1 > 0) { -                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); -                    qemu_co_mutex_unlock(&s->lock); -                    ret = bdrv_co_readv(bs->backing_hd, sector_num, -                                        n1, &hd_qiov); -                    qemu_co_mutex_lock(&s->lock); -                    if (ret < 0) { -                        goto fail; -                    } -                } -            } else { -                /* Note: in this case, no need to wait */ -                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); -            } -            break; - -        case QCOW2_CLUSTER_ZERO: -            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); -            break; - -        case QCOW2_CLUSTER_COMPRESSED: -            /* add AIO support for compressed blocks ? */ -            ret = qcow2_decompress_cluster(bs, cluster_offset); -            if (ret < 0) { -                goto fail; -            } - -            qemu_iovec_from_buf(&hd_qiov, 0, -                s->cluster_cache + index_in_cluster * 512, -                512 * cur_nr_sectors); -            break; - -        case QCOW2_CLUSTER_NORMAL: -            if ((cluster_offset & 511) != 0) { -                ret = -EIO; -                goto fail; -            } - -            if (s->crypt_method) { -                /* -                 * For encrypted images, read everything into a temporary -                 * contiguous buffer on which the AES functions can work. -                 */ -                if (!cluster_data) { -                    cluster_data = -                        qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); -                } - -                assert(cur_nr_sectors <= -                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); -                qemu_iovec_reset(&hd_qiov); -                qemu_iovec_add(&hd_qiov, cluster_data, -                    512 * cur_nr_sectors); -            } - -            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); -            qemu_co_mutex_unlock(&s->lock); -            ret = bdrv_co_readv(bs->file, -                                (cluster_offset >> 9) + index_in_cluster, -                                cur_nr_sectors, &hd_qiov); -            qemu_co_mutex_lock(&s->lock); -            if (ret < 0) { -                goto fail; -            } -            if (s->crypt_method) { -                qcow2_encrypt_sectors(s, sector_num,  cluster_data, -                    cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key); -                qemu_iovec_from_buf(qiov, bytes_done, -                    cluster_data, 512 * cur_nr_sectors); -            } -            break; - -        default: -            g_assert_not_reached(); -            ret = -EIO; -            goto fail; -        } - -        remaining_sectors -= cur_nr_sectors; -        sector_num += cur_nr_sectors; -        bytes_done += cur_nr_sectors * 512; -    } -    ret = 0; - -fail: -    qemu_co_mutex_unlock(&s->lock); - -    qemu_iovec_destroy(&hd_qiov); -    qemu_vfree(cluster_data); - -    return ret; -} - -static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, -                           int64_t sector_num, -                           int remaining_sectors, -                           QEMUIOVector *qiov) -{ -    BDRVQcowState *s = bs->opaque; -    int index_in_cluster; -    int n_end; -    int ret; -    int cur_nr_sectors; /* number of sectors in current iteration */ -    uint64_t cluster_offset; -    QEMUIOVector hd_qiov; -    uint64_t bytes_done = 0; -    uint8_t *cluster_data = NULL; -    QCowL2Meta *l2meta = NULL; - -    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, -                                 remaining_sectors); - -    qemu_iovec_init(&hd_qiov, qiov->niov); - -    s->cluster_cache_offset = -1; /* disable compressed cache */ - -    qemu_co_mutex_lock(&s->lock); - -    while (remaining_sectors != 0) { - -        l2meta = NULL; - -        trace_qcow2_writev_start_part(qemu_coroutine_self()); -        index_in_cluster = sector_num & (s->cluster_sectors - 1); -        n_end = index_in_cluster + remaining_sectors; -        if (s->crypt_method && -            n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) { -            n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors; -        } - -        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, -            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta); -        if (ret < 0) { -            goto fail; -        } - -        assert((cluster_offset & 511) == 0); - -        qemu_iovec_reset(&hd_qiov); -        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, -            cur_nr_sectors * 512); - -        if (s->crypt_method) { -            if (!cluster_data) { -                cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * -                                                 s->cluster_size); -            } - -            assert(hd_qiov.size <= -                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); -            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); - -            qcow2_encrypt_sectors(s, sector_num, cluster_data, -                cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key); - -            qemu_iovec_reset(&hd_qiov); -            qemu_iovec_add(&hd_qiov, cluster_data, -                cur_nr_sectors * 512); -        } - -        qemu_co_mutex_unlock(&s->lock); -        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); -        trace_qcow2_writev_data(qemu_coroutine_self(), -                                (cluster_offset >> 9) + index_in_cluster); -        ret = bdrv_co_writev(bs->file, -                             (cluster_offset >> 9) + index_in_cluster, -                             cur_nr_sectors, &hd_qiov); -        qemu_co_mutex_lock(&s->lock); -        if (ret < 0) { -            goto fail; -        } - -        while (l2meta != NULL) { -            QCowL2Meta *next; - -            ret = qcow2_alloc_cluster_link_l2(bs, l2meta); -            if (ret < 0) { -                goto fail; -            } - -            /* Take the request off the list of running requests */ -            if (l2meta->nb_clusters != 0) { -                QLIST_REMOVE(l2meta, next_in_flight); -            } - -            qemu_co_queue_restart_all(&l2meta->dependent_requests); - -            next = l2meta->next; -            g_free(l2meta); -            l2meta = next; -        } - -        remaining_sectors -= cur_nr_sectors; -        sector_num += cur_nr_sectors; -        bytes_done += cur_nr_sectors * 512; -        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors); -    } -    ret = 0; - -fail: -    qemu_co_mutex_unlock(&s->lock); - -    while (l2meta != NULL) { -        QCowL2Meta *next; - -        if (l2meta->nb_clusters != 0) { -            QLIST_REMOVE(l2meta, next_in_flight); -        } -        qemu_co_queue_restart_all(&l2meta->dependent_requests); - -        next = l2meta->next; -        g_free(l2meta); -        l2meta = next; -    } - -    qemu_iovec_destroy(&hd_qiov); -    qemu_vfree(cluster_data); -    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); - -    return ret; -} - -static void qcow2_close(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    g_free(s->l1_table); - -    qcow2_cache_flush(bs, s->l2_table_cache); -    qcow2_cache_flush(bs, s->refcount_block_cache); - -    qcow2_mark_clean(bs); - -    qcow2_cache_destroy(bs, s->l2_table_cache); -    qcow2_cache_destroy(bs, s->refcount_block_cache); - -    g_free(s->unknown_header_fields); -    cleanup_unknown_header_ext(bs); - -    g_free(s->cluster_cache); -    qemu_vfree(s->cluster_data); -    qcow2_refcount_close(bs); -    qcow2_free_snapshots(bs); -} - -static void qcow2_invalidate_cache(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    int flags = s->flags; -    AES_KEY aes_encrypt_key; -    AES_KEY aes_decrypt_key; -    uint32_t crypt_method = 0; -    QDict *options; - -    /* -     * Backing files are read-only which makes all of their metadata immutable, -     * that means we don't have to worry about reopening them here. -     */ - -    if (s->crypt_method) { -        crypt_method = s->crypt_method; -        memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key)); -        memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key)); -    } - -    qcow2_close(bs); - -    options = qdict_new(); -    qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS, -              qbool_from_int(s->use_lazy_refcounts)); - -    memset(s, 0, sizeof(BDRVQcowState)); -    qcow2_open(bs, options, flags); - -    QDECREF(options); - -    if (crypt_method) { -        s->crypt_method = crypt_method; -        memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key)); -        memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key)); -    } -} - -static size_t header_ext_add(char *buf, uint32_t magic, const void *s, -    size_t len, size_t buflen) -{ -    QCowExtension *ext_backing_fmt = (QCowExtension*) buf; -    size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); - -    if (buflen < ext_len) { -        return -ENOSPC; -    } - -    *ext_backing_fmt = (QCowExtension) { -        .magic  = cpu_to_be32(magic), -        .len    = cpu_to_be32(len), -    }; -    memcpy(buf + sizeof(QCowExtension), s, len); - -    return ext_len; -} - -/* - * Updates the qcow2 header, including the variable length parts of it, i.e. - * the backing file name and all extensions. qcow2 was not designed to allow - * such changes, so if we run out of space (we can only use the first cluster) - * this function may fail. - * - * Returns 0 on success, -errno in error cases. - */ -int qcow2_update_header(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    QCowHeader *header; -    char *buf; -    size_t buflen = s->cluster_size; -    int ret; -    uint64_t total_size; -    uint32_t refcount_table_clusters; -    size_t header_length; -    Qcow2UnknownHeaderExtension *uext; - -    buf = qemu_blockalign(bs, buflen); - -    /* Header structure */ -    header = (QCowHeader*) buf; - -    if (buflen < sizeof(*header)) { -        ret = -ENOSPC; -        goto fail; -    } - -    header_length = sizeof(*header) + s->unknown_header_fields_size; -    total_size = bs->total_sectors * BDRV_SECTOR_SIZE; -    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); - -    *header = (QCowHeader) { -        /* Version 2 fields */ -        .magic                  = cpu_to_be32(QCOW_MAGIC), -        .version                = cpu_to_be32(s->qcow_version), -        .backing_file_offset    = 0, -        .backing_file_size      = 0, -        .cluster_bits           = cpu_to_be32(s->cluster_bits), -        .size                   = cpu_to_be64(total_size), -        .crypt_method           = cpu_to_be32(s->crypt_method_header), -        .l1_size                = cpu_to_be32(s->l1_size), -        .l1_table_offset        = cpu_to_be64(s->l1_table_offset), -        .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset), -        .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), -        .nb_snapshots           = cpu_to_be32(s->nb_snapshots), -        .snapshots_offset       = cpu_to_be64(s->snapshots_offset), - -        /* Version 3 fields */ -        .incompatible_features  = cpu_to_be64(s->incompatible_features), -        .compatible_features    = cpu_to_be64(s->compatible_features), -        .autoclear_features     = cpu_to_be64(s->autoclear_features), -        .refcount_order         = cpu_to_be32(3 + REFCOUNT_SHIFT), -        .header_length          = cpu_to_be32(header_length), -    }; - -    /* For older versions, write a shorter header */ -    switch (s->qcow_version) { -    case 2: -        ret = offsetof(QCowHeader, incompatible_features); -        break; -    case 3: -        ret = sizeof(*header); -        break; -    default: -        ret = -EINVAL; -        goto fail; -    } - -    buf += ret; -    buflen -= ret; -    memset(buf, 0, buflen); - -    /* Preserve any unknown field in the header */ -    if (s->unknown_header_fields_size) { -        if (buflen < s->unknown_header_fields_size) { -            ret = -ENOSPC; -            goto fail; -        } - -        memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); -        buf += s->unknown_header_fields_size; -        buflen -= s->unknown_header_fields_size; -    } - -    /* Backing file format header extension */ -    if (*bs->backing_format) { -        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, -                             bs->backing_format, strlen(bs->backing_format), -                             buflen); -        if (ret < 0) { -            goto fail; -        } - -        buf += ret; -        buflen -= ret; -    } - -    /* Feature table */ -    Qcow2Feature features[] = { -        { -            .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, -            .bit  = QCOW2_INCOMPAT_DIRTY_BITNR, -            .name = "dirty bit", -        }, -        { -            .type = QCOW2_FEAT_TYPE_COMPATIBLE, -            .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, -            .name = "lazy refcounts", -        }, -    }; - -    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, -                         features, sizeof(features), buflen); -    if (ret < 0) { -        goto fail; -    } -    buf += ret; -    buflen -= ret; - -    /* Keep unknown header extensions */ -    QLIST_FOREACH(uext, &s->unknown_header_ext, next) { -        ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); -        if (ret < 0) { -            goto fail; -        } - -        buf += ret; -        buflen -= ret; -    } - -    /* End of header extensions */ -    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); -    if (ret < 0) { -        goto fail; -    } - -    buf += ret; -    buflen -= ret; - -    /* Backing file name */ -    if (*bs->backing_file) { -        size_t backing_file_len = strlen(bs->backing_file); - -        if (buflen < backing_file_len) { -            ret = -ENOSPC; -            goto fail; -        } - -        /* Using strncpy is ok here, since buf is not NUL-terminated. */ -        strncpy(buf, bs->backing_file, buflen); - -        header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); -        header->backing_file_size   = cpu_to_be32(backing_file_len); -    } - -    /* Write the new header */ -    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); -    if (ret < 0) { -        goto fail; -    } - -    ret = 0; -fail: -    qemu_vfree(header); -    return ret; -} - -static int qcow2_change_backing_file(BlockDriverState *bs, -    const char *backing_file, const char *backing_fmt) -{ -    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); -    pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); - -    return qcow2_update_header(bs); -} - -static int preallocate(BlockDriverState *bs) -{ -    uint64_t nb_sectors; -    uint64_t offset; -    uint64_t host_offset = 0; -    int num; -    int ret; -    QCowL2Meta *meta; - -    nb_sectors = bdrv_getlength(bs) >> 9; -    offset = 0; - -    while (nb_sectors) { -        num = MIN(nb_sectors, INT_MAX >> 9); -        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, -                                         &host_offset, &meta); -        if (ret < 0) { -            return ret; -        } - -        ret = qcow2_alloc_cluster_link_l2(bs, meta); -        if (ret < 0) { -            qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters, -                                    QCOW2_DISCARD_NEVER); -            return ret; -        } - -        /* There are no dependent requests, but we need to remove our request -         * from the list of in-flight requests */ -        if (meta != NULL) { -            QLIST_REMOVE(meta, next_in_flight); -        } - -        /* TODO Preallocate data if requested */ - -        nb_sectors -= num; -        offset += num << 9; -    } - -    /* -     * It is expected that the image file is large enough to actually contain -     * all of the allocated clusters (otherwise we get failing reads after -     * EOF). Extend the image to the last allocated sector. -     */ -    if (host_offset != 0) { -        uint8_t buf[512]; -        memset(buf, 0, 512); -        ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1); -        if (ret < 0) { -            return ret; -        } -    } - -    return 0; -} - -static int qcow2_create2(const char *filename, int64_t total_size, -                         const char *backing_file, const char *backing_format, -                         int flags, size_t cluster_size, int prealloc, -                         QEMUOptionParameter *options, int version) -{ -    /* Calculate cluster_bits */ -    int cluster_bits; -    cluster_bits = ffs(cluster_size) - 1; -    if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || -        (1 << cluster_bits) != cluster_size) -    { -        error_report( -            "Cluster size must be a power of two between %d and %dk", -            1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); -        return -EINVAL; -    } - -    /* -     * Open the image file and write a minimal qcow2 header. -     * -     * We keep things simple and start with a zero-sized image. We also -     * do without refcount blocks or a L1 table for now. We'll fix the -     * inconsistency later. -     * -     * We do need a refcount table because growing the refcount table means -     * allocating two new refcount blocks - the seconds of which would be at -     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file -     * size for any qcow2 image. -     */ -    BlockDriverState* bs; -    QCowHeader header; -    uint8_t* refcount_table; -    int ret; - -    ret = bdrv_create_file(filename, options); -    if (ret < 0) { -        return ret; -    } - -    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR); -    if (ret < 0) { -        return ret; -    } - -    /* Write the header */ -    memset(&header, 0, sizeof(header)); -    header.magic = cpu_to_be32(QCOW_MAGIC); -    header.version = cpu_to_be32(version); -    header.cluster_bits = cpu_to_be32(cluster_bits); -    header.size = cpu_to_be64(0); -    header.l1_table_offset = cpu_to_be64(0); -    header.l1_size = cpu_to_be32(0); -    header.refcount_table_offset = cpu_to_be64(cluster_size); -    header.refcount_table_clusters = cpu_to_be32(1); -    header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT); -    header.header_length = cpu_to_be32(sizeof(header)); - -    if (flags & BLOCK_FLAG_ENCRYPT) { -        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); -    } else { -        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); -    } - -    if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) { -        header.compatible_features |= -            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); -    } - -    ret = bdrv_pwrite(bs, 0, &header, sizeof(header)); -    if (ret < 0) { -        goto out; -    } - -    /* Write an empty refcount table */ -    refcount_table = g_malloc0(cluster_size); -    ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size); -    g_free(refcount_table); - -    if (ret < 0) { -        goto out; -    } - -    bdrv_close(bs); - -    /* -     * And now open the image and make it consistent first (i.e. increase the -     * refcount of the cluster that is occupied by the header and the refcount -     * table) -     */ -    BlockDriver* drv = bdrv_find_format("qcow2"); -    assert(drv != NULL); -    ret = bdrv_open(bs, filename, NULL, -        BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv); -    if (ret < 0) { -        goto out; -    } - -    ret = qcow2_alloc_clusters(bs, 2 * cluster_size); -    if (ret < 0) { -        goto out; - -    } else if (ret != 0) { -        error_report("Huh, first cluster in empty image is already in use?"); -        abort(); -    } - -    /* Okay, now that we have a valid image, let's give it the right size */ -    ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE); -    if (ret < 0) { -        goto out; -    } - -    /* Want a backing file? There you go.*/ -    if (backing_file) { -        ret = bdrv_change_backing_file(bs, backing_file, backing_format); -        if (ret < 0) { -            goto out; -        } -    } - -    /* And if we're supposed to preallocate metadata, do that now */ -    if (prealloc) { -        BDRVQcowState *s = bs->opaque; -        qemu_co_mutex_lock(&s->lock); -        ret = preallocate(bs); -        qemu_co_mutex_unlock(&s->lock); -        if (ret < 0) { -            goto out; -        } -    } - -    ret = 0; -out: -    bdrv_delete(bs); -    return ret; -} - -static int qcow2_create(const char *filename, QEMUOptionParameter *options) -{ -    const char *backing_file = NULL; -    const char *backing_fmt = NULL; -    uint64_t sectors = 0; -    int flags = 0; -    size_t cluster_size = DEFAULT_CLUSTER_SIZE; -    int prealloc = 0; -    int version = 2; - -    /* Read out options */ -    while (options && options->name) { -        if (!strcmp(options->name, BLOCK_OPT_SIZE)) { -            sectors = options->value.n / 512; -        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { -            backing_file = options->value.s; -        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { -            backing_fmt = options->value.s; -        } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) { -            flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0; -        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { -            if (options->value.n) { -                cluster_size = options->value.n; -            } -        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { -            if (!options->value.s || !strcmp(options->value.s, "off")) { -                prealloc = 0; -            } else if (!strcmp(options->value.s, "metadata")) { -                prealloc = 1; -            } else { -                fprintf(stderr, "Invalid preallocation mode: '%s'\n", -                    options->value.s); -                return -EINVAL; -            } -        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) { -            if (!options->value.s || !strcmp(options->value.s, "0.10")) { -                version = 2; -            } else if (!strcmp(options->value.s, "1.1")) { -                version = 3; -            } else { -                fprintf(stderr, "Invalid compatibility level: '%s'\n", -                    options->value.s); -                return -EINVAL; -            } -        } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) { -            flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0; -        } -        options++; -    } - -    if (backing_file && prealloc) { -        fprintf(stderr, "Backing file and preallocation cannot be used at " -            "the same time\n"); -        return -EINVAL; -    } - -    if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) { -        fprintf(stderr, "Lazy refcounts only supported with compatibility " -                "level 1.1 and above (use compat=1.1 or greater)\n"); -        return -EINVAL; -    } - -    return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags, -                         cluster_size, prealloc, options, version); -} - -static int qcow2_make_empty(BlockDriverState *bs) -{ -#if 0 -    /* XXX: not correct */ -    BDRVQcowState *s = bs->opaque; -    uint32_t l1_length = s->l1_size * sizeof(uint64_t); -    int ret; - -    memset(s->l1_table, 0, l1_length); -    if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0) -        return -1; -    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); -    if (ret < 0) -        return ret; - -    l2_cache_reset(bs); -#endif -    return 0; -} - -static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, -    int64_t sector_num, int nb_sectors) -{ -    int ret; -    BDRVQcowState *s = bs->opaque; - -    /* Emulate misaligned zero writes */ -    if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { -        return -ENOTSUP; -    } - -    /* Whatever is left can use real zero clusters */ -    qemu_co_mutex_lock(&s->lock); -    ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, -        nb_sectors); -    qemu_co_mutex_unlock(&s->lock); - -    return ret; -} - -static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, -    int64_t sector_num, int nb_sectors) -{ -    int ret; -    BDRVQcowState *s = bs->opaque; - -    qemu_co_mutex_lock(&s->lock); -    ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, -        nb_sectors); -    qemu_co_mutex_unlock(&s->lock); -    return ret; -} - -static int qcow2_truncate(BlockDriverState *bs, int64_t offset) -{ -    BDRVQcowState *s = bs->opaque; -    int64_t new_l1_size; -    int ret; - -    if (offset & 511) { -        error_report("The new size must be a multiple of 512"); -        return -EINVAL; -    } - -    /* cannot proceed if image has snapshots */ -    if (s->nb_snapshots) { -        error_report("Can't resize an image which has snapshots"); -        return -ENOTSUP; -    } - -    /* shrinking is currently not supported */ -    if (offset < bs->total_sectors * 512) { -        error_report("qcow2 doesn't support shrinking images yet"); -        return -ENOTSUP; -    } - -    new_l1_size = size_to_l1(s, offset); -    ret = qcow2_grow_l1_table(bs, new_l1_size, true); -    if (ret < 0) { -        return ret; -    } - -    /* write updated header.size */ -    offset = cpu_to_be64(offset); -    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), -                           &offset, sizeof(uint64_t)); -    if (ret < 0) { -        return ret; -    } - -    s->l1_vm_state_index = new_l1_size; -    return 0; -} - -/* XXX: put compressed sectors first, then all the cluster aligned -   tables to avoid losing bytes in alignment */ -static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, -                                  const uint8_t *buf, int nb_sectors) -{ -    BDRVQcowState *s = bs->opaque; -    z_stream strm; -    int ret, out_len; -    uint8_t *out_buf; -    uint64_t cluster_offset; - -    if (nb_sectors == 0) { -        /* align end of file to a sector boundary to ease reading with -           sector based I/Os */ -        cluster_offset = bdrv_getlength(bs->file); -        cluster_offset = (cluster_offset + 511) & ~511; -        bdrv_truncate(bs->file, cluster_offset); -        return 0; -    } - -    if (nb_sectors != s->cluster_sectors) { -        ret = -EINVAL; - -        /* Zero-pad last write if image size is not cluster aligned */ -        if (sector_num + nb_sectors == bs->total_sectors && -            nb_sectors < s->cluster_sectors) { -            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); -            memset(pad_buf, 0, s->cluster_size); -            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); -            ret = qcow2_write_compressed(bs, sector_num, -                                         pad_buf, s->cluster_sectors); -            qemu_vfree(pad_buf); -        } -        return ret; -    } - -    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); - -    /* best compression, small window, no zlib header */ -    memset(&strm, 0, sizeof(strm)); -    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, -                       Z_DEFLATED, -12, -                       9, Z_DEFAULT_STRATEGY); -    if (ret != 0) { -        ret = -EINVAL; -        goto fail; -    } - -    strm.avail_in = s->cluster_size; -    strm.next_in = (uint8_t *)buf; -    strm.avail_out = s->cluster_size; -    strm.next_out = out_buf; - -    ret = deflate(&strm, Z_FINISH); -    if (ret != Z_STREAM_END && ret != Z_OK) { -        deflateEnd(&strm); -        ret = -EINVAL; -        goto fail; -    } -    out_len = strm.next_out - out_buf; - -    deflateEnd(&strm); - -    if (ret != Z_STREAM_END || out_len >= s->cluster_size) { -        /* could not compress: write normal cluster */ -        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); -        if (ret < 0) { -            goto fail; -        } -    } else { -        cluster_offset = qcow2_alloc_compressed_cluster_offset(bs, -            sector_num << 9, out_len); -        if (!cluster_offset) { -            ret = -EIO; -            goto fail; -        } -        cluster_offset &= s->cluster_offset_mask; -        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); -        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); -        if (ret < 0) { -            goto fail; -        } -    } - -    ret = 0; -fail: -    g_free(out_buf); -    return ret; -} - -static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    int ret; - -    qemu_co_mutex_lock(&s->lock); -    ret = qcow2_cache_flush(bs, s->l2_table_cache); -    if (ret < 0) { -        qemu_co_mutex_unlock(&s->lock); -        return ret; -    } - -    if (qcow2_need_accurate_refcounts(s)) { -        ret = qcow2_cache_flush(bs, s->refcount_block_cache); -        if (ret < 0) { -            qemu_co_mutex_unlock(&s->lock); -            return ret; -        } -    } -    qemu_co_mutex_unlock(&s->lock); - -    return 0; -} - -static int64_t qcow2_vm_state_offset(BDRVQcowState *s) -{ -	return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); -} - -static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) -{ -    BDRVQcowState *s = bs->opaque; -    bdi->cluster_size = s->cluster_size; -    bdi->vm_state_offset = qcow2_vm_state_offset(s); -    return 0; -} - -#if 0 -static void dump_refcounts(BlockDriverState *bs) -{ -    BDRVQcowState *s = bs->opaque; -    int64_t nb_clusters, k, k1, size; -    int refcount; - -    size = bdrv_getlength(bs->file); -    nb_clusters = size_to_clusters(s, size); -    for(k = 0; k < nb_clusters;) { -        k1 = k; -        refcount = get_refcount(bs, k); -        k++; -        while (k < nb_clusters && get_refcount(bs, k) == refcount) -            k++; -        printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount, -               k - k1); -    } -} -#endif - -static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, -                              int64_t pos) -{ -    BDRVQcowState *s = bs->opaque; -    int growable = bs->growable; -    int ret; - -    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); -    bs->growable = 1; -    ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); -    bs->growable = growable; - -    return ret; -} - -static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, -                              int64_t pos, int size) -{ -    BDRVQcowState *s = bs->opaque; -    int growable = bs->growable; -    int ret; - -    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); -    bs->growable = 1; -    ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); -    bs->growable = growable; - -    return ret; -} - -static QEMUOptionParameter qcow2_create_options[] = { -    { -        .name = BLOCK_OPT_SIZE, -        .type = OPT_SIZE, -        .help = "Virtual disk size" -    }, -    { -        .name = BLOCK_OPT_COMPAT_LEVEL, -        .type = OPT_STRING, -        .help = "Compatibility level (0.10 or 1.1)" -    }, -    { -        .name = BLOCK_OPT_BACKING_FILE, -        .type = OPT_STRING, -        .help = "File name of a base image" -    }, -    { -        .name = BLOCK_OPT_BACKING_FMT, -        .type = OPT_STRING, -        .help = "Image format of the base image" -    }, -    { -        .name = BLOCK_OPT_ENCRYPT, -        .type = OPT_FLAG, -        .help = "Encrypt the image" -    }, -    { -        .name = BLOCK_OPT_CLUSTER_SIZE, -        .type = OPT_SIZE, -        .help = "qcow2 cluster size", -        .value = { .n = DEFAULT_CLUSTER_SIZE }, -    }, -    { -        .name = BLOCK_OPT_PREALLOC, -        .type = OPT_STRING, -        .help = "Preallocation mode (allowed values: off, metadata)" -    }, -    { -        .name = BLOCK_OPT_LAZY_REFCOUNTS, -        .type = OPT_FLAG, -        .help = "Postpone refcount updates", -    }, -    { NULL } -}; - -static BlockDriver bdrv_qcow2 = { -    .format_name        = "qcow2", -    .instance_size      = sizeof(BDRVQcowState), -    .bdrv_probe         = qcow2_probe, -    .bdrv_open          = qcow2_open, -    .bdrv_close         = qcow2_close, -    .bdrv_reopen_prepare  = qcow2_reopen_prepare, -    .bdrv_create        = qcow2_create, -    .bdrv_has_zero_init = bdrv_has_zero_init_1, -    .bdrv_co_is_allocated = qcow2_co_is_allocated, -    .bdrv_set_key       = qcow2_set_key, -    .bdrv_make_empty    = qcow2_make_empty, - -    .bdrv_co_readv          = qcow2_co_readv, -    .bdrv_co_writev         = qcow2_co_writev, -    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os, - -    .bdrv_co_write_zeroes   = qcow2_co_write_zeroes, -    .bdrv_co_discard        = qcow2_co_discard, -    .bdrv_truncate          = qcow2_truncate, -    .bdrv_write_compressed  = qcow2_write_compressed, - -    .bdrv_snapshot_create   = qcow2_snapshot_create, -    .bdrv_snapshot_goto     = qcow2_snapshot_goto, -    .bdrv_snapshot_delete   = qcow2_snapshot_delete, -    .bdrv_snapshot_list     = qcow2_snapshot_list, -    .bdrv_snapshot_load_tmp     = qcow2_snapshot_load_tmp, -    .bdrv_get_info      = qcow2_get_info, - -    .bdrv_save_vmstate    = qcow2_save_vmstate, -    .bdrv_load_vmstate    = qcow2_load_vmstate, - -    .bdrv_change_backing_file   = qcow2_change_backing_file, - -    .bdrv_invalidate_cache      = qcow2_invalidate_cache, - -    .create_options = qcow2_create_options, -    .bdrv_check = qcow2_check, -}; - -static void bdrv_qcow2_init(void) -{ -    bdrv_register(&bdrv_qcow2); -} - -block_init(bdrv_qcow2_init); diff --git a/contrib/qemu/block/qcow2.h b/contrib/qemu/block/qcow2.h deleted file mode 100644 index 3b2d5cda71f..00000000000 --- a/contrib/qemu/block/qcow2.h +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Block driver for the QCOW version 2 format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#ifndef BLOCK_QCOW2_H -#define BLOCK_QCOW2_H - -#include "qemu/aes.h" -#include "block/coroutine.h" - -//#define DEBUG_ALLOC -//#define DEBUG_ALLOC2 -//#define DEBUG_EXT - -#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) - -#define QCOW_CRYPT_NONE 0 -#define QCOW_CRYPT_AES  1 - -#define QCOW_MAX_CRYPT_CLUSTERS 32 - -/* indicate that the refcount of the referenced cluster is exactly one. */ -#define QCOW_OFLAG_COPIED     (1LL << 63) -/* indicate that the cluster is compressed (they never have the copied flag) */ -#define QCOW_OFLAG_COMPRESSED (1LL << 62) -/* The cluster reads as all zeros */ -#define QCOW_OFLAG_ZERO (1LL << 0) - -#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */ - -#define MIN_CLUSTER_BITS 9 -#define MAX_CLUSTER_BITS 21 - -#define L2_CACHE_SIZE 16 - -/* Must be at least 4 to cover all cases of refcount table growth */ -#define REFCOUNT_CACHE_SIZE 4 - -#define DEFAULT_CLUSTER_SIZE 65536 - - -#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts" -#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request" -#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot" -#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other" - -typedef struct QCowHeader { -    uint32_t magic; -    uint32_t version; -    uint64_t backing_file_offset; -    uint32_t backing_file_size; -    uint32_t cluster_bits; -    uint64_t size; /* in bytes */ -    uint32_t crypt_method; -    uint32_t l1_size; /* XXX: save number of clusters instead ? */ -    uint64_t l1_table_offset; -    uint64_t refcount_table_offset; -    uint32_t refcount_table_clusters; -    uint32_t nb_snapshots; -    uint64_t snapshots_offset; - -    /* The following fields are only valid for version >= 3 */ -    uint64_t incompatible_features; -    uint64_t compatible_features; -    uint64_t autoclear_features; - -    uint32_t refcount_order; -    uint32_t header_length; -} QCowHeader; - -typedef struct QCowSnapshot { -    uint64_t l1_table_offset; -    uint32_t l1_size; -    char *id_str; -    char *name; -    uint64_t disk_size; -    uint64_t vm_state_size; -    uint32_t date_sec; -    uint32_t date_nsec; -    uint64_t vm_clock_nsec; -} QCowSnapshot; - -struct Qcow2Cache; -typedef struct Qcow2Cache Qcow2Cache; - -typedef struct Qcow2UnknownHeaderExtension { -    uint32_t magic; -    uint32_t len; -    QLIST_ENTRY(Qcow2UnknownHeaderExtension) next; -    uint8_t data[]; -} Qcow2UnknownHeaderExtension; - -enum { -    QCOW2_FEAT_TYPE_INCOMPATIBLE    = 0, -    QCOW2_FEAT_TYPE_COMPATIBLE      = 1, -    QCOW2_FEAT_TYPE_AUTOCLEAR       = 2, -}; - -/* Incompatible feature bits */ -enum { -    QCOW2_INCOMPAT_DIRTY_BITNR   = 0, -    QCOW2_INCOMPAT_DIRTY         = 1 << QCOW2_INCOMPAT_DIRTY_BITNR, - -    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY, -}; - -/* Compatible feature bits */ -enum { -    QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0, -    QCOW2_COMPAT_LAZY_REFCOUNTS       = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, - -    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS, -}; - -enum qcow2_discard_type { -    QCOW2_DISCARD_NEVER = 0, -    QCOW2_DISCARD_ALWAYS, -    QCOW2_DISCARD_REQUEST, -    QCOW2_DISCARD_SNAPSHOT, -    QCOW2_DISCARD_OTHER, -    QCOW2_DISCARD_MAX -}; - -typedef struct Qcow2Feature { -    uint8_t type; -    uint8_t bit; -    char    name[46]; -} QEMU_PACKED Qcow2Feature; - -typedef struct Qcow2DiscardRegion { -    BlockDriverState *bs; -    uint64_t offset; -    uint64_t bytes; -    QTAILQ_ENTRY(Qcow2DiscardRegion) next; -} Qcow2DiscardRegion; - -typedef struct BDRVQcowState { -    int cluster_bits; -    int cluster_size; -    int cluster_sectors; -    int l2_bits; -    int l2_size; -    int l1_size; -    int l1_vm_state_index; -    int csize_shift; -    int csize_mask; -    uint64_t cluster_offset_mask; -    uint64_t l1_table_offset; -    uint64_t *l1_table; - -    Qcow2Cache* l2_table_cache; -    Qcow2Cache* refcount_block_cache; - -    uint8_t *cluster_cache; -    uint8_t *cluster_data; -    uint64_t cluster_cache_offset; -    QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs; - -    uint64_t *refcount_table; -    uint64_t refcount_table_offset; -    uint32_t refcount_table_size; -    int64_t free_cluster_index; -    int64_t free_byte_offset; - -    CoMutex lock; - -    uint32_t crypt_method; /* current crypt method, 0 if no key yet */ -    uint32_t crypt_method_header; -    AES_KEY aes_encrypt_key; -    AES_KEY aes_decrypt_key; -    uint64_t snapshots_offset; -    int snapshots_size; -    int nb_snapshots; -    QCowSnapshot *snapshots; - -    int flags; -    int qcow_version; -    bool use_lazy_refcounts; - -    bool discard_passthrough[QCOW2_DISCARD_MAX]; - -    uint64_t incompatible_features; -    uint64_t compatible_features; -    uint64_t autoclear_features; - -    size_t unknown_header_fields_size; -    void* unknown_header_fields; -    QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; -    QTAILQ_HEAD (, Qcow2DiscardRegion) discards; -    bool cache_discards; -} BDRVQcowState; - -/* XXX: use std qcow open function ? */ -typedef struct QCowCreateState { -    int cluster_size; -    int cluster_bits; -    uint16_t *refcount_block; -    uint64_t *refcount_table; -    int64_t l1_table_offset; -    int64_t refcount_table_offset; -    int64_t refcount_block_offset; -} QCowCreateState; - -struct QCowAIOCB; - -typedef struct Qcow2COWRegion { -    /** -     * Offset of the COW region in bytes from the start of the first cluster -     * touched by the request. -     */ -    uint64_t    offset; - -    /** Number of sectors to copy */ -    int         nb_sectors; -} Qcow2COWRegion; - -/** - * Describes an in-flight (part of a) write request that writes to clusters - * that are not referenced in their L2 table yet. - */ -typedef struct QCowL2Meta -{ -    /** Guest offset of the first newly allocated cluster */ -    uint64_t offset; - -    /** Host offset of the first newly allocated cluster */ -    uint64_t alloc_offset; - -    /** -     * Number of sectors from the start of the first allocated cluster to -     * the end of the (possibly shortened) request -     */ -    int nb_available; - -    /** Number of newly allocated clusters */ -    int nb_clusters; - -    /** -     * Requests that overlap with this allocation and wait to be restarted -     * when the allocating request has completed. -     */ -    CoQueue dependent_requests; - -    /** -     * The COW Region between the start of the first allocated cluster and the -     * area the guest actually writes to. -     */ -    Qcow2COWRegion cow_start; - -    /** -     * The COW Region between the area the guest actually writes to and the -     * end of the last allocated cluster. -     */ -    Qcow2COWRegion cow_end; - -    /** Pointer to next L2Meta of the same write request */ -    struct QCowL2Meta *next; - -    QLIST_ENTRY(QCowL2Meta) next_in_flight; -} QCowL2Meta; - -enum { -    QCOW2_CLUSTER_UNALLOCATED, -    QCOW2_CLUSTER_NORMAL, -    QCOW2_CLUSTER_COMPRESSED, -    QCOW2_CLUSTER_ZERO -}; - -#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL -#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL -#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL - -#define REFT_OFFSET_MASK 0xffffffffffffff00ULL - -static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset) -{ -    return offset & ~(s->cluster_size - 1); -} - -static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset) -{ -    return offset & (s->cluster_size - 1); -} - -static inline int size_to_clusters(BDRVQcowState *s, int64_t size) -{ -    return (size + (s->cluster_size - 1)) >> s->cluster_bits; -} - -static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size) -{ -    int shift = s->cluster_bits + s->l2_bits; -    return (size + (1ULL << shift) - 1) >> shift; -} - -static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset) -{ -    return (offset >> s->cluster_bits) & (s->l2_size - 1); -} - -static inline int64_t align_offset(int64_t offset, int n) -{ -    offset = (offset + n - 1) & ~(n - 1); -    return offset; -} - -static inline int qcow2_get_cluster_type(uint64_t l2_entry) -{ -    if (l2_entry & QCOW_OFLAG_COMPRESSED) { -        return QCOW2_CLUSTER_COMPRESSED; -    } else if (l2_entry & QCOW_OFLAG_ZERO) { -        return QCOW2_CLUSTER_ZERO; -    } else if (!(l2_entry & L2E_OFFSET_MASK)) { -        return QCOW2_CLUSTER_UNALLOCATED; -    } else { -        return QCOW2_CLUSTER_NORMAL; -    } -} - -/* Check whether refcounts are eager or lazy */ -static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) -{ -    return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY); -} - -static inline uint64_t l2meta_cow_start(QCowL2Meta *m) -{ -    return m->offset + m->cow_start.offset; -} - -static inline uint64_t l2meta_cow_end(QCowL2Meta *m) -{ -    return m->offset + m->cow_end.offset -        + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS); -} - -// FIXME Need qcow2_ prefix to global functions - -/* qcow2.c functions */ -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, -                  int64_t sector_num, int nb_sectors); - -int qcow2_mark_dirty(BlockDriverState *bs); -int qcow2_update_header(BlockDriverState *bs); - -/* qcow2-refcount.c functions */ -int qcow2_refcount_init(BlockDriverState *bs); -void qcow2_refcount_close(BlockDriverState *bs); - -int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size); -int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, -    int nb_clusters); -int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size); -void qcow2_free_clusters(BlockDriverState *bs, -                          int64_t offset, int64_t size, -                          enum qcow2_discard_type type); -void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, -                             int nb_clusters, enum qcow2_discard_type type); - -int qcow2_update_snapshot_refcount(BlockDriverState *bs, -    int64_t l1_table_offset, int l1_size, int addend); - -int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, -                          BdrvCheckMode fix); - -void qcow2_process_discards(BlockDriverState *bs, int ret); - -/* qcow2-cluster.c functions */ -int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, -                        bool exact_size); -void qcow2_l2_cache_reset(BlockDriverState *bs); -int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); -void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, -                     uint8_t *out_buf, const uint8_t *in_buf, -                     int nb_sectors, int enc, -                     const AES_KEY *key); - -int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, -    int *num, uint64_t *cluster_offset); -int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, -    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m); -uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, -                                         uint64_t offset, -                                         int compressed_size); - -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, -    int nb_sectors); -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); - -/* qcow2-snapshot.c functions */ -int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); -int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id); -int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id); -int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab); -int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name); - -void qcow2_free_snapshots(BlockDriverState *bs); -int qcow2_read_snapshots(BlockDriverState *bs); - -/* qcow2-cache.c functions */ -Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables); -int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c); - -void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table); -int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c); -int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, -    Qcow2Cache *dependency); -void qcow2_cache_depends_on_flush(Qcow2Cache *c); - -int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, -    void **table); -int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, -    void **table); -int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); - -#endif diff --git a/contrib/qemu/block/qed-check.c b/contrib/qemu/block/qed-check.c deleted file mode 100644 index b473dcd61f6..00000000000 --- a/contrib/qemu/block/qed-check.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * QEMU Enhanced Disk Format Consistency Check - * - * Copyright IBM, Corp. 2010 - * - * Authors: - *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#include "qed.h" - -typedef struct { -    BDRVQEDState *s; -    BdrvCheckResult *result; -    bool fix;                           /* whether to fix invalid offsets */ - -    uint64_t nclusters; -    uint32_t *used_clusters;            /* referenced cluster bitmap */ - -    QEDRequest request; -} QEDCheck; - -static bool qed_test_bit(uint32_t *bitmap, uint64_t n) { -    return !!(bitmap[n / 32] & (1 << (n % 32))); -} - -static void qed_set_bit(uint32_t *bitmap, uint64_t n) { -    bitmap[n / 32] |= 1 << (n % 32); -} - -/** - * Set bitmap bits for clusters - * - * @check:          Check structure - * @offset:         Starting offset in bytes - * @n:              Number of clusters - */ -static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset, -                                  unsigned int n) -{ -    uint64_t cluster = qed_bytes_to_clusters(check->s, offset); -    unsigned int corruptions = 0; - -    while (n-- != 0) { -        /* Clusters should only be referenced once */ -        if (qed_test_bit(check->used_clusters, cluster)) { -            corruptions++; -        } - -        qed_set_bit(check->used_clusters, cluster); -        cluster++; -    } - -    check->result->corruptions += corruptions; -    return corruptions == 0; -} - -/** - * Check an L2 table - * - * @ret:            Number of invalid cluster offsets - */ -static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table) -{ -    BDRVQEDState *s = check->s; -    unsigned int i, num_invalid = 0; -    uint64_t last_offset = 0; - -    for (i = 0; i < s->table_nelems; i++) { -        uint64_t offset = table->offsets[i]; - -        if (qed_offset_is_unalloc_cluster(offset) || -            qed_offset_is_zero_cluster(offset)) { -            continue; -        } -        check->result->bfi.allocated_clusters++; -        if (last_offset && (last_offset + s->header.cluster_size != offset)) { -            check->result->bfi.fragmented_clusters++; -        } -        last_offset = offset; - -        /* Detect invalid cluster offset */ -        if (!qed_check_cluster_offset(s, offset)) { -            if (check->fix) { -                table->offsets[i] = 0; -                check->result->corruptions_fixed++; -            } else { -                check->result->corruptions++; -            } - -            num_invalid++; -            continue; -        } - -        qed_set_used_clusters(check, offset, 1); -    } - -    return num_invalid; -} - -/** - * Descend tables and check each cluster is referenced once only - */ -static int qed_check_l1_table(QEDCheck *check, QEDTable *table) -{ -    BDRVQEDState *s = check->s; -    unsigned int i, num_invalid_l1 = 0; -    int ret, last_error = 0; - -    /* Mark L1 table clusters used */ -    qed_set_used_clusters(check, s->header.l1_table_offset, -                          s->header.table_size); - -    for (i = 0; i < s->table_nelems; i++) { -        unsigned int num_invalid_l2; -        uint64_t offset = table->offsets[i]; - -        if (qed_offset_is_unalloc_cluster(offset)) { -            continue; -        } - -        /* Detect invalid L2 offset */ -        if (!qed_check_table_offset(s, offset)) { -            /* Clear invalid offset */ -            if (check->fix) { -                table->offsets[i] = 0; -                check->result->corruptions_fixed++; -            } else { -                check->result->corruptions++; -            } - -            num_invalid_l1++; -            continue; -        } - -        if (!qed_set_used_clusters(check, offset, s->header.table_size)) { -            continue; /* skip an invalid table */ -        } - -        ret = qed_read_l2_table_sync(s, &check->request, offset); -        if (ret) { -            check->result->check_errors++; -            last_error = ret; -            continue; -        } - -        num_invalid_l2 = qed_check_l2_table(check, -                                            check->request.l2_table->table); - -        /* Write out fixed L2 table */ -        if (num_invalid_l2 > 0 && check->fix) { -            ret = qed_write_l2_table_sync(s, &check->request, 0, -                                          s->table_nelems, false); -            if (ret) { -                check->result->check_errors++; -                last_error = ret; -                continue; -            } -        } -    } - -    /* Drop reference to final table */ -    qed_unref_l2_cache_entry(check->request.l2_table); -    check->request.l2_table = NULL; - -    /* Write out fixed L1 table */ -    if (num_invalid_l1 > 0 && check->fix) { -        ret = qed_write_l1_table_sync(s, 0, s->table_nelems); -        if (ret) { -            check->result->check_errors++; -            last_error = ret; -        } -    } - -    return last_error; -} - -/** - * Check for unreferenced (leaked) clusters - */ -static void qed_check_for_leaks(QEDCheck *check) -{ -    BDRVQEDState *s = check->s; -    uint64_t i; - -    for (i = s->header.header_size; i < check->nclusters; i++) { -        if (!qed_test_bit(check->used_clusters, i)) { -            check->result->leaks++; -        } -    } -} - -/** - * Mark an image clean once it passes check or has been repaired - */ -static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result) -{ -    /* Skip if there were unfixable corruptions or I/O errors */ -    if (result->corruptions > 0 || result->check_errors > 0) { -        return; -    } - -    /* Skip if image is already marked clean */ -    if (!(s->header.features & QED_F_NEED_CHECK)) { -        return; -    } - -    /* Ensure fixes reach storage before clearing check bit */ -    bdrv_flush(s->bs); - -    s->header.features &= ~QED_F_NEED_CHECK; -    qed_write_header_sync(s); -} - -int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix) -{ -    QEDCheck check = { -        .s = s, -        .result = result, -        .nclusters = qed_bytes_to_clusters(s, s->file_size), -        .request = { .l2_table = NULL }, -        .fix = fix, -    }; -    int ret; - -    check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) * -                                       sizeof(check.used_clusters[0])); - -    check.result->bfi.total_clusters = -        (s->header.image_size + s->header.cluster_size - 1) / -            s->header.cluster_size; -    ret = qed_check_l1_table(&check, s->l1_table); -    if (ret == 0) { -        /* Only check for leaks if entire image was scanned successfully */ -        qed_check_for_leaks(&check); - -        if (fix) { -            qed_check_mark_clean(s, result); -        } -    } - -    g_free(check.used_clusters); -    return ret; -} diff --git a/contrib/qemu/block/qed-cluster.c b/contrib/qemu/block/qed-cluster.c deleted file mode 100644 index f64b2af8f7e..00000000000 --- a/contrib/qemu/block/qed-cluster.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * QEMU Enhanced Disk Format Cluster functions - * - * Copyright IBM, Corp. 2010 - * - * Authors: - *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com> - *  Anthony Liguori   <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#include "qed.h" - -/** - * Count the number of contiguous data clusters - * - * @s:              QED state - * @table:          L2 table - * @index:          First cluster index - * @n:              Maximum number of clusters - * @offset:         Set to first cluster offset - * - * This function scans tables for contiguous clusters.  A contiguous run of - * clusters may be allocated, unallocated, or zero. - */ -static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s, -                                                  QEDTable *table, -                                                  unsigned int index, -                                                  unsigned int n, -                                                  uint64_t *offset) -{ -    unsigned int end = MIN(index + n, s->table_nelems); -    uint64_t last = table->offsets[index]; -    unsigned int i; - -    *offset = last; - -    for (i = index + 1; i < end; i++) { -        if (qed_offset_is_unalloc_cluster(last)) { -            /* Counting unallocated clusters */ -            if (!qed_offset_is_unalloc_cluster(table->offsets[i])) { -                break; -            } -        } else if (qed_offset_is_zero_cluster(last)) { -            /* Counting zero clusters */ -            if (!qed_offset_is_zero_cluster(table->offsets[i])) { -                break; -            } -        } else { -            /* Counting allocated clusters */ -            if (table->offsets[i] != last + s->header.cluster_size) { -                break; -            } -            last = table->offsets[i]; -        } -    } -    return i - index; -} - -typedef struct { -    BDRVQEDState *s; -    uint64_t pos; -    size_t len; - -    QEDRequest *request; - -    /* User callback */ -    QEDFindClusterFunc *cb; -    void *opaque; -} QEDFindClusterCB; - -static void qed_find_cluster_cb(void *opaque, int ret) -{ -    QEDFindClusterCB *find_cluster_cb = opaque; -    BDRVQEDState *s = find_cluster_cb->s; -    QEDRequest *request = find_cluster_cb->request; -    uint64_t offset = 0; -    size_t len = 0; -    unsigned int index; -    unsigned int n; - -    if (ret) { -        goto out; -    } - -    index = qed_l2_index(s, find_cluster_cb->pos); -    n = qed_bytes_to_clusters(s, -                              qed_offset_into_cluster(s, find_cluster_cb->pos) + -                              find_cluster_cb->len); -    n = qed_count_contiguous_clusters(s, request->l2_table->table, -                                      index, n, &offset); - -    if (qed_offset_is_unalloc_cluster(offset)) { -        ret = QED_CLUSTER_L2; -    } else if (qed_offset_is_zero_cluster(offset)) { -        ret = QED_CLUSTER_ZERO; -    } else if (qed_check_cluster_offset(s, offset)) { -        ret = QED_CLUSTER_FOUND; -    } else { -        ret = -EINVAL; -    } - -    len = MIN(find_cluster_cb->len, n * s->header.cluster_size - -              qed_offset_into_cluster(s, find_cluster_cb->pos)); - -out: -    find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len); -    g_free(find_cluster_cb); -} - -/** - * Find the offset of a data cluster - * - * @s:          QED state - * @request:    L2 cache entry - * @pos:        Byte position in device - * @len:        Number of bytes - * @cb:         Completion function - * @opaque:     User data for completion function - * - * This function translates a position in the block device to an offset in the - * image file.  It invokes the cb completion callback to report back the - * translated offset or unallocated range in the image file. - * - * If the L2 table exists, request->l2_table points to the L2 table cache entry - * and the caller must free the reference when they are finished.  The cache - * entry is exposed in this way to avoid callers having to read the L2 table - * again later during request processing.  If request->l2_table is non-NULL it - * will be unreferenced before taking on the new cache entry. - */ -void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, -                      size_t len, QEDFindClusterFunc *cb, void *opaque) -{ -    QEDFindClusterCB *find_cluster_cb; -    uint64_t l2_offset; - -    /* Limit length to L2 boundary.  Requests are broken up at the L2 boundary -     * so that a request acts on one L2 table at a time. -     */ -    len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos); - -    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)]; -    if (qed_offset_is_unalloc_cluster(l2_offset)) { -        cb(opaque, QED_CLUSTER_L1, 0, len); -        return; -    } -    if (!qed_check_table_offset(s, l2_offset)) { -        cb(opaque, -EINVAL, 0, 0); -        return; -    } - -    find_cluster_cb = g_malloc(sizeof(*find_cluster_cb)); -    find_cluster_cb->s = s; -    find_cluster_cb->pos = pos; -    find_cluster_cb->len = len; -    find_cluster_cb->cb = cb; -    find_cluster_cb->opaque = opaque; -    find_cluster_cb->request = request; - -    qed_read_l2_table(s, request, l2_offset, -                      qed_find_cluster_cb, find_cluster_cb); -} diff --git a/contrib/qemu/block/qed-gencb.c b/contrib/qemu/block/qed-gencb.c deleted file mode 100644 index 7d7ac1ffc8e..00000000000 --- a/contrib/qemu/block/qed-gencb.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * QEMU Enhanced Disk Format - * - * Copyright IBM, Corp. 2010 - * - * Authors: - *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#include "qed.h" - -void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque) -{ -    GenericCB *gencb = g_malloc(len); -    gencb->cb = cb; -    gencb->opaque = opaque; -    return gencb; -} - -void gencb_complete(void *opaque, int ret) -{ -    GenericCB *gencb = opaque; -    BlockDriverCompletionFunc *cb = gencb->cb; -    void *user_opaque = gencb->opaque; - -    g_free(gencb); -    cb(user_opaque, ret); -} diff --git a/contrib/qemu/block/qed-l2-cache.c b/contrib/qemu/block/qed-l2-cache.c deleted file mode 100644 index e9b2aae44d9..00000000000 --- a/contrib/qemu/block/qed-l2-cache.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * QEMU Enhanced Disk Format L2 Cache - * - * Copyright IBM, Corp. 2010 - * - * Authors: - *  Anthony Liguori   <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -/* - * L2 table cache usage is as follows: - * - * An open image has one L2 table cache that is used to avoid accessing the - * image file for recently referenced L2 tables. - * - * Cluster offset lookup translates the logical offset within the block device - * to a cluster offset within the image file.  This is done by indexing into - * the L1 and L2 tables which store cluster offsets.  It is here where the L2 - * table cache serves up recently referenced L2 tables. - * - * If there is a cache miss, that L2 table is read from the image file and - * committed to the cache.  Subsequent accesses to that L2 table will be served - * from the cache until the table is evicted from the cache. - * - * L2 tables are also committed to the cache when new L2 tables are allocated - * in the image file.  Since the L2 table cache is write-through, the new L2 - * table is first written out to the image file and then committed to the - * cache. - * - * Multiple I/O requests may be using an L2 table cache entry at any given - * time.  That means an entry may be in use across several requests and - * reference counting is needed to free the entry at the correct time.  In - * particular, an entry evicted from the cache will only be freed once all - * references are dropped. - * - * An in-flight I/O request will hold a reference to a L2 table cache entry for - * the period during which it needs to access the L2 table.  This includes - * cluster offset lookup, L2 table allocation, and L2 table update when a new - * data cluster has been allocated. - * - * An interesting case occurs when two requests need to access an L2 table that - * is not in the cache.  Since the operation to read the table from the image - * file takes some time to complete, both requests may see a cache miss and - * start reading the L2 table from the image file.  The first to finish will - * commit its L2 table into the cache.  When the second tries to commit its - * table will be deleted in favor of the existing cache entry. - */ - -#include "trace.h" -#include "qed.h" - -/* Each L2 holds 2GB so this let's us fully cache a 100GB disk */ -#define MAX_L2_CACHE_SIZE 50 - -/** - * Initialize the L2 cache - */ -void qed_init_l2_cache(L2TableCache *l2_cache) -{ -    QTAILQ_INIT(&l2_cache->entries); -    l2_cache->n_entries = 0; -} - -/** - * Free the L2 cache - */ -void qed_free_l2_cache(L2TableCache *l2_cache) -{ -    CachedL2Table *entry, *next_entry; - -    QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) { -        qemu_vfree(entry->table); -        g_free(entry); -    } -} - -/** - * Allocate an uninitialized entry from the cache - * - * The returned entry has a reference count of 1 and is owned by the caller. - * The caller must allocate the actual table field for this entry and it must - * be freeable using qemu_vfree(). - */ -CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache) -{ -    CachedL2Table *entry; - -    entry = g_malloc0(sizeof(*entry)); -    entry->ref++; - -    trace_qed_alloc_l2_cache_entry(l2_cache, entry); - -    return entry; -} - -/** - * Decrease an entry's reference count and free if necessary when the reference - * count drops to zero. - */ -void qed_unref_l2_cache_entry(CachedL2Table *entry) -{ -    if (!entry) { -        return; -    } - -    entry->ref--; -    trace_qed_unref_l2_cache_entry(entry, entry->ref); -    if (entry->ref == 0) { -        qemu_vfree(entry->table); -        g_free(entry); -    } -} - -/** - * Find an entry in the L2 cache.  This may return NULL and it's up to the - * caller to satisfy the cache miss. - * - * For a cached entry, this function increases the reference count and returns - * the entry. - */ -CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset) -{ -    CachedL2Table *entry; - -    QTAILQ_FOREACH(entry, &l2_cache->entries, node) { -        if (entry->offset == offset) { -            trace_qed_find_l2_cache_entry(l2_cache, entry, offset, entry->ref); -            entry->ref++; -            return entry; -        } -    } -    return NULL; -} - -/** - * Commit an L2 cache entry into the cache.  This is meant to be used as part of - * the process to satisfy a cache miss.  A caller would allocate an entry which - * is not actually in the L2 cache and then once the entry was valid and - * present on disk, the entry can be committed into the cache. - * - * Since the cache is write-through, it's important that this function is not - * called until the entry is present on disk and the L1 has been updated to - * point to the entry. - * - * N.B. This function steals a reference to the l2_table from the caller so the - * caller must obtain a new reference by issuing a call to - * qed_find_l2_cache_entry(). - */ -void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table) -{ -    CachedL2Table *entry; - -    entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset); -    if (entry) { -        qed_unref_l2_cache_entry(entry); -        qed_unref_l2_cache_entry(l2_table); -        return; -    } - -    /* Evict an unused cache entry so we have space.  If all entries are in use -     * we can grow the cache temporarily and we try to shrink back down later. -     */ -    if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) { -        CachedL2Table *next; -        QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) { -            if (entry->ref > 1) { -                continue; -            } - -            QTAILQ_REMOVE(&l2_cache->entries, entry, node); -            l2_cache->n_entries--; -            qed_unref_l2_cache_entry(entry); - -            /* Stop evicting when we've shrunk back to max size */ -            if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) { -                break; -            } -        } -    } - -    l2_cache->n_entries++; -    QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node); -} diff --git a/contrib/qemu/block/qed-table.c b/contrib/qemu/block/qed-table.c deleted file mode 100644 index 76d2dcccf81..00000000000 --- a/contrib/qemu/block/qed-table.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * QEMU Enhanced Disk Format Table I/O - * - * Copyright IBM, Corp. 2010 - * - * Authors: - *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com> - *  Anthony Liguori   <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#include "trace.h" -#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ -#include "qed.h" - -typedef struct { -    GenericCB gencb; -    BDRVQEDState *s; -    QEDTable *table; - -    struct iovec iov; -    QEMUIOVector qiov; -} QEDReadTableCB; - -static void qed_read_table_cb(void *opaque, int ret) -{ -    QEDReadTableCB *read_table_cb = opaque; -    QEDTable *table = read_table_cb->table; -    int noffsets = read_table_cb->qiov.size / sizeof(uint64_t); -    int i; - -    /* Handle I/O error */ -    if (ret) { -        goto out; -    } - -    /* Byteswap offsets */ -    for (i = 0; i < noffsets; i++) { -        table->offsets[i] = le64_to_cpu(table->offsets[i]); -    } - -out: -    /* Completion */ -    trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret); -    gencb_complete(&read_table_cb->gencb, ret); -} - -static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, -                           BlockDriverCompletionFunc *cb, void *opaque) -{ -    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb), -                                                cb, opaque); -    QEMUIOVector *qiov = &read_table_cb->qiov; - -    trace_qed_read_table(s, offset, table); - -    read_table_cb->s = s; -    read_table_cb->table = table; -    read_table_cb->iov.iov_base = table->offsets, -    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size, - -    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1); -    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov, -                   qiov->size / BDRV_SECTOR_SIZE, -                   qed_read_table_cb, read_table_cb); -} - -typedef struct { -    GenericCB gencb; -    BDRVQEDState *s; -    QEDTable *orig_table; -    QEDTable *table; -    bool flush;             /* flush after write? */ - -    struct iovec iov; -    QEMUIOVector qiov; -} QEDWriteTableCB; - -static void qed_write_table_cb(void *opaque, int ret) -{ -    QEDWriteTableCB *write_table_cb = opaque; - -    trace_qed_write_table_cb(write_table_cb->s, -                             write_table_cb->orig_table, -                             write_table_cb->flush, -                             ret); - -    if (ret) { -        goto out; -    } - -    if (write_table_cb->flush) { -        /* We still need to flush first */ -        write_table_cb->flush = false; -        bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb, -                       write_table_cb); -        return; -    } - -out: -    qemu_vfree(write_table_cb->table); -    gencb_complete(&write_table_cb->gencb, ret); -} - -/** - * Write out an updated part or all of a table - * - * @s:          QED state - * @offset:     Offset of table in image file, in bytes - * @table:      Table - * @index:      Index of first element - * @n:          Number of elements - * @flush:      Whether or not to sync to disk - * @cb:         Completion function - * @opaque:     Argument for completion function - */ -static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, -                            unsigned int index, unsigned int n, bool flush, -                            BlockDriverCompletionFunc *cb, void *opaque) -{ -    QEDWriteTableCB *write_table_cb; -    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1; -    unsigned int start, end, i; -    size_t len_bytes; - -    trace_qed_write_table(s, offset, table, index, n); - -    /* Calculate indices of the first and one after last elements */ -    start = index & ~sector_mask; -    end = (index + n + sector_mask) & ~sector_mask; - -    len_bytes = (end - start) * sizeof(uint64_t); - -    write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque); -    write_table_cb->s = s; -    write_table_cb->orig_table = table; -    write_table_cb->flush = flush; -    write_table_cb->table = qemu_blockalign(s->bs, len_bytes); -    write_table_cb->iov.iov_base = write_table_cb->table->offsets; -    write_table_cb->iov.iov_len = len_bytes; -    qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1); - -    /* Byteswap table */ -    for (i = start; i < end; i++) { -        uint64_t le_offset = cpu_to_le64(table->offsets[i]); -        write_table_cb->table->offsets[i - start] = le_offset; -    } - -    /* Adjust for offset into table */ -    offset += start * sizeof(uint64_t); - -    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, -                    &write_table_cb->qiov, -                    write_table_cb->qiov.size / BDRV_SECTOR_SIZE, -                    qed_write_table_cb, write_table_cb); -} - -/** - * Propagate return value from async callback - */ -static void qed_sync_cb(void *opaque, int ret) -{ -    *(int *)opaque = ret; -} - -int qed_read_l1_table_sync(BDRVQEDState *s) -{ -    int ret = -EINPROGRESS; - -    qed_read_table(s, s->header.l1_table_offset, -                   s->l1_table, qed_sync_cb, &ret); -    while (ret == -EINPROGRESS) { -        qemu_aio_wait(); -    } - -    return ret; -} - -void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, -                        BlockDriverCompletionFunc *cb, void *opaque) -{ -    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE); -    qed_write_table(s, s->header.l1_table_offset, -                    s->l1_table, index, n, false, cb, opaque); -} - -int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, -                            unsigned int n) -{ -    int ret = -EINPROGRESS; - -    qed_write_l1_table(s, index, n, qed_sync_cb, &ret); -    while (ret == -EINPROGRESS) { -        qemu_aio_wait(); -    } - -    return ret; -} - -typedef struct { -    GenericCB gencb; -    BDRVQEDState *s; -    uint64_t l2_offset; -    QEDRequest *request; -} QEDReadL2TableCB; - -static void qed_read_l2_table_cb(void *opaque, int ret) -{ -    QEDReadL2TableCB *read_l2_table_cb = opaque; -    QEDRequest *request = read_l2_table_cb->request; -    BDRVQEDState *s = read_l2_table_cb->s; -    CachedL2Table *l2_table = request->l2_table; -    uint64_t l2_offset = read_l2_table_cb->l2_offset; - -    if (ret) { -        /* can't trust loaded L2 table anymore */ -        qed_unref_l2_cache_entry(l2_table); -        request->l2_table = NULL; -    } else { -        l2_table->offset = l2_offset; - -        qed_commit_l2_cache_entry(&s->l2_cache, l2_table); - -        /* This is guaranteed to succeed because we just committed the entry -         * to the cache. -         */ -        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); -        assert(request->l2_table != NULL); -    } - -    gencb_complete(&read_l2_table_cb->gencb, ret); -} - -void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, -                       BlockDriverCompletionFunc *cb, void *opaque) -{ -    QEDReadL2TableCB *read_l2_table_cb; - -    qed_unref_l2_cache_entry(request->l2_table); - -    /* Check for cached L2 entry */ -    request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset); -    if (request->l2_table) { -        cb(opaque, 0); -        return; -    } - -    request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); -    request->l2_table->table = qed_alloc_table(s); - -    read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque); -    read_l2_table_cb->s = s; -    read_l2_table_cb->l2_offset = offset; -    read_l2_table_cb->request = request; - -    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD); -    qed_read_table(s, offset, request->l2_table->table, -                   qed_read_l2_table_cb, read_l2_table_cb); -} - -int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset) -{ -    int ret = -EINPROGRESS; - -    qed_read_l2_table(s, request, offset, qed_sync_cb, &ret); -    while (ret == -EINPROGRESS) { -        qemu_aio_wait(); -    } - -    return ret; -} - -void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, -                        unsigned int index, unsigned int n, bool flush, -                        BlockDriverCompletionFunc *cb, void *opaque) -{ -    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE); -    qed_write_table(s, request->l2_table->offset, -                    request->l2_table->table, index, n, flush, cb, opaque); -} - -int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, -                            unsigned int index, unsigned int n, bool flush) -{ -    int ret = -EINPROGRESS; - -    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); -    while (ret == -EINPROGRESS) { -        qemu_aio_wait(); -    } - -    return ret; -} diff --git a/contrib/qemu/block/qed.c b/contrib/qemu/block/qed.c deleted file mode 100644 index f767b0528ce..00000000000 --- a/contrib/qemu/block/qed.c +++ /dev/null @@ -1,1596 +0,0 @@ -/* - * QEMU Enhanced Disk Format - * - * Copyright IBM, Corp. 2010 - * - * Authors: - *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com> - *  Anthony Liguori   <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#include "qemu/timer.h" -#include "trace.h" -#include "qed.h" -#include "qapi/qmp/qerror.h" -#include "migration/migration.h" - -static void qed_aio_cancel(BlockDriverAIOCB *blockacb) -{ -    QEDAIOCB *acb = (QEDAIOCB *)blockacb; -    bool finished = false; - -    /* Wait for the request to finish */ -    acb->finished = &finished; -    while (!finished) { -        qemu_aio_wait(); -    } -} - -static const AIOCBInfo qed_aiocb_info = { -    .aiocb_size         = sizeof(QEDAIOCB), -    .cancel             = qed_aio_cancel, -}; - -static int bdrv_qed_probe(const uint8_t *buf, int buf_size, -                          const char *filename) -{ -    const QEDHeader *header = (const QEDHeader *)buf; - -    if (buf_size < sizeof(*header)) { -        return 0; -    } -    if (le32_to_cpu(header->magic) != QED_MAGIC) { -        return 0; -    } -    return 100; -} - -/** - * Check whether an image format is raw - * - * @fmt:    Backing file format, may be NULL - */ -static bool qed_fmt_is_raw(const char *fmt) -{ -    return fmt && strcmp(fmt, "raw") == 0; -} - -static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu) -{ -    cpu->magic = le32_to_cpu(le->magic); -    cpu->cluster_size = le32_to_cpu(le->cluster_size); -    cpu->table_size = le32_to_cpu(le->table_size); -    cpu->header_size = le32_to_cpu(le->header_size); -    cpu->features = le64_to_cpu(le->features); -    cpu->compat_features = le64_to_cpu(le->compat_features); -    cpu->autoclear_features = le64_to_cpu(le->autoclear_features); -    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset); -    cpu->image_size = le64_to_cpu(le->image_size); -    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset); -    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size); -} - -static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le) -{ -    le->magic = cpu_to_le32(cpu->magic); -    le->cluster_size = cpu_to_le32(cpu->cluster_size); -    le->table_size = cpu_to_le32(cpu->table_size); -    le->header_size = cpu_to_le32(cpu->header_size); -    le->features = cpu_to_le64(cpu->features); -    le->compat_features = cpu_to_le64(cpu->compat_features); -    le->autoclear_features = cpu_to_le64(cpu->autoclear_features); -    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset); -    le->image_size = cpu_to_le64(cpu->image_size); -    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset); -    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size); -} - -int qed_write_header_sync(BDRVQEDState *s) -{ -    QEDHeader le; -    int ret; - -    qed_header_cpu_to_le(&s->header, &le); -    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le)); -    if (ret != sizeof(le)) { -        return ret; -    } -    return 0; -} - -typedef struct { -    GenericCB gencb; -    BDRVQEDState *s; -    struct iovec iov; -    QEMUIOVector qiov; -    int nsectors; -    uint8_t *buf; -} QEDWriteHeaderCB; - -static void qed_write_header_cb(void *opaque, int ret) -{ -    QEDWriteHeaderCB *write_header_cb = opaque; - -    qemu_vfree(write_header_cb->buf); -    gencb_complete(write_header_cb, ret); -} - -static void qed_write_header_read_cb(void *opaque, int ret) -{ -    QEDWriteHeaderCB *write_header_cb = opaque; -    BDRVQEDState *s = write_header_cb->s; - -    if (ret) { -        qed_write_header_cb(write_header_cb, ret); -        return; -    } - -    /* Update header */ -    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf); - -    bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov, -                    write_header_cb->nsectors, qed_write_header_cb, -                    write_header_cb); -} - -/** - * Update header in-place (does not rewrite backing filename or other strings) - * - * This function only updates known header fields in-place and does not affect - * extra data after the QED header. - */ -static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb, -                             void *opaque) -{ -    /* We must write full sectors for O_DIRECT but cannot necessarily generate -     * the data following the header if an unrecognized compat feature is -     * active.  Therefore, first read the sectors containing the header, update -     * them, and write back. -     */ - -    int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) / -                   BDRV_SECTOR_SIZE; -    size_t len = nsectors * BDRV_SECTOR_SIZE; -    QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb), -                                                    cb, opaque); - -    write_header_cb->s = s; -    write_header_cb->nsectors = nsectors; -    write_header_cb->buf = qemu_blockalign(s->bs, len); -    write_header_cb->iov.iov_base = write_header_cb->buf; -    write_header_cb->iov.iov_len = len; -    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1); - -    bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors, -                   qed_write_header_read_cb, write_header_cb); -} - -static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size) -{ -    uint64_t table_entries; -    uint64_t l2_size; - -    table_entries = (table_size * cluster_size) / sizeof(uint64_t); -    l2_size = table_entries * cluster_size; - -    return l2_size * table_entries; -} - -static bool qed_is_cluster_size_valid(uint32_t cluster_size) -{ -    if (cluster_size < QED_MIN_CLUSTER_SIZE || -        cluster_size > QED_MAX_CLUSTER_SIZE) { -        return false; -    } -    if (cluster_size & (cluster_size - 1)) { -        return false; /* not power of 2 */ -    } -    return true; -} - -static bool qed_is_table_size_valid(uint32_t table_size) -{ -    if (table_size < QED_MIN_TABLE_SIZE || -        table_size > QED_MAX_TABLE_SIZE) { -        return false; -    } -    if (table_size & (table_size - 1)) { -        return false; /* not power of 2 */ -    } -    return true; -} - -static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size, -                                    uint32_t table_size) -{ -    if (image_size % BDRV_SECTOR_SIZE != 0) { -        return false; /* not multiple of sector size */ -    } -    if (image_size > qed_max_image_size(cluster_size, table_size)) { -        return false; /* image is too large */ -    } -    return true; -} - -/** - * Read a string of known length from the image file - * - * @file:       Image file - * @offset:     File offset to start of string, in bytes - * @n:          String length in bytes - * @buf:        Destination buffer - * @buflen:     Destination buffer length in bytes - * @ret:        0 on success, -errno on failure - * - * The string is NUL-terminated. - */ -static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n, -                           char *buf, size_t buflen) -{ -    int ret; -    if (n >= buflen) { -        return -EINVAL; -    } -    ret = bdrv_pread(file, offset, buf, n); -    if (ret < 0) { -        return ret; -    } -    buf[n] = '\0'; -    return 0; -} - -/** - * Allocate new clusters - * - * @s:          QED state - * @n:          Number of contiguous clusters to allocate - * @ret:        Offset of first allocated cluster - * - * This function only produces the offset where the new clusters should be - * written.  It updates BDRVQEDState but does not make any changes to the image - * file. - */ -static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n) -{ -    uint64_t offset = s->file_size; -    s->file_size += n * s->header.cluster_size; -    return offset; -} - -QEDTable *qed_alloc_table(BDRVQEDState *s) -{ -    /* Honor O_DIRECT memory alignment requirements */ -    return qemu_blockalign(s->bs, -                           s->header.cluster_size * s->header.table_size); -} - -/** - * Allocate a new zeroed L2 table - */ -static CachedL2Table *qed_new_l2_table(BDRVQEDState *s) -{ -    CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache); - -    l2_table->table = qed_alloc_table(s); -    l2_table->offset = qed_alloc_clusters(s, s->header.table_size); - -    memset(l2_table->table->offsets, 0, -           s->header.cluster_size * s->header.table_size); -    return l2_table; -} - -static void qed_aio_next_io(void *opaque, int ret); - -static void qed_plug_allocating_write_reqs(BDRVQEDState *s) -{ -    assert(!s->allocating_write_reqs_plugged); - -    s->allocating_write_reqs_plugged = true; -} - -static void qed_unplug_allocating_write_reqs(BDRVQEDState *s) -{ -    QEDAIOCB *acb; - -    assert(s->allocating_write_reqs_plugged); - -    s->allocating_write_reqs_plugged = false; - -    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); -    if (acb) { -        qed_aio_next_io(acb, 0); -    } -} - -static void qed_finish_clear_need_check(void *opaque, int ret) -{ -    /* Do nothing */ -} - -static void qed_flush_after_clear_need_check(void *opaque, int ret) -{ -    BDRVQEDState *s = opaque; - -    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s); - -    /* No need to wait until flush completes */ -    qed_unplug_allocating_write_reqs(s); -} - -static void qed_clear_need_check(void *opaque, int ret) -{ -    BDRVQEDState *s = opaque; - -    if (ret) { -        qed_unplug_allocating_write_reqs(s); -        return; -    } - -    s->header.features &= ~QED_F_NEED_CHECK; -    qed_write_header(s, qed_flush_after_clear_need_check, s); -} - -static void qed_need_check_timer_cb(void *opaque) -{ -    BDRVQEDState *s = opaque; - -    /* The timer should only fire when allocating writes have drained */ -    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs)); - -    trace_qed_need_check_timer_cb(s); - -    qed_plug_allocating_write_reqs(s); - -    /* Ensure writes are on disk before clearing flag */ -    bdrv_aio_flush(s->bs, qed_clear_need_check, s); -} - -static void qed_start_need_check_timer(BDRVQEDState *s) -{ -    trace_qed_start_need_check_timer(s); - -    /* Use vm_clock so we don't alter the image file while suspended for -     * migration. -     */ -    qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) + -                   get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT); -} - -/* It's okay to call this multiple times or when no timer is started */ -static void qed_cancel_need_check_timer(BDRVQEDState *s) -{ -    trace_qed_cancel_need_check_timer(s); -    qemu_del_timer(s->need_check_timer); -} - -static void bdrv_qed_rebind(BlockDriverState *bs) -{ -    BDRVQEDState *s = bs->opaque; -    s->bs = bs; -} - -static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags) -{ -    BDRVQEDState *s = bs->opaque; -    QEDHeader le_header; -    int64_t file_size; -    int ret; - -    s->bs = bs; -    QSIMPLEQ_INIT(&s->allocating_write_reqs); - -    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); -    if (ret < 0) { -        return ret; -    } -    qed_header_le_to_cpu(&le_header, &s->header); - -    if (s->header.magic != QED_MAGIC) { -        return -EMEDIUMTYPE; -    } -    if (s->header.features & ~QED_FEATURE_MASK) { -        /* image uses unsupported feature bits */ -        char buf[64]; -        snprintf(buf, sizeof(buf), "%" PRIx64, -            s->header.features & ~QED_FEATURE_MASK); -        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, -            bs->device_name, "QED", buf); -        return -ENOTSUP; -    } -    if (!qed_is_cluster_size_valid(s->header.cluster_size)) { -        return -EINVAL; -    } - -    /* Round down file size to the last cluster */ -    file_size = bdrv_getlength(bs->file); -    if (file_size < 0) { -        return file_size; -    } -    s->file_size = qed_start_of_cluster(s, file_size); - -    if (!qed_is_table_size_valid(s->header.table_size)) { -        return -EINVAL; -    } -    if (!qed_is_image_size_valid(s->header.image_size, -                                 s->header.cluster_size, -                                 s->header.table_size)) { -        return -EINVAL; -    } -    if (!qed_check_table_offset(s, s->header.l1_table_offset)) { -        return -EINVAL; -    } - -    s->table_nelems = (s->header.cluster_size * s->header.table_size) / -                      sizeof(uint64_t); -    s->l2_shift = ffs(s->header.cluster_size) - 1; -    s->l2_mask = s->table_nelems - 1; -    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1; - -    if ((s->header.features & QED_F_BACKING_FILE)) { -        if ((uint64_t)s->header.backing_filename_offset + -            s->header.backing_filename_size > -            s->header.cluster_size * s->header.header_size) { -            return -EINVAL; -        } - -        ret = qed_read_string(bs->file, s->header.backing_filename_offset, -                              s->header.backing_filename_size, bs->backing_file, -                              sizeof(bs->backing_file)); -        if (ret < 0) { -            return ret; -        } - -        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) { -            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw"); -        } -    } - -    /* Reset unknown autoclear feature bits.  This is a backwards -     * compatibility mechanism that allows images to be opened by older -     * programs, which "knock out" unknown feature bits.  When an image is -     * opened by a newer program again it can detect that the autoclear -     * feature is no longer valid. -     */ -    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && -        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) { -        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; - -        ret = qed_write_header_sync(s); -        if (ret) { -            return ret; -        } - -        /* From here on only known autoclear feature bits are valid */ -        bdrv_flush(bs->file); -    } - -    s->l1_table = qed_alloc_table(s); -    qed_init_l2_cache(&s->l2_cache); - -    ret = qed_read_l1_table_sync(s); -    if (ret) { -        goto out; -    } - -    /* If image was not closed cleanly, check consistency */ -    if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) { -        /* Read-only images cannot be fixed.  There is no risk of corruption -         * since write operations are not possible.  Therefore, allow -         * potentially inconsistent images to be opened read-only.  This can -         * aid data recovery from an otherwise inconsistent image. -         */ -        if (!bdrv_is_read_only(bs->file) && -            !(flags & BDRV_O_INCOMING)) { -            BdrvCheckResult result = {0}; - -            ret = qed_check(s, &result, true); -            if (ret) { -                goto out; -            } -        } -    } - -    s->need_check_timer = qemu_new_timer_ns(vm_clock, -                                            qed_need_check_timer_cb, s); - -out: -    if (ret) { -        qed_free_l2_cache(&s->l2_cache); -        qemu_vfree(s->l1_table); -    } -    return ret; -} - -/* We have nothing to do for QED reopen, stubs just return - * success */ -static int bdrv_qed_reopen_prepare(BDRVReopenState *state, -                                   BlockReopenQueue *queue, Error **errp) -{ -    return 0; -} - -static void bdrv_qed_close(BlockDriverState *bs) -{ -    BDRVQEDState *s = bs->opaque; - -    qed_cancel_need_check_timer(s); -    qemu_free_timer(s->need_check_timer); - -    /* Ensure writes reach stable storage */ -    bdrv_flush(bs->file); - -    /* Clean shutdown, no check required on next open */ -    if (s->header.features & QED_F_NEED_CHECK) { -        s->header.features &= ~QED_F_NEED_CHECK; -        qed_write_header_sync(s); -    } - -    qed_free_l2_cache(&s->l2_cache); -    qemu_vfree(s->l1_table); -} - -static int qed_create(const char *filename, uint32_t cluster_size, -                      uint64_t image_size, uint32_t table_size, -                      const char *backing_file, const char *backing_fmt) -{ -    QEDHeader header = { -        .magic = QED_MAGIC, -        .cluster_size = cluster_size, -        .table_size = table_size, -        .header_size = 1, -        .features = 0, -        .compat_features = 0, -        .l1_table_offset = cluster_size, -        .image_size = image_size, -    }; -    QEDHeader le_header; -    uint8_t *l1_table = NULL; -    size_t l1_size = header.cluster_size * header.table_size; -    int ret = 0; -    BlockDriverState *bs = NULL; - -    ret = bdrv_create_file(filename, NULL); -    if (ret < 0) { -        return ret; -    } - -    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB); -    if (ret < 0) { -        return ret; -    } - -    /* File must start empty and grow, check truncate is supported */ -    ret = bdrv_truncate(bs, 0); -    if (ret < 0) { -        goto out; -    } - -    if (backing_file) { -        header.features |= QED_F_BACKING_FILE; -        header.backing_filename_offset = sizeof(le_header); -        header.backing_filename_size = strlen(backing_file); - -        if (qed_fmt_is_raw(backing_fmt)) { -            header.features |= QED_F_BACKING_FORMAT_NO_PROBE; -        } -    } - -    qed_header_cpu_to_le(&header, &le_header); -    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header)); -    if (ret < 0) { -        goto out; -    } -    ret = bdrv_pwrite(bs, sizeof(le_header), backing_file, -                      header.backing_filename_size); -    if (ret < 0) { -        goto out; -    } - -    l1_table = g_malloc0(l1_size); -    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size); -    if (ret < 0) { -        goto out; -    } - -    ret = 0; /* success */ -out: -    g_free(l1_table); -    bdrv_delete(bs); -    return ret; -} - -static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options) -{ -    uint64_t image_size = 0; -    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE; -    uint32_t table_size = QED_DEFAULT_TABLE_SIZE; -    const char *backing_file = NULL; -    const char *backing_fmt = NULL; - -    while (options && options->name) { -        if (!strcmp(options->name, BLOCK_OPT_SIZE)) { -            image_size = options->value.n; -        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { -            backing_file = options->value.s; -        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { -            backing_fmt = options->value.s; -        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { -            if (options->value.n) { -                cluster_size = options->value.n; -            } -        } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) { -            if (options->value.n) { -                table_size = options->value.n; -            } -        } -        options++; -    } - -    if (!qed_is_cluster_size_valid(cluster_size)) { -        fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n", -                QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE); -        return -EINVAL; -    } -    if (!qed_is_table_size_valid(table_size)) { -        fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n", -                QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE); -        return -EINVAL; -    } -    if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) { -        fprintf(stderr, "QED image size must be a non-zero multiple of " -                        "cluster size and less than %" PRIu64 " bytes\n", -                qed_max_image_size(cluster_size, table_size)); -        return -EINVAL; -    } - -    return qed_create(filename, cluster_size, image_size, table_size, -                      backing_file, backing_fmt); -} - -typedef struct { -    Coroutine *co; -    int is_allocated; -    int *pnum; -} QEDIsAllocatedCB; - -static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) -{ -    QEDIsAllocatedCB *cb = opaque; -    *cb->pnum = len / BDRV_SECTOR_SIZE; -    cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO); -    if (cb->co) { -        qemu_coroutine_enter(cb->co, NULL); -    } -} - -static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs, -                                                 int64_t sector_num, -                                                 int nb_sectors, int *pnum) -{ -    BDRVQEDState *s = bs->opaque; -    uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; -    size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; -    QEDIsAllocatedCB cb = { -        .is_allocated = -1, -        .pnum = pnum, -    }; -    QEDRequest request = { .l2_table = NULL }; - -    qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb); - -    /* Now sleep if the callback wasn't invoked immediately */ -    while (cb.is_allocated == -1) { -        cb.co = qemu_coroutine_self(); -        qemu_coroutine_yield(); -    } - -    qed_unref_l2_cache_entry(request.l2_table); - -    return cb.is_allocated; -} - -static int bdrv_qed_make_empty(BlockDriverState *bs) -{ -    return -ENOTSUP; -} - -static BDRVQEDState *acb_to_s(QEDAIOCB *acb) -{ -    return acb->common.bs->opaque; -} - -/** - * Read from the backing file or zero-fill if no backing file - * - * @s:          QED state - * @pos:        Byte position in device - * @qiov:       Destination I/O vector - * @cb:         Completion function - * @opaque:     User data for completion function - * - * This function reads qiov->size bytes starting at pos from the backing file. - * If there is no backing file then zeroes are read. - */ -static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, -                                  QEMUIOVector *qiov, -                                  BlockDriverCompletionFunc *cb, void *opaque) -{ -    uint64_t backing_length = 0; -    size_t size; - -    /* If there is a backing file, get its length.  Treat the absence of a -     * backing file like a zero length backing file. -     */ -    if (s->bs->backing_hd) { -        int64_t l = bdrv_getlength(s->bs->backing_hd); -        if (l < 0) { -            cb(opaque, l); -            return; -        } -        backing_length = l; -    } - -    /* Zero all sectors if reading beyond the end of the backing file */ -    if (pos >= backing_length || -        pos + qiov->size > backing_length) { -        qemu_iovec_memset(qiov, 0, 0, qiov->size); -    } - -    /* Complete now if there are no backing file sectors to read */ -    if (pos >= backing_length) { -        cb(opaque, 0); -        return; -    } - -    /* If the read straddles the end of the backing file, shorten it */ -    size = MIN((uint64_t)backing_length - pos, qiov->size); - -    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); -    bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE, -                   qiov, size / BDRV_SECTOR_SIZE, cb, opaque); -} - -typedef struct { -    GenericCB gencb; -    BDRVQEDState *s; -    QEMUIOVector qiov; -    struct iovec iov; -    uint64_t offset; -} CopyFromBackingFileCB; - -static void qed_copy_from_backing_file_cb(void *opaque, int ret) -{ -    CopyFromBackingFileCB *copy_cb = opaque; -    qemu_vfree(copy_cb->iov.iov_base); -    gencb_complete(©_cb->gencb, ret); -} - -static void qed_copy_from_backing_file_write(void *opaque, int ret) -{ -    CopyFromBackingFileCB *copy_cb = opaque; -    BDRVQEDState *s = copy_cb->s; - -    if (ret) { -        qed_copy_from_backing_file_cb(copy_cb, ret); -        return; -    } - -    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE); -    bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE, -                    ©_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE, -                    qed_copy_from_backing_file_cb, copy_cb); -} - -/** - * Copy data from backing file into the image - * - * @s:          QED state - * @pos:        Byte position in device - * @len:        Number of bytes - * @offset:     Byte offset in image file - * @cb:         Completion function - * @opaque:     User data for completion function - */ -static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, -                                       uint64_t len, uint64_t offset, -                                       BlockDriverCompletionFunc *cb, -                                       void *opaque) -{ -    CopyFromBackingFileCB *copy_cb; - -    /* Skip copy entirely if there is no work to do */ -    if (len == 0) { -        cb(opaque, 0); -        return; -    } - -    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque); -    copy_cb->s = s; -    copy_cb->offset = offset; -    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len); -    copy_cb->iov.iov_len = len; -    qemu_iovec_init_external(©_cb->qiov, ©_cb->iov, 1); - -    qed_read_backing_file(s, pos, ©_cb->qiov, -                          qed_copy_from_backing_file_write, copy_cb); -} - -/** - * Link one or more contiguous clusters into a table - * - * @s:              QED state - * @table:          L2 table - * @index:          First cluster index - * @n:              Number of contiguous clusters - * @cluster:        First cluster offset - * - * The cluster offset may be an allocated byte offset in the image file, the - * zero cluster marker, or the unallocated cluster marker. - */ -static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index, -                                unsigned int n, uint64_t cluster) -{ -    int i; -    for (i = index; i < index + n; i++) { -        table->offsets[i] = cluster; -        if (!qed_offset_is_unalloc_cluster(cluster) && -            !qed_offset_is_zero_cluster(cluster)) { -            cluster += s->header.cluster_size; -        } -    } -} - -static void qed_aio_complete_bh(void *opaque) -{ -    QEDAIOCB *acb = opaque; -    BlockDriverCompletionFunc *cb = acb->common.cb; -    void *user_opaque = acb->common.opaque; -    int ret = acb->bh_ret; -    bool *finished = acb->finished; - -    qemu_bh_delete(acb->bh); -    qemu_aio_release(acb); - -    /* Invoke callback */ -    cb(user_opaque, ret); - -    /* Signal cancel completion */ -    if (finished) { -        *finished = true; -    } -} - -static void qed_aio_complete(QEDAIOCB *acb, int ret) -{ -    BDRVQEDState *s = acb_to_s(acb); - -    trace_qed_aio_complete(s, acb, ret); - -    /* Free resources */ -    qemu_iovec_destroy(&acb->cur_qiov); -    qed_unref_l2_cache_entry(acb->request.l2_table); - -    /* Free the buffer we may have allocated for zero writes */ -    if (acb->flags & QED_AIOCB_ZERO) { -        qemu_vfree(acb->qiov->iov[0].iov_base); -        acb->qiov->iov[0].iov_base = NULL; -    } - -    /* Arrange for a bh to invoke the completion function */ -    acb->bh_ret = ret; -    acb->bh = qemu_bh_new(qed_aio_complete_bh, acb); -    qemu_bh_schedule(acb->bh); - -    /* Start next allocating write request waiting behind this one.  Note that -     * requests enqueue themselves when they first hit an unallocated cluster -     * but they wait until the entire request is finished before waking up the -     * next request in the queue.  This ensures that we don't cycle through -     * requests multiple times but rather finish one at a time completely. -     */ -    if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { -        QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next); -        acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs); -        if (acb) { -            qed_aio_next_io(acb, 0); -        } else if (s->header.features & QED_F_NEED_CHECK) { -            qed_start_need_check_timer(s); -        } -    } -} - -/** - * Commit the current L2 table to the cache - */ -static void qed_commit_l2_update(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); -    CachedL2Table *l2_table = acb->request.l2_table; -    uint64_t l2_offset = l2_table->offset; - -    qed_commit_l2_cache_entry(&s->l2_cache, l2_table); - -    /* This is guaranteed to succeed because we just committed the entry to the -     * cache. -     */ -    acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset); -    assert(acb->request.l2_table != NULL); - -    qed_aio_next_io(opaque, ret); -} - -/** - * Update L1 table with new L2 table offset and write it out - */ -static void qed_aio_write_l1_update(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); -    int index; - -    if (ret) { -        qed_aio_complete(acb, ret); -        return; -    } - -    index = qed_l1_index(s, acb->cur_pos); -    s->l1_table->offsets[index] = acb->request.l2_table->offset; - -    qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb); -} - -/** - * Update L2 table with new cluster offsets and write them out - */ -static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset) -{ -    BDRVQEDState *s = acb_to_s(acb); -    bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1; -    int index; - -    if (ret) { -        goto err; -    } - -    if (need_alloc) { -        qed_unref_l2_cache_entry(acb->request.l2_table); -        acb->request.l2_table = qed_new_l2_table(s); -    } - -    index = qed_l2_index(s, acb->cur_pos); -    qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters, -                         offset); - -    if (need_alloc) { -        /* Write out the whole new L2 table */ -        qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true, -                            qed_aio_write_l1_update, acb); -    } else { -        /* Write out only the updated part of the L2 table */ -        qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false, -                            qed_aio_next_io, acb); -    } -    return; - -err: -    qed_aio_complete(acb, ret); -} - -static void qed_aio_write_l2_update_cb(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    qed_aio_write_l2_update(acb, ret, acb->cur_cluster); -} - -/** - * Flush new data clusters before updating the L2 table - * - * This flush is necessary when a backing file is in use.  A crash during an - * allocating write could result in empty clusters in the image.  If the write - * only touched a subregion of the cluster, then backing image sectors have - * been lost in the untouched region.  The solution is to flush after writing a - * new data cluster and before updating the L2 table. - */ -static void qed_aio_write_flush_before_l2_update(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); - -    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) { -        qed_aio_complete(acb, -EIO); -    } -} - -/** - * Write data to the image file - */ -static void qed_aio_write_main(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); -    uint64_t offset = acb->cur_cluster + -                      qed_offset_into_cluster(s, acb->cur_pos); -    BlockDriverCompletionFunc *next_fn; - -    trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size); - -    if (ret) { -        qed_aio_complete(acb, ret); -        return; -    } - -    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) { -        next_fn = qed_aio_next_io; -    } else { -        if (s->bs->backing_hd) { -            next_fn = qed_aio_write_flush_before_l2_update; -        } else { -            next_fn = qed_aio_write_l2_update_cb; -        } -    } - -    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); -    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, -                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, -                    next_fn, acb); -} - -/** - * Populate back untouched region of new data cluster - */ -static void qed_aio_write_postfill(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); -    uint64_t start = acb->cur_pos + acb->cur_qiov.size; -    uint64_t len = -        qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start; -    uint64_t offset = acb->cur_cluster + -                      qed_offset_into_cluster(s, acb->cur_pos) + -                      acb->cur_qiov.size; - -    if (ret) { -        qed_aio_complete(acb, ret); -        return; -    } - -    trace_qed_aio_write_postfill(s, acb, start, len, offset); -    qed_copy_from_backing_file(s, start, len, offset, -                                qed_aio_write_main, acb); -} - -/** - * Populate front untouched region of new data cluster - */ -static void qed_aio_write_prefill(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); -    uint64_t start = qed_start_of_cluster(s, acb->cur_pos); -    uint64_t len = qed_offset_into_cluster(s, acb->cur_pos); - -    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster); -    qed_copy_from_backing_file(s, start, len, acb->cur_cluster, -                                qed_aio_write_postfill, acb); -} - -/** - * Check if the QED_F_NEED_CHECK bit should be set during allocating write - */ -static bool qed_should_set_need_check(BDRVQEDState *s) -{ -    /* The flush before L2 update path ensures consistency */ -    if (s->bs->backing_hd) { -        return false; -    } - -    return !(s->header.features & QED_F_NEED_CHECK); -} - -static void qed_aio_write_zero_cluster(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; - -    if (ret) { -        qed_aio_complete(acb, ret); -        return; -    } - -    qed_aio_write_l2_update(acb, 0, 1); -} - -/** - * Write new data cluster - * - * @acb:        Write request - * @len:        Length in bytes - * - * This path is taken when writing to previously unallocated clusters. - */ -static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) -{ -    BDRVQEDState *s = acb_to_s(acb); -    BlockDriverCompletionFunc *cb; - -    /* Cancel timer when the first allocating request comes in */ -    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) { -        qed_cancel_need_check_timer(s); -    } - -    /* Freeze this request if another allocating write is in progress */ -    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) { -        QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next); -    } -    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) || -        s->allocating_write_reqs_plugged) { -        return; /* wait for existing request to finish */ -    } - -    acb->cur_nclusters = qed_bytes_to_clusters(s, -            qed_offset_into_cluster(s, acb->cur_pos) + len); -    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); - -    if (acb->flags & QED_AIOCB_ZERO) { -        /* Skip ahead if the clusters are already zero */ -        if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { -            qed_aio_next_io(acb, 0); -            return; -        } - -        cb = qed_aio_write_zero_cluster; -    } else { -        cb = qed_aio_write_prefill; -        acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); -    } - -    if (qed_should_set_need_check(s)) { -        s->header.features |= QED_F_NEED_CHECK; -        qed_write_header(s, cb, acb); -    } else { -        cb(acb, 0); -    } -} - -/** - * Write data cluster in place - * - * @acb:        Write request - * @offset:     Cluster offset in bytes - * @len:        Length in bytes - * - * This path is taken when writing to already allocated clusters. - */ -static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) -{ -    /* Allocate buffer for zero writes */ -    if (acb->flags & QED_AIOCB_ZERO) { -        struct iovec *iov = acb->qiov->iov; - -        if (!iov->iov_base) { -            iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len); -            memset(iov->iov_base, 0, iov->iov_len); -        } -    } - -    /* Calculate the I/O vector */ -    acb->cur_cluster = offset; -    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); - -    /* Do the actual write */ -    qed_aio_write_main(acb, 0); -} - -/** - * Write data cluster - * - * @opaque:     Write request - * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, - *              or -errno - * @offset:     Cluster offset in bytes - * @len:        Length in bytes - * - * Callback from qed_find_cluster(). - */ -static void qed_aio_write_data(void *opaque, int ret, -                               uint64_t offset, size_t len) -{ -    QEDAIOCB *acb = opaque; - -    trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len); - -    acb->find_cluster_ret = ret; - -    switch (ret) { -    case QED_CLUSTER_FOUND: -        qed_aio_write_inplace(acb, offset, len); -        break; - -    case QED_CLUSTER_L2: -    case QED_CLUSTER_L1: -    case QED_CLUSTER_ZERO: -        qed_aio_write_alloc(acb, len); -        break; - -    default: -        qed_aio_complete(acb, ret); -        break; -    } -} - -/** - * Read data cluster - * - * @opaque:     Read request - * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1, - *              or -errno - * @offset:     Cluster offset in bytes - * @len:        Length in bytes - * - * Callback from qed_find_cluster(). - */ -static void qed_aio_read_data(void *opaque, int ret, -                              uint64_t offset, size_t len) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); -    BlockDriverState *bs = acb->common.bs; - -    /* Adjust offset into cluster */ -    offset += qed_offset_into_cluster(s, acb->cur_pos); - -    trace_qed_aio_read_data(s, acb, ret, offset, len); - -    if (ret < 0) { -        goto err; -    } - -    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); - -    /* Handle zero cluster and backing file reads */ -    if (ret == QED_CLUSTER_ZERO) { -        qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); -        qed_aio_next_io(acb, 0); -        return; -    } else if (ret != QED_CLUSTER_FOUND) { -        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, -                              qed_aio_next_io, acb); -        return; -    } - -    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); -    bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE, -                   &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, -                   qed_aio_next_io, acb); -    return; - -err: -    qed_aio_complete(acb, ret); -} - -/** - * Begin next I/O or complete the request - */ -static void qed_aio_next_io(void *opaque, int ret) -{ -    QEDAIOCB *acb = opaque; -    BDRVQEDState *s = acb_to_s(acb); -    QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ? -                                qed_aio_write_data : qed_aio_read_data; - -    trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); - -    /* Handle I/O error */ -    if (ret) { -        qed_aio_complete(acb, ret); -        return; -    } - -    acb->qiov_offset += acb->cur_qiov.size; -    acb->cur_pos += acb->cur_qiov.size; -    qemu_iovec_reset(&acb->cur_qiov); - -    /* Complete request */ -    if (acb->cur_pos >= acb->end_pos) { -        qed_aio_complete(acb, 0); -        return; -    } - -    /* Find next cluster and start I/O */ -    qed_find_cluster(s, &acb->request, -                      acb->cur_pos, acb->end_pos - acb->cur_pos, -                      io_fn, acb); -} - -static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, -                                       int64_t sector_num, -                                       QEMUIOVector *qiov, int nb_sectors, -                                       BlockDriverCompletionFunc *cb, -                                       void *opaque, int flags) -{ -    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque); - -    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors, -                        opaque, flags); - -    acb->flags = flags; -    acb->finished = NULL; -    acb->qiov = qiov; -    acb->qiov_offset = 0; -    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; -    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; -    acb->request.l2_table = NULL; -    qemu_iovec_init(&acb->cur_qiov, qiov->niov); - -    /* Start request */ -    qed_aio_next_io(acb, 0); -    return &acb->common; -} - -static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs, -                                            int64_t sector_num, -                                            QEMUIOVector *qiov, int nb_sectors, -                                            BlockDriverCompletionFunc *cb, -                                            void *opaque) -{ -    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); -} - -static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs, -                                             int64_t sector_num, -                                             QEMUIOVector *qiov, int nb_sectors, -                                             BlockDriverCompletionFunc *cb, -                                             void *opaque) -{ -    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, -                         opaque, QED_AIOCB_WRITE); -} - -typedef struct { -    Coroutine *co; -    int ret; -    bool done; -} QEDWriteZeroesCB; - -static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret) -{ -    QEDWriteZeroesCB *cb = opaque; - -    cb->done = true; -    cb->ret = ret; -    if (cb->co) { -        qemu_coroutine_enter(cb->co, NULL); -    } -} - -static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, -                                                 int64_t sector_num, -                                                 int nb_sectors) -{ -    BlockDriverAIOCB *blockacb; -    BDRVQEDState *s = bs->opaque; -    QEDWriteZeroesCB cb = { .done = false }; -    QEMUIOVector qiov; -    struct iovec iov; - -    /* Refuse if there are untouched backing file sectors */ -    if (bs->backing_hd) { -        if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) { -            return -ENOTSUP; -        } -        if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) { -            return -ENOTSUP; -        } -    } - -    /* Zero writes start without an I/O buffer.  If a buffer becomes necessary -     * then it will be allocated during request processing. -     */ -    iov.iov_base = NULL, -    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE, - -    qemu_iovec_init_external(&qiov, &iov, 1); -    blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors, -                             qed_co_write_zeroes_cb, &cb, -                             QED_AIOCB_WRITE | QED_AIOCB_ZERO); -    if (!blockacb) { -        return -EIO; -    } -    if (!cb.done) { -        cb.co = qemu_coroutine_self(); -        qemu_coroutine_yield(); -    } -    assert(cb.done); -    return cb.ret; -} - -static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset) -{ -    BDRVQEDState *s = bs->opaque; -    uint64_t old_image_size; -    int ret; - -    if (!qed_is_image_size_valid(offset, s->header.cluster_size, -                                 s->header.table_size)) { -        return -EINVAL; -    } - -    /* Shrinking is currently not supported */ -    if ((uint64_t)offset < s->header.image_size) { -        return -ENOTSUP; -    } - -    old_image_size = s->header.image_size; -    s->header.image_size = offset; -    ret = qed_write_header_sync(s); -    if (ret < 0) { -        s->header.image_size = old_image_size; -    } -    return ret; -} - -static int64_t bdrv_qed_getlength(BlockDriverState *bs) -{ -    BDRVQEDState *s = bs->opaque; -    return s->header.image_size; -} - -static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) -{ -    BDRVQEDState *s = bs->opaque; - -    memset(bdi, 0, sizeof(*bdi)); -    bdi->cluster_size = s->header.cluster_size; -    bdi->is_dirty = s->header.features & QED_F_NEED_CHECK; -    return 0; -} - -static int bdrv_qed_change_backing_file(BlockDriverState *bs, -                                        const char *backing_file, -                                        const char *backing_fmt) -{ -    BDRVQEDState *s = bs->opaque; -    QEDHeader new_header, le_header; -    void *buffer; -    size_t buffer_len, backing_file_len; -    int ret; - -    /* Refuse to set backing filename if unknown compat feature bits are -     * active.  If the image uses an unknown compat feature then we may not -     * know the layout of data following the header structure and cannot safely -     * add a new string. -     */ -    if (backing_file && (s->header.compat_features & -                         ~QED_COMPAT_FEATURE_MASK)) { -        return -ENOTSUP; -    } - -    memcpy(&new_header, &s->header, sizeof(new_header)); - -    new_header.features &= ~(QED_F_BACKING_FILE | -                             QED_F_BACKING_FORMAT_NO_PROBE); - -    /* Adjust feature flags */ -    if (backing_file) { -        new_header.features |= QED_F_BACKING_FILE; - -        if (qed_fmt_is_raw(backing_fmt)) { -            new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE; -        } -    } - -    /* Calculate new header size */ -    backing_file_len = 0; - -    if (backing_file) { -        backing_file_len = strlen(backing_file); -    } - -    buffer_len = sizeof(new_header); -    new_header.backing_filename_offset = buffer_len; -    new_header.backing_filename_size = backing_file_len; -    buffer_len += backing_file_len; - -    /* Make sure we can rewrite header without failing */ -    if (buffer_len > new_header.header_size * new_header.cluster_size) { -        return -ENOSPC; -    } - -    /* Prepare new header */ -    buffer = g_malloc(buffer_len); - -    qed_header_cpu_to_le(&new_header, &le_header); -    memcpy(buffer, &le_header, sizeof(le_header)); -    buffer_len = sizeof(le_header); - -    if (backing_file) { -        memcpy(buffer + buffer_len, backing_file, backing_file_len); -        buffer_len += backing_file_len; -    } - -    /* Write new header */ -    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len); -    g_free(buffer); -    if (ret == 0) { -        memcpy(&s->header, &new_header, sizeof(new_header)); -    } -    return ret; -} - -static void bdrv_qed_invalidate_cache(BlockDriverState *bs) -{ -    BDRVQEDState *s = bs->opaque; - -    bdrv_qed_close(bs); -    memset(s, 0, sizeof(BDRVQEDState)); -    bdrv_qed_open(bs, NULL, bs->open_flags); -} - -static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result, -                          BdrvCheckMode fix) -{ -    BDRVQEDState *s = bs->opaque; - -    return qed_check(s, result, !!fix); -} - -static QEMUOptionParameter qed_create_options[] = { -    { -        .name = BLOCK_OPT_SIZE, -        .type = OPT_SIZE, -        .help = "Virtual disk size (in bytes)" -    }, { -        .name = BLOCK_OPT_BACKING_FILE, -        .type = OPT_STRING, -        .help = "File name of a base image" -    }, { -        .name = BLOCK_OPT_BACKING_FMT, -        .type = OPT_STRING, -        .help = "Image format of the base image" -    }, { -        .name = BLOCK_OPT_CLUSTER_SIZE, -        .type = OPT_SIZE, -        .help = "Cluster size (in bytes)", -        .value = { .n = QED_DEFAULT_CLUSTER_SIZE }, -    }, { -        .name = BLOCK_OPT_TABLE_SIZE, -        .type = OPT_SIZE, -        .help = "L1/L2 table size (in clusters)" -    }, -    { /* end of list */ } -}; - -static BlockDriver bdrv_qed = { -    .format_name              = "qed", -    .instance_size            = sizeof(BDRVQEDState), -    .create_options           = qed_create_options, - -    .bdrv_probe               = bdrv_qed_probe, -    .bdrv_rebind              = bdrv_qed_rebind, -    .bdrv_open                = bdrv_qed_open, -    .bdrv_close               = bdrv_qed_close, -    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare, -    .bdrv_create              = bdrv_qed_create, -    .bdrv_has_zero_init       = bdrv_has_zero_init_1, -    .bdrv_co_is_allocated     = bdrv_qed_co_is_allocated, -    .bdrv_make_empty          = bdrv_qed_make_empty, -    .bdrv_aio_readv           = bdrv_qed_aio_readv, -    .bdrv_aio_writev          = bdrv_qed_aio_writev, -    .bdrv_co_write_zeroes     = bdrv_qed_co_write_zeroes, -    .bdrv_truncate            = bdrv_qed_truncate, -    .bdrv_getlength           = bdrv_qed_getlength, -    .bdrv_get_info            = bdrv_qed_get_info, -    .bdrv_change_backing_file = bdrv_qed_change_backing_file, -    .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache, -    .bdrv_check               = bdrv_qed_check, -}; - -static void bdrv_qed_init(void) -{ -    bdrv_register(&bdrv_qed); -} - -block_init(bdrv_qed_init); diff --git a/contrib/qemu/block/qed.h b/contrib/qemu/block/qed.h deleted file mode 100644 index 2b4ddedf313..00000000000 --- a/contrib/qemu/block/qed.h +++ /dev/null @@ -1,344 +0,0 @@ -/* - * QEMU Enhanced Disk Format - * - * Copyright IBM, Corp. 2010 - * - * Authors: - *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com> - *  Anthony Liguori   <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef BLOCK_QED_H -#define BLOCK_QED_H - -#include "block/block_int.h" - -/* The layout of a QED file is as follows: - * - * +--------+----------+----------+----------+-----+ - * | header | L1 table | cluster0 | cluster1 | ... | - * +--------+----------+----------+----------+-----+ - * - * There is a 2-level pagetable for cluster allocation: - * - *                     +----------+ - *                     | L1 table | - *                     +----------+ - *                ,------'  |  '------. - *           +----------+   |    +----------+ - *           | L2 table |  ...   | L2 table | - *           +----------+        +----------+ - *       ,------'  |  '------. - *  +----------+   |    +----------+ - *  |   Data   |  ...   |   Data   | - *  +----------+        +----------+ - * - * The L1 table is fixed size and always present.  L2 tables are allocated on - * demand.  The L1 table size determines the maximum possible image size; it - * can be influenced using the cluster_size and table_size values. - * - * All fields are little-endian on disk. - */ - -enum { -    QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24, - -    /* The image supports a backing file */ -    QED_F_BACKING_FILE = 0x01, - -    /* The image needs a consistency check before use */ -    QED_F_NEED_CHECK = 0x02, - -    /* The backing file format must not be probed, treat as raw image */ -    QED_F_BACKING_FORMAT_NO_PROBE = 0x04, - -    /* Feature bits must be used when the on-disk format changes */ -    QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */ -                       QED_F_NEED_CHECK | -                       QED_F_BACKING_FORMAT_NO_PROBE, -    QED_COMPAT_FEATURE_MASK = 0,            /* supported compat feature bits */ -    QED_AUTOCLEAR_FEATURE_MASK = 0,         /* supported autoclear feature bits */ - -    /* Data is stored in groups of sectors called clusters.  Cluster size must -     * be large to avoid keeping too much metadata.  I/O requests that have -     * sub-cluster size will require read-modify-write. -     */ -    QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */ -    QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024, -    QED_DEFAULT_CLUSTER_SIZE = 64 * 1024, - -    /* Allocated clusters are tracked using a 2-level pagetable.  Table size is -     * a multiple of clusters so large maximum image sizes can be supported -     * without jacking up the cluster size too much. -     */ -    QED_MIN_TABLE_SIZE = 1,        /* in clusters */ -    QED_MAX_TABLE_SIZE = 16, -    QED_DEFAULT_TABLE_SIZE = 4, - -    /* Delay to flush and clean image after last allocating write completes */ -    QED_NEED_CHECK_TIMEOUT = 5,    /* in seconds */ -}; - -typedef struct { -    uint32_t magic;                 /* QED\0 */ - -    uint32_t cluster_size;          /* in bytes */ -    uint32_t table_size;            /* for L1 and L2 tables, in clusters */ -    uint32_t header_size;           /* in clusters */ - -    uint64_t features;              /* format feature bits */ -    uint64_t compat_features;       /* compatible feature bits */ -    uint64_t autoclear_features;    /* self-resetting feature bits */ - -    uint64_t l1_table_offset;       /* in bytes */ -    uint64_t image_size;            /* total logical image size, in bytes */ - -    /* if (features & QED_F_BACKING_FILE) */ -    uint32_t backing_filename_offset; /* in bytes from start of header */ -    uint32_t backing_filename_size;   /* in bytes */ -} QEDHeader; - -typedef struct { -    uint64_t offsets[0];            /* in bytes */ -} QEDTable; - -/* The L2 cache is a simple write-through cache for L2 structures */ -typedef struct CachedL2Table { -    QEDTable *table; -    uint64_t offset;    /* offset=0 indicates an invalidate entry */ -    QTAILQ_ENTRY(CachedL2Table) node; -    int ref; -} CachedL2Table; - -typedef struct { -    QTAILQ_HEAD(, CachedL2Table) entries; -    unsigned int n_entries; -} L2TableCache; - -typedef struct QEDRequest { -    CachedL2Table *l2_table; -} QEDRequest; - -enum { -    QED_AIOCB_WRITE = 0x0001,       /* read or write? */ -    QED_AIOCB_ZERO  = 0x0002,       /* zero write, used with QED_AIOCB_WRITE */ -}; - -typedef struct QEDAIOCB { -    BlockDriverAIOCB common; -    QEMUBH *bh; -    int bh_ret;                     /* final return status for completion bh */ -    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */ -    int flags;                      /* QED_AIOCB_* bits ORed together */ -    bool *finished;                 /* signal for cancel completion */ -    uint64_t end_pos;               /* request end on block device, in bytes */ - -    /* User scatter-gather list */ -    QEMUIOVector *qiov; -    size_t qiov_offset;             /* byte count already processed */ - -    /* Current cluster scatter-gather list */ -    QEMUIOVector cur_qiov; -    uint64_t cur_pos;               /* position on block device, in bytes */ -    uint64_t cur_cluster;           /* cluster offset in image file */ -    unsigned int cur_nclusters;     /* number of clusters being accessed */ -    int find_cluster_ret;           /* used for L1/L2 update */ - -    QEDRequest request; -} QEDAIOCB; - -typedef struct { -    BlockDriverState *bs;           /* device */ -    uint64_t file_size;             /* length of image file, in bytes */ - -    QEDHeader header;               /* always cpu-endian */ -    QEDTable *l1_table; -    L2TableCache l2_cache;          /* l2 table cache */ -    uint32_t table_nelems; -    uint32_t l1_shift; -    uint32_t l2_shift; -    uint32_t l2_mask; - -    /* Allocating write request queue */ -    QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs; -    bool allocating_write_reqs_plugged; - -    /* Periodic flush and clear need check flag */ -    QEMUTimer *need_check_timer; -} BDRVQEDState; - -enum { -    QED_CLUSTER_FOUND,         /* cluster found */ -    QED_CLUSTER_ZERO,          /* zero cluster found */ -    QED_CLUSTER_L2,            /* cluster missing in L2 */ -    QED_CLUSTER_L1,            /* cluster missing in L1 */ -}; - -/** - * qed_find_cluster() completion callback - * - * @opaque:     User data for completion callback - * @ret:        QED_CLUSTER_FOUND   Success - *              QED_CLUSTER_L2      Data cluster unallocated in L2 - *              QED_CLUSTER_L1      L2 unallocated in L1 - *              -errno              POSIX error occurred - * @offset:     Data cluster offset - * @len:        Contiguous bytes starting from cluster offset - * - * This function is invoked when qed_find_cluster() completes. - * - * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range - * in the image file. - * - * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1 - * table offset, respectively.  len is number of contiguous unallocated bytes. - */ -typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len); - -/** - * Generic callback for chaining async callbacks - */ -typedef struct { -    BlockDriverCompletionFunc *cb; -    void *opaque; -} GenericCB; - -void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque); -void gencb_complete(void *opaque, int ret); - -/** - * Header functions - */ -int qed_write_header_sync(BDRVQEDState *s); - -/** - * L2 cache functions - */ -void qed_init_l2_cache(L2TableCache *l2_cache); -void qed_free_l2_cache(L2TableCache *l2_cache); -CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache); -void qed_unref_l2_cache_entry(CachedL2Table *entry); -CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset); -void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table); - -/** - * Table I/O functions - */ -int qed_read_l1_table_sync(BDRVQEDState *s); -void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n, -                        BlockDriverCompletionFunc *cb, void *opaque); -int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, -                            unsigned int n); -int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, -                           uint64_t offset); -void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset, -                       BlockDriverCompletionFunc *cb, void *opaque); -void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request, -                        unsigned int index, unsigned int n, bool flush, -                        BlockDriverCompletionFunc *cb, void *opaque); -int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, -                            unsigned int index, unsigned int n, bool flush); - -/** - * Cluster functions - */ -void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos, -                      size_t len, QEDFindClusterFunc *cb, void *opaque); - -/** - * Consistency check - */ -int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix); - -QEDTable *qed_alloc_table(BDRVQEDState *s); - -/** - * Round down to the start of a cluster - */ -static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset) -{ -    return offset & ~(uint64_t)(s->header.cluster_size - 1); -} - -static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset) -{ -    return offset & (s->header.cluster_size - 1); -} - -static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes) -{ -    return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) / -           (s->header.cluster_size - 1); -} - -static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos) -{ -    return pos >> s->l1_shift; -} - -static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos) -{ -    return (pos >> s->l2_shift) & s->l2_mask; -} - -/** - * Test if a cluster offset is valid - */ -static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset) -{ -    uint64_t header_size = (uint64_t)s->header.header_size * -                           s->header.cluster_size; - -    if (offset & (s->header.cluster_size - 1)) { -        return false; -    } -    return offset >= header_size && offset < s->file_size; -} - -/** - * Test if a table offset is valid - */ -static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset) -{ -    uint64_t end_offset = offset + (s->header.table_size - 1) * -                          s->header.cluster_size; - -    /* Overflow check */ -    if (end_offset <= offset) { -        return false; -    } - -    return qed_check_cluster_offset(s, offset) && -           qed_check_cluster_offset(s, end_offset); -} - -static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s, -                                                 uint64_t offset) -{ -    if (qed_offset_into_cluster(s, offset)) { -        return false; -    } -    return true; -} - -static inline bool qed_offset_is_unalloc_cluster(uint64_t offset) -{ -    if (offset == 0) { -        return true; -    } -    return false; -} - -static inline bool qed_offset_is_zero_cluster(uint64_t offset) -{ -    if (offset == 1) { -        return true; -    } -    return false; -} - -#endif /* BLOCK_QED_H */ diff --git a/contrib/qemu/block/snapshot.c b/contrib/qemu/block/snapshot.c deleted file mode 100644 index 6c6d9deea1f..00000000000 --- a/contrib/qemu/block/snapshot.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Block layer snapshot related functions - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "block/snapshot.h" -#include "block/block_int.h" - -int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, -                       const char *name) -{ -    QEMUSnapshotInfo *sn_tab, *sn; -    int nb_sns, i, ret; - -    ret = -ENOENT; -    nb_sns = bdrv_snapshot_list(bs, &sn_tab); -    if (nb_sns < 0) { -        return ret; -    } -    for (i = 0; i < nb_sns; i++) { -        sn = &sn_tab[i]; -        if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) { -            *sn_info = *sn; -            ret = 0; -            break; -        } -    } -    g_free(sn_tab); -    return ret; -} - -int bdrv_can_snapshot(BlockDriverState *bs) -{ -    BlockDriver *drv = bs->drv; -    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { -        return 0; -    } - -    if (!drv->bdrv_snapshot_create) { -        if (bs->file != NULL) { -            return bdrv_can_snapshot(bs->file); -        } -        return 0; -    } - -    return 1; -} - -int bdrv_snapshot_create(BlockDriverState *bs, -                         QEMUSnapshotInfo *sn_info) -{ -    BlockDriver *drv = bs->drv; -    if (!drv) { -        return -ENOMEDIUM; -    } -    if (drv->bdrv_snapshot_create) { -        return drv->bdrv_snapshot_create(bs, sn_info); -    } -    if (bs->file) { -        return bdrv_snapshot_create(bs->file, sn_info); -    } -    return -ENOTSUP; -} - -int bdrv_snapshot_goto(BlockDriverState *bs, -                       const char *snapshot_id) -{ -    BlockDriver *drv = bs->drv; -    int ret, open_ret; - -    if (!drv) { -        return -ENOMEDIUM; -    } -    if (drv->bdrv_snapshot_goto) { -        return drv->bdrv_snapshot_goto(bs, snapshot_id); -    } - -    if (bs->file) { -        drv->bdrv_close(bs); -        ret = bdrv_snapshot_goto(bs->file, snapshot_id); -        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags); -        if (open_ret < 0) { -            bdrv_delete(bs->file); -            bs->drv = NULL; -            return open_ret; -        } -        return ret; -    } - -    return -ENOTSUP; -} - -int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) -{ -    BlockDriver *drv = bs->drv; -    if (!drv) { -        return -ENOMEDIUM; -    } -    if (drv->bdrv_snapshot_delete) { -        return drv->bdrv_snapshot_delete(bs, snapshot_id); -    } -    if (bs->file) { -        return bdrv_snapshot_delete(bs->file, snapshot_id); -    } -    return -ENOTSUP; -} - -int bdrv_snapshot_list(BlockDriverState *bs, -                       QEMUSnapshotInfo **psn_info) -{ -    BlockDriver *drv = bs->drv; -    if (!drv) { -        return -ENOMEDIUM; -    } -    if (drv->bdrv_snapshot_list) { -        return drv->bdrv_snapshot_list(bs, psn_info); -    } -    if (bs->file) { -        return bdrv_snapshot_list(bs->file, psn_info); -    } -    return -ENOTSUP; -} - -int bdrv_snapshot_load_tmp(BlockDriverState *bs, -        const char *snapshot_name) -{ -    BlockDriver *drv = bs->drv; -    if (!drv) { -        return -ENOMEDIUM; -    } -    if (!bs->read_only) { -        return -EINVAL; -    } -    if (drv->bdrv_snapshot_load_tmp) { -        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name); -    } -    return -ENOTSUP; -}  | 
