cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

block-backend.c (66689B)


      1/*
      2 * QEMU Block backends
      3 *
      4 * Copyright (C) 2014-2016 Red Hat, Inc.
      5 *
      6 * Authors:
      7 *  Markus Armbruster <armbru@redhat.com>,
      8 *
      9 * This work is licensed under the terms of the GNU LGPL, version 2.1
     10 * or later.  See the COPYING.LIB file in the top-level directory.
     11 */
     12
     13#include "qemu/osdep.h"
     14#include "sysemu/block-backend.h"
     15#include "block/block_int.h"
     16#include "block/blockjob.h"
     17#include "block/throttle-groups.h"
     18#include "hw/qdev-core.h"
     19#include "sysemu/blockdev.h"
     20#include "sysemu/runstate.h"
     21#include "sysemu/replay.h"
     22#include "qapi/error.h"
     23#include "qapi/qapi-events-block.h"
     24#include "qemu/id.h"
     25#include "qemu/main-loop.h"
     26#include "qemu/option.h"
     27#include "trace.h"
     28#include "migration/misc.h"
     29
     30/* Number of coroutines to reserve per attached device model */
     31#define COROUTINE_POOL_RESERVATION 64
     32
     33#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
     34
     35static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
     36
     37typedef struct BlockBackendAioNotifier {
     38    void (*attached_aio_context)(AioContext *new_context, void *opaque);
     39    void (*detach_aio_context)(void *opaque);
     40    void *opaque;
     41    QLIST_ENTRY(BlockBackendAioNotifier) list;
     42} BlockBackendAioNotifier;
     43
     44struct BlockBackend {
     45    char *name;
     46    int refcnt;
     47    BdrvChild *root;
     48    AioContext *ctx;
     49    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
     50    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
     51    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
     52    BlockBackendPublic public;
     53
     54    DeviceState *dev;           /* attached device model, if any */
     55    const BlockDevOps *dev_ops;
     56    void *dev_opaque;
     57
     58    /* the block size for which the guest device expects atomicity */
     59    int guest_block_size;
     60
     61    /* If the BDS tree is removed, some of its options are stored here (which
     62     * can be used to restore those options in the new BDS on insert) */
     63    BlockBackendRootState root_state;
     64
     65    bool enable_write_cache;
     66
     67    /* I/O stats (display with "info blockstats"). */
     68    BlockAcctStats stats;
     69
     70    BlockdevOnError on_read_error, on_write_error;
     71    bool iostatus_enabled;
     72    BlockDeviceIoStatus iostatus;
     73
     74    uint64_t perm;
     75    uint64_t shared_perm;
     76    bool disable_perm;
     77
     78    bool allow_aio_context_change;
     79    bool allow_write_beyond_eof;
     80
     81    NotifierList remove_bs_notifiers, insert_bs_notifiers;
     82    QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
     83
     84    int quiesce_counter;
     85    CoQueue queued_requests;
     86    bool disable_request_queuing;
     87
     88    VMChangeStateEntry *vmsh;
     89    bool force_allow_inactivate;
     90
     91    /* Number of in-flight aio requests.  BlockDriverState also counts
     92     * in-flight requests but aio requests can exist even when blk->root is
     93     * NULL, so we cannot rely on its counter for that case.
     94     * Accessed with atomic ops.
     95     */
     96    unsigned int in_flight;
     97};
     98
     99typedef struct BlockBackendAIOCB {
    100    BlockAIOCB common;
    101    BlockBackend *blk;
    102    int ret;
    103} BlockBackendAIOCB;
    104
    105static const AIOCBInfo block_backend_aiocb_info = {
    106    .get_aio_context = blk_aiocb_get_aio_context,
    107    .aiocb_size = sizeof(BlockBackendAIOCB),
    108};
    109
    110static void drive_info_del(DriveInfo *dinfo);
    111static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
    112
    113/* All BlockBackends */
    114static QTAILQ_HEAD(, BlockBackend) block_backends =
    115    QTAILQ_HEAD_INITIALIZER(block_backends);
    116
    117/* All BlockBackends referenced by the monitor and which are iterated through by
    118 * blk_next() */
    119static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
    120    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
    121
    122static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
    123                                     int *child_flags, QDict *child_options,
    124                                     int parent_flags, QDict *parent_options)
    125{
    126    /* We're not supposed to call this function for root nodes */
    127    abort();
    128}
    129static void blk_root_drained_begin(BdrvChild *child);
    130static bool blk_root_drained_poll(BdrvChild *child);
    131static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
    132
    133static void blk_root_change_media(BdrvChild *child, bool load);
    134static void blk_root_resize(BdrvChild *child);
    135
    136static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
    137                                     GSList **ignore, Error **errp);
    138static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
    139                                 GSList **ignore);
    140
    141static char *blk_root_get_parent_desc(BdrvChild *child)
    142{
    143    BlockBackend *blk = child->opaque;
    144    g_autofree char *dev_id = NULL;
    145
    146    if (blk->name) {
    147        return g_strdup_printf("block device '%s'", blk->name);
    148    }
    149
    150    dev_id = blk_get_attached_dev_id(blk);
    151    if (*dev_id) {
    152        return g_strdup_printf("block device '%s'", dev_id);
    153    } else {
    154        /* TODO Callback into the BB owner for something more detailed */
    155        return g_strdup("an unnamed block device");
    156    }
    157}
    158
    159static const char *blk_root_get_name(BdrvChild *child)
    160{
    161    return blk_name(child->opaque);
    162}
    163
    164static void blk_vm_state_changed(void *opaque, bool running, RunState state)
    165{
    166    Error *local_err = NULL;
    167    BlockBackend *blk = opaque;
    168
    169    if (state == RUN_STATE_INMIGRATE) {
    170        return;
    171    }
    172
    173    qemu_del_vm_change_state_handler(blk->vmsh);
    174    blk->vmsh = NULL;
    175    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
    176    if (local_err) {
    177        error_report_err(local_err);
    178    }
    179}
    180
    181/*
    182 * Notifies the user of the BlockBackend that migration has completed. qdev
    183 * devices can tighten their permissions in response (specifically revoke
    184 * shared write permissions that we needed for storage migration).
    185 *
    186 * If an error is returned, the VM cannot be allowed to be resumed.
    187 */
    188static void blk_root_activate(BdrvChild *child, Error **errp)
    189{
    190    BlockBackend *blk = child->opaque;
    191    Error *local_err = NULL;
    192
    193    if (!blk->disable_perm) {
    194        return;
    195    }
    196
    197    blk->disable_perm = false;
    198
    199    blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
    200    if (local_err) {
    201        error_propagate(errp, local_err);
    202        blk->disable_perm = true;
    203        return;
    204    }
    205
    206    if (runstate_check(RUN_STATE_INMIGRATE)) {
     207        /* Activation can happen when the migration process is still active, for
    208         * example when nbd_server_add is called during non-shared storage
    209         * migration. Defer the shared_perm update to migration completion. */
    210        if (!blk->vmsh) {
    211            blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
    212                                                         blk);
    213        }
    214        return;
    215    }
    216
    217    blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
    218    if (local_err) {
    219        error_propagate(errp, local_err);
    220        blk->disable_perm = true;
    221        return;
    222    }
    223}
    224
    225void blk_set_force_allow_inactivate(BlockBackend *blk)
    226{
    227    blk->force_allow_inactivate = true;
    228}
    229
    230static bool blk_can_inactivate(BlockBackend *blk)
    231{
    232    /* If it is a guest device, inactivate is ok. */
    233    if (blk->dev || blk_name(blk)[0]) {
    234        return true;
    235    }
    236
    237    /* Inactivating means no more writes to the image can be done,
    238     * even if those writes would be changes invisible to the
    239     * guest.  For block job BBs that satisfy this, we can just allow
    240     * it.  This is the case for mirror job source, which is required
    241     * by libvirt non-shared block migration. */
    242    if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) {
    243        return true;
    244    }
    245
    246    return blk->force_allow_inactivate;
    247}
    248
    249static int blk_root_inactivate(BdrvChild *child)
    250{
    251    BlockBackend *blk = child->opaque;
    252
    253    if (blk->disable_perm) {
    254        return 0;
    255    }
    256
    257    if (!blk_can_inactivate(blk)) {
    258        return -EPERM;
    259    }
    260
    261    blk->disable_perm = true;
    262    if (blk->root) {
    263        bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
    264    }
    265
    266    return 0;
    267}
    268
    269static void blk_root_attach(BdrvChild *child)
    270{
    271    BlockBackend *blk = child->opaque;
    272    BlockBackendAioNotifier *notifier;
    273
    274    trace_blk_root_attach(child, blk, child->bs);
    275
    276    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
    277        bdrv_add_aio_context_notifier(child->bs,
    278                notifier->attached_aio_context,
    279                notifier->detach_aio_context,
    280                notifier->opaque);
    281    }
    282}
    283
    284static void blk_root_detach(BdrvChild *child)
    285{
    286    BlockBackend *blk = child->opaque;
    287    BlockBackendAioNotifier *notifier;
    288
    289    trace_blk_root_detach(child, blk, child->bs);
    290
    291    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
    292        bdrv_remove_aio_context_notifier(child->bs,
    293                notifier->attached_aio_context,
    294                notifier->detach_aio_context,
    295                notifier->opaque);
    296    }
    297}
    298
    299static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
    300{
    301    BlockBackend *blk = c->opaque;
    302
    303    return blk_get_aio_context(blk);
    304}
    305
    306static const BdrvChildClass child_root = {
    307    .inherit_options    = blk_root_inherit_options,
    308
    309    .change_media       = blk_root_change_media,
    310    .resize             = blk_root_resize,
    311    .get_name           = blk_root_get_name,
    312    .get_parent_desc    = blk_root_get_parent_desc,
    313
    314    .drained_begin      = blk_root_drained_begin,
    315    .drained_poll       = blk_root_drained_poll,
    316    .drained_end        = blk_root_drained_end,
    317
    318    .activate           = blk_root_activate,
    319    .inactivate         = blk_root_inactivate,
    320
    321    .attach             = blk_root_attach,
    322    .detach             = blk_root_detach,
    323
    324    .can_set_aio_ctx    = blk_root_can_set_aio_ctx,
    325    .set_aio_ctx        = blk_root_set_aio_ctx,
    326
    327    .get_parent_aio_context = blk_root_get_parent_aio_context,
    328};
    329
    330/*
    331 * Create a new BlockBackend with a reference count of one.
    332 *
     333 * @perm is a bitmask of BLK_PERM_* constants which describes the permissions
    334 * to request for a block driver node that is attached to this BlockBackend.
    335 * @shared_perm is a bitmask which describes which permissions may be granted
    336 * to other users of the attached node.
    337 * Both sets of permissions can be changed later using blk_set_perm().
    338 *
    339 * Return the new BlockBackend on success, null on failure.
    340 */
    341BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
    342{
    343    BlockBackend *blk;
    344
    345    blk = g_new0(BlockBackend, 1);
    346    blk->refcnt = 1;
    347    blk->ctx = ctx;
    348    blk->perm = perm;
    349    blk->shared_perm = shared_perm;
    350    blk_set_enable_write_cache(blk, true);
    351
    352    blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
    353    blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
    354
    355    block_acct_init(&blk->stats);
    356
    357    qemu_co_queue_init(&blk->queued_requests);
    358    notifier_list_init(&blk->remove_bs_notifiers);
    359    notifier_list_init(&blk->insert_bs_notifiers);
    360    QLIST_INIT(&blk->aio_notifiers);
    361
    362    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
    363    return blk;
    364}
    365
    366/*
    367 * Create a new BlockBackend connected to an existing BlockDriverState.
    368 *
     369 * @perm is a bitmask of BLK_PERM_* constants which describes the
    370 * permissions to request for @bs that is attached to this
    371 * BlockBackend.  @shared_perm is a bitmask which describes which
    372 * permissions may be granted to other users of the attached node.
    373 * Both sets of permissions can be changed later using blk_set_perm().
    374 *
    375 * Return the new BlockBackend on success, null on failure.
    376 */
    377BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
    378                              uint64_t shared_perm, Error **errp)
    379{
    380    BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
    381
    382    if (blk_insert_bs(blk, bs, errp) < 0) {
    383        blk_unref(blk);
    384        return NULL;
    385    }
    386    return blk;
    387}
    388
    389/*
    390 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
    391 * The new BlockBackend is in the main AioContext.
    392 *
    393 * Just as with bdrv_open(), after having called this function the reference to
    394 * @options belongs to the block layer (even on failure).
    395 *
    396 * TODO: Remove @filename and @flags; it should be possible to specify a whole
    397 * BDS tree just by specifying the @options QDict (or @reference,
    398 * alternatively). At the time of adding this function, this is not possible,
    399 * though, so callers of this function have to be able to specify @filename and
    400 * @flags.
    401 */
    402BlockBackend *blk_new_open(const char *filename, const char *reference,
    403                           QDict *options, int flags, Error **errp)
    404{
    405    BlockBackend *blk;
    406    BlockDriverState *bs;
    407    uint64_t perm = 0;
    408    uint64_t shared = BLK_PERM_ALL;
    409
    410    /*
    411     * blk_new_open() is mainly used in .bdrv_create implementations and the
    412     * tools where sharing isn't a major concern because the BDS stays private
    413     * and the file is generally not supposed to be used by a second process,
    414     * so we just request permission according to the flags.
    415     *
    416     * The exceptions are xen_disk and blockdev_init(); in these cases, the
    417     * caller of blk_new_open() doesn't make use of the permissions, but they
    418     * shouldn't hurt either. We can still share everything here because the
    419     * guest devices will add their own blockers if they can't share.
    420     */
    421    if ((flags & BDRV_O_NO_IO) == 0) {
    422        perm |= BLK_PERM_CONSISTENT_READ;
    423        if (flags & BDRV_O_RDWR) {
    424            perm |= BLK_PERM_WRITE;
    425        }
    426    }
    427    if (flags & BDRV_O_RESIZE) {
    428        perm |= BLK_PERM_RESIZE;
    429    }
    430    if (flags & BDRV_O_NO_SHARE) {
    431        shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
    432    }
    433
    434    blk = blk_new(qemu_get_aio_context(), perm, shared);
    435    bs = bdrv_open(filename, reference, options, flags, errp);
    436    if (!bs) {
    437        blk_unref(blk);
    438        return NULL;
    439    }
    440
    441    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
    442                                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
    443                                       perm, shared, blk, errp);
    444    if (!blk->root) {
    445        blk_unref(blk);
    446        return NULL;
    447    }
    448
    449    return blk;
    450}
    451
    452static void blk_delete(BlockBackend *blk)
    453{
    454    assert(!blk->refcnt);
    455    assert(!blk->name);
    456    assert(!blk->dev);
    457    if (blk->public.throttle_group_member.throttle_state) {
    458        blk_io_limits_disable(blk);
    459    }
    460    if (blk->root) {
    461        blk_remove_bs(blk);
    462    }
    463    if (blk->vmsh) {
    464        qemu_del_vm_change_state_handler(blk->vmsh);
    465        blk->vmsh = NULL;
    466    }
    467    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
    468    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
    469    assert(QLIST_EMPTY(&blk->aio_notifiers));
    470    QTAILQ_REMOVE(&block_backends, blk, link);
    471    drive_info_del(blk->legacy_dinfo);
    472    block_acct_cleanup(&blk->stats);
    473    g_free(blk);
    474}
    475
    476static void drive_info_del(DriveInfo *dinfo)
    477{
    478    if (!dinfo) {
    479        return;
    480    }
    481    qemu_opts_del(dinfo->opts);
    482    g_free(dinfo);
    483}
    484
    485int blk_get_refcnt(BlockBackend *blk)
    486{
    487    return blk ? blk->refcnt : 0;
    488}
    489
    490/*
    491 * Increment @blk's reference count.
    492 * @blk must not be null.
    493 */
    494void blk_ref(BlockBackend *blk)
    495{
    496    assert(blk->refcnt > 0);
    497    blk->refcnt++;
    498}
    499
    500/*
    501 * Decrement @blk's reference count.
    502 * If this drops it to zero, destroy @blk.
    503 * For convenience, do nothing if @blk is null.
    504 */
    505void blk_unref(BlockBackend *blk)
    506{
    507    if (blk) {
    508        assert(blk->refcnt > 0);
    509        if (blk->refcnt > 1) {
    510            blk->refcnt--;
    511        } else {
    512            blk_drain(blk);
    513            /* blk_drain() cannot resurrect blk, nobody held a reference */
    514            assert(blk->refcnt == 1);
    515            blk->refcnt = 0;
    516            blk_delete(blk);
    517        }
    518    }
    519}
    520
    521/*
    522 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
    523 * ones which are hidden (i.e. are not referenced by the monitor).
    524 */
    525BlockBackend *blk_all_next(BlockBackend *blk)
    526{
    527    return blk ? QTAILQ_NEXT(blk, link)
    528               : QTAILQ_FIRST(&block_backends);
    529}
    530
    531void blk_remove_all_bs(void)
    532{
    533    BlockBackend *blk = NULL;
    534
    535    while ((blk = blk_all_next(blk)) != NULL) {
    536        AioContext *ctx = blk_get_aio_context(blk);
    537
    538        aio_context_acquire(ctx);
    539        if (blk->root) {
    540            blk_remove_bs(blk);
    541        }
    542        aio_context_release(ctx);
    543    }
    544}
    545
    546/*
    547 * Return the monitor-owned BlockBackend after @blk.
    548 * If @blk is null, return the first one.
    549 * Else, return @blk's next sibling, which may be null.
    550 *
    551 * To iterate over all BlockBackends, do
    552 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
    553 *     ...
    554 * }
    555 */
    556BlockBackend *blk_next(BlockBackend *blk)
    557{
    558    return blk ? QTAILQ_NEXT(blk, monitor_link)
    559               : QTAILQ_FIRST(&monitor_block_backends);
    560}
    561
    562/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
    563 * the monitor or attached to a BlockBackend */
    564BlockDriverState *bdrv_next(BdrvNextIterator *it)
    565{
    566    BlockDriverState *bs, *old_bs;
    567
    568    /* Must be called from the main loop */
    569    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    570
    571    /* First, return all root nodes of BlockBackends. In order to avoid
    572     * returning a BDS twice when multiple BBs refer to it, we only return it
    573     * if the BB is the first one in the parent list of the BDS. */
    574    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
    575        BlockBackend *old_blk = it->blk;
    576
    577        old_bs = old_blk ? blk_bs(old_blk) : NULL;
    578
    579        do {
    580            it->blk = blk_all_next(it->blk);
    581            bs = it->blk ? blk_bs(it->blk) : NULL;
    582        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
    583
    584        if (it->blk) {
    585            blk_ref(it->blk);
    586        }
    587        blk_unref(old_blk);
    588
    589        if (bs) {
    590            bdrv_ref(bs);
    591            bdrv_unref(old_bs);
    592            return bs;
    593        }
    594        it->phase = BDRV_NEXT_MONITOR_OWNED;
    595    } else {
    596        old_bs = it->bs;
    597    }
    598
    599    /* Then return the monitor-owned BDSes without a BB attached. Ignore all
    600     * BDSes that are attached to a BlockBackend here; they have been handled
    601     * by the above block already */
    602    do {
    603        it->bs = bdrv_next_monitor_owned(it->bs);
    604        bs = it->bs;
    605    } while (bs && bdrv_has_blk(bs));
    606
    607    if (bs) {
    608        bdrv_ref(bs);
    609    }
    610    bdrv_unref(old_bs);
    611
    612    return bs;
    613}
    614
    615static void bdrv_next_reset(BdrvNextIterator *it)
    616{
    617    *it = (BdrvNextIterator) {
    618        .phase = BDRV_NEXT_BACKEND_ROOTS,
    619    };
    620}
    621
    622BlockDriverState *bdrv_first(BdrvNextIterator *it)
    623{
    624    bdrv_next_reset(it);
    625    return bdrv_next(it);
    626}
    627
    628/* Must be called when aborting a bdrv_next() iteration before
    629 * bdrv_next() returns NULL */
    630void bdrv_next_cleanup(BdrvNextIterator *it)
    631{
    632    /* Must be called from the main loop */
    633    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
    634
    635    if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
    636        if (it->blk) {
    637            bdrv_unref(blk_bs(it->blk));
    638            blk_unref(it->blk);
    639        }
    640    } else {
    641        bdrv_unref(it->bs);
    642    }
    643
    644    bdrv_next_reset(it);
    645}
    646
    647/*
    648 * Add a BlockBackend into the list of backends referenced by the monitor, with
    649 * the given @name acting as the handle for the monitor.
    650 * Strictly for use by blockdev.c.
    651 *
    652 * @name must not be null or empty.
    653 *
    654 * Returns true on success and false on failure. In the latter case, an Error
    655 * object is returned through @errp.
    656 */
    657bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
    658{
    659    assert(!blk->name);
    660    assert(name && name[0]);
    661
    662    if (!id_wellformed(name)) {
    663        error_setg(errp, "Invalid device name");
    664        return false;
    665    }
    666    if (blk_by_name(name)) {
    667        error_setg(errp, "Device with id '%s' already exists", name);
    668        return false;
    669    }
    670    if (bdrv_find_node(name)) {
    671        error_setg(errp,
    672                   "Device name '%s' conflicts with an existing node name",
    673                   name);
    674        return false;
    675    }
    676
    677    blk->name = g_strdup(name);
    678    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
    679    return true;
    680}
    681
    682/*
    683 * Remove a BlockBackend from the list of backends referenced by the monitor.
    684 * Strictly for use by blockdev.c.
    685 */
    686void monitor_remove_blk(BlockBackend *blk)
    687{
    688    if (!blk->name) {
    689        return;
    690    }
    691
    692    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
    693    g_free(blk->name);
    694    blk->name = NULL;
    695}
    696
    697/*
    698 * Return @blk's name, a non-null string.
    699 * Returns an empty string iff @blk is not referenced by the monitor.
    700 */
    701const char *blk_name(const BlockBackend *blk)
    702{
    703    return blk->name ?: "";
    704}
    705
    706/*
    707 * Return the BlockBackend with name @name if it exists, else null.
    708 * @name must not be null.
    709 */
    710BlockBackend *blk_by_name(const char *name)
    711{
    712    BlockBackend *blk = NULL;
    713
    714    assert(name);
    715    while ((blk = blk_next(blk)) != NULL) {
    716        if (!strcmp(name, blk->name)) {
    717            return blk;
    718        }
    719    }
    720    return NULL;
    721}
    722
    723/*
    724 * Return the BlockDriverState attached to @blk if any, else null.
    725 */
    726BlockDriverState *blk_bs(BlockBackend *blk)
    727{
    728    return blk->root ? blk->root->bs : NULL;
    729}
    730
    731static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
    732{
    733    BdrvChild *child;
    734    QLIST_FOREACH(child, &bs->parents, next_parent) {
    735        if (child->klass == &child_root) {
    736            return child->opaque;
    737        }
    738    }
    739
    740    return NULL;
    741}
    742
    743/*
    744 * Returns true if @bs has an associated BlockBackend.
    745 */
    746bool bdrv_has_blk(BlockDriverState *bs)
    747{
    748    return bdrv_first_blk(bs) != NULL;
    749}
    750
    751/*
    752 * Returns true if @bs has only BlockBackends as parents.
    753 */
    754bool bdrv_is_root_node(BlockDriverState *bs)
    755{
    756    BdrvChild *c;
    757
    758    QLIST_FOREACH(c, &bs->parents, next_parent) {
    759        if (c->klass != &child_root) {
    760            return false;
    761        }
    762    }
    763
    764    return true;
    765}
    766
    767/*
    768 * Return @blk's DriveInfo if any, else null.
    769 */
    770DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
    771{
    772    return blk->legacy_dinfo;
    773}
    774
    775/*
    776 * Set @blk's DriveInfo to @dinfo, and return it.
    777 * @blk must not have a DriveInfo set already.
    778 * No other BlockBackend may have the same DriveInfo set.
    779 */
    780DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
    781{
    782    assert(!blk->legacy_dinfo);
    783    return blk->legacy_dinfo = dinfo;
    784}
    785
    786/*
    787 * Return the BlockBackend with DriveInfo @dinfo.
    788 * It must exist.
    789 */
    790BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
    791{
    792    BlockBackend *blk = NULL;
    793
    794    while ((blk = blk_next(blk)) != NULL) {
    795        if (blk->legacy_dinfo == dinfo) {
    796            return blk;
    797        }
    798    }
    799    abort();
    800}
    801
    802/*
    803 * Returns a pointer to the publicly accessible fields of @blk.
    804 */
    805BlockBackendPublic *blk_get_public(BlockBackend *blk)
    806{
    807    return &blk->public;
    808}
    809
    810/*
    811 * Returns a BlockBackend given the associated @public fields.
    812 */
    813BlockBackend *blk_by_public(BlockBackendPublic *public)
    814{
    815    return container_of(public, BlockBackend, public);
    816}
    817
    818/*
    819 * Disassociates the currently associated BlockDriverState from @blk.
    820 */
    821void blk_remove_bs(BlockBackend *blk)
    822{
    823    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    824    BlockDriverState *bs;
    825    BdrvChild *root;
    826
    827    notifier_list_notify(&blk->remove_bs_notifiers, blk);
    828    if (tgm->throttle_state) {
    829        bs = blk_bs(blk);
    830        bdrv_drained_begin(bs);
    831        throttle_group_detach_aio_context(tgm);
    832        throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
    833        bdrv_drained_end(bs);
    834    }
    835
    836    blk_update_root_state(blk);
    837
    838    /* bdrv_root_unref_child() will cause blk->root to become stale and may
    839     * switch to a completion coroutine later on. Let's drain all I/O here
    840     * to avoid that and a potential QEMU crash.
    841     */
    842    blk_drain(blk);
    843    root = blk->root;
    844    blk->root = NULL;
    845    bdrv_root_unref_child(root);
    846}
    847
    848/*
    849 * Associates a new BlockDriverState with @blk.
    850 */
    851int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
    852{
    853    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
    854    bdrv_ref(bs);
    855    blk->root = bdrv_root_attach_child(bs, "root", &child_root,
    856                                       BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
    857                                       blk->perm, blk->shared_perm,
    858                                       blk, errp);
    859    if (blk->root == NULL) {
    860        return -EPERM;
    861    }
    862
    863    notifier_list_notify(&blk->insert_bs_notifiers, blk);
    864    if (tgm->throttle_state) {
    865        throttle_group_detach_aio_context(tgm);
    866        throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
    867    }
    868
    869    return 0;
    870}
    871
    872/*
    873 * Change BlockDriverState associated with @blk.
    874 */
    875int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
    876{
    877    return bdrv_replace_child_bs(blk->root, new_bs, errp);
    878}
    879
    880/*
    881 * Sets the permission bitmasks that the user of the BlockBackend needs.
    882 */
    883int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
    884                 Error **errp)
    885{
    886    int ret;
    887
    888    if (blk->root && !blk->disable_perm) {
    889        ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
    890        if (ret < 0) {
    891            return ret;
    892        }
    893    }
    894
    895    blk->perm = perm;
    896    blk->shared_perm = shared_perm;
    897
    898    return 0;
    899}
    900
    901void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
    902{
    903    *perm = blk->perm;
    904    *shared_perm = blk->shared_perm;
    905}
    906
    907/*
    908 * Attach device model @dev to @blk.
    909 * Return 0 on success, -EBUSY when a device model is attached already.
    910 */
    911int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
    912{
    913    if (blk->dev) {
    914        return -EBUSY;
    915    }
    916
    917    /* While migration is still incoming, we don't need to apply the
    918     * permissions of guest device BlockBackends. We might still have a block
    919     * job or NBD server writing to the image for storage migration. */
    920    if (runstate_check(RUN_STATE_INMIGRATE)) {
    921        blk->disable_perm = true;
    922    }
    923
    924    blk_ref(blk);
    925    blk->dev = dev;
    926    blk_iostatus_reset(blk);
    927
    928    return 0;
    929}
    930
    931/*
    932 * Detach device model @dev from @blk.
    933 * @dev must be currently attached to @blk.
    934 */
    935void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
    936{
    937    assert(blk->dev == dev);
    938    blk->dev = NULL;
    939    blk->dev_ops = NULL;
    940    blk->dev_opaque = NULL;
    941    blk->guest_block_size = 512;
    942    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
    943    blk_unref(blk);
    944}
    945
    946/*
    947 * Return the device model attached to @blk if any, else null.
    948 */
    949DeviceState *blk_get_attached_dev(BlockBackend *blk)
    950{
    951    return blk->dev;
    952}
    953
    954/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
    955 * device attached to the BlockBackend. */
    956char *blk_get_attached_dev_id(BlockBackend *blk)
    957{
    958    DeviceState *dev = blk->dev;
    959
    960    if (!dev) {
    961        return g_strdup("");
    962    } else if (dev->id) {
    963        return g_strdup(dev->id);
    964    }
    965
    966    return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
    967}
    968
    969/*
    970 * Return the BlockBackend which has the device model @dev attached if it
    971 * exists, else null.
    972 *
    973 * @dev must not be null.
    974 */
    975BlockBackend *blk_by_dev(void *dev)
    976{
    977    BlockBackend *blk = NULL;
    978
    979    assert(dev != NULL);
    980    while ((blk = blk_all_next(blk)) != NULL) {
    981        if (blk->dev == dev) {
    982            return blk;
    983        }
    984    }
    985    return NULL;
    986}
    987
    988/*
    989 * Set @blk's device model callbacks to @ops.
    990 * @opaque is the opaque argument to pass to the callbacks.
    991 * This is for use by device models.
    992 */
    993void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
    994                     void *opaque)
    995{
    996    blk->dev_ops = ops;
    997    blk->dev_opaque = opaque;
    998
    999    /* Are we currently quiesced? Should we enforce this right now? */
   1000    if (blk->quiesce_counter && ops->drained_begin) {
   1001        ops->drained_begin(opaque);
   1002    }
   1003}
   1004
   1005/*
   1006 * Notify @blk's attached device model of media change.
   1007 *
   1008 * If @load is true, notify of media load. This action can fail, meaning that
   1009 * the medium cannot be loaded. @errp is set then.
   1010 *
   1011 * If @load is false, notify of media eject. This can never fail.
   1012 *
   1013 * Also send DEVICE_TRAY_MOVED events as appropriate.
   1014 */
   1015void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
   1016{
   1017    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
   1018        bool tray_was_open, tray_is_open;
   1019        Error *local_err = NULL;
   1020
   1021        tray_was_open = blk_dev_is_tray_open(blk);
   1022        blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
   1023        if (local_err) {
   1024            assert(load == true);
   1025            error_propagate(errp, local_err);
   1026            return;
   1027        }
   1028        tray_is_open = blk_dev_is_tray_open(blk);
   1029
   1030        if (tray_was_open != tray_is_open) {
   1031            char *id = blk_get_attached_dev_id(blk);
   1032            qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
   1033            g_free(id);
   1034        }
   1035    }
   1036}
   1037
   1038static void blk_root_change_media(BdrvChild *child, bool load)
   1039{
   1040    blk_dev_change_media_cb(child->opaque, load, NULL);
   1041}
   1042
   1043/*
   1044 * Does @blk's attached device model have removable media?
   1045 * %true if no device model is attached.
   1046 */
   1047bool blk_dev_has_removable_media(BlockBackend *blk)
   1048{
   1049    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
   1050}
   1051
   1052/*
   1053 * Does @blk's attached device model have a tray?
   1054 */
   1055bool blk_dev_has_tray(BlockBackend *blk)
   1056{
   1057    return blk->dev_ops && blk->dev_ops->is_tray_open;
   1058}
   1059
   1060/*
   1061 * Notify @blk's attached device model of a media eject request.
   1062 * If @force is true, the medium is about to be yanked out forcefully.
   1063 */
   1064void blk_dev_eject_request(BlockBackend *blk, bool force)
   1065{
   1066    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
   1067        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
   1068    }
   1069}
   1070
   1071/*
   1072 * Does @blk's attached device model have a tray, and is it open?
   1073 */
   1074bool blk_dev_is_tray_open(BlockBackend *blk)
   1075{
   1076    if (blk_dev_has_tray(blk)) {
   1077        return blk->dev_ops->is_tray_open(blk->dev_opaque);
   1078    }
   1079    return false;
   1080}
   1081
   1082/*
   1083 * Does @blk's attached device model have the medium locked?
   1084 * %false if the device model has no such lock.
   1085 */
   1086bool blk_dev_is_medium_locked(BlockBackend *blk)
   1087{
   1088    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
   1089        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
   1090    }
   1091    return false;
   1092}
   1093
   1094/*
   1095 * Notify @blk's attached device model of a backend size change.
   1096 */
   1097static void blk_root_resize(BdrvChild *child)
   1098{
   1099    BlockBackend *blk = child->opaque;
   1100
   1101    if (blk->dev_ops && blk->dev_ops->resize_cb) {
   1102        blk->dev_ops->resize_cb(blk->dev_opaque);
   1103    }
   1104}
   1105
   1106void blk_iostatus_enable(BlockBackend *blk)
   1107{
   1108    blk->iostatus_enabled = true;
   1109    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
   1110}
   1111
   1112/* The I/O status is only enabled if the drive explicitly
   1113 * enables it _and_ the VM is configured to stop on errors */
   1114bool blk_iostatus_is_enabled(const BlockBackend *blk)
   1115{
   1116    return (blk->iostatus_enabled &&
   1117           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
   1118            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
   1119            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
   1120}
   1121
   1122BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
   1123{
   1124    return blk->iostatus;
   1125}
   1126
   1127void blk_iostatus_disable(BlockBackend *blk)
   1128{
   1129    blk->iostatus_enabled = false;
   1130}
   1131
   1132void blk_iostatus_reset(BlockBackend *blk)
   1133{
   1134    if (blk_iostatus_is_enabled(blk)) {
   1135        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
   1136    }
   1137}
   1138
   1139void blk_iostatus_set_err(BlockBackend *blk, int error)
   1140{
   1141    assert(blk_iostatus_is_enabled(blk));
   1142    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
   1143        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
   1144                                          BLOCK_DEVICE_IO_STATUS_FAILED;
   1145    }
   1146}
   1147
   1148void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
   1149{
   1150    blk->allow_write_beyond_eof = allow;
   1151}
   1152
   1153void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
   1154{
   1155    blk->allow_aio_context_change = allow;
   1156}
   1157
   1158void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
   1159{
   1160    blk->disable_request_queuing = disable;
   1161}
   1162
   1163static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
   1164                                  size_t size)
   1165{
   1166    int64_t len;
   1167
   1168    if (size > INT_MAX) {
   1169        return -EIO;
   1170    }
   1171
   1172    if (!blk_is_available(blk)) {
   1173        return -ENOMEDIUM;
   1174    }
   1175
   1176    if (offset < 0) {
   1177        return -EIO;
   1178    }
   1179
   1180    if (!blk->allow_write_beyond_eof) {
   1181        len = blk_getlength(blk);
   1182        if (len < 0) {
   1183            return len;
   1184        }
   1185
   1186        if (offset > len || len - offset < size) {
   1187            return -EIO;
   1188        }
   1189    }
   1190
   1191    return 0;
   1192}
   1193
   1194/* To be called between exactly one pair of blk_inc/dec_in_flight() */
   1195static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
   1196{
   1197    assert(blk->in_flight > 0);
   1198
   1199    if (blk->quiesce_counter && !blk->disable_request_queuing) {
   1200        blk_dec_in_flight(blk);
   1201        qemu_co_queue_wait(&blk->queued_requests, NULL);
   1202        blk_inc_in_flight(blk);
   1203    }
   1204}
   1205
   1206/* To be called between exactly one pair of blk_inc/dec_in_flight() */
   1207static int coroutine_fn
   1208blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
   1209              QEMUIOVector *qiov, BdrvRequestFlags flags)
   1210{
   1211    int ret;
   1212    BlockDriverState *bs;
   1213
   1214    blk_wait_while_drained(blk);
   1215
   1216    /* Call blk_bs() only after waiting, the graph may have changed */
   1217    bs = blk_bs(blk);
   1218    trace_blk_co_preadv(blk, bs, offset, bytes, flags);
   1219
   1220    ret = blk_check_byte_request(blk, offset, bytes);
   1221    if (ret < 0) {
   1222        return ret;
   1223    }
   1224
   1225    bdrv_inc_in_flight(bs);
   1226
   1227    /* throttling disk I/O */
   1228    if (blk->public.throttle_group_member.throttle_state) {
   1229        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
   1230                bytes, false);
   1231    }
   1232
   1233    ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
   1234    bdrv_dec_in_flight(bs);
   1235    return ret;
   1236}
   1237
   1238int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
   1239                               unsigned int bytes, QEMUIOVector *qiov,
   1240                               BdrvRequestFlags flags)
   1241{
   1242    int ret;
   1243
   1244    blk_inc_in_flight(blk);
   1245    ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
   1246    blk_dec_in_flight(blk);
   1247
   1248    return ret;
   1249}
   1250
   1251/* To be called between exactly one pair of blk_inc/dec_in_flight() */
   1252static int coroutine_fn
   1253blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
   1254                    QEMUIOVector *qiov, size_t qiov_offset,
   1255                    BdrvRequestFlags flags)
   1256{
   1257    int ret;
   1258    BlockDriverState *bs;
   1259
   1260    blk_wait_while_drained(blk);
   1261
   1262    /* Call blk_bs() only after waiting, the graph may have changed */
   1263    bs = blk_bs(blk);
   1264    trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
   1265
   1266    ret = blk_check_byte_request(blk, offset, bytes);
   1267    if (ret < 0) {
   1268        return ret;
   1269    }
   1270
   1271    bdrv_inc_in_flight(bs);
   1272    /* throttling disk I/O */
   1273    if (blk->public.throttle_group_member.throttle_state) {
   1274        throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
   1275                bytes, true);
   1276    }
   1277
   1278    if (!blk->enable_write_cache) {
   1279        flags |= BDRV_REQ_FUA;
   1280    }
   1281
   1282    ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset,
   1283                               flags);
   1284    bdrv_dec_in_flight(bs);
   1285    return ret;
   1286}
   1287
   1288int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
   1289                                     unsigned int bytes,
   1290                                     QEMUIOVector *qiov, size_t qiov_offset,
   1291                                     BdrvRequestFlags flags)
   1292{
   1293    int ret;
   1294
   1295    blk_inc_in_flight(blk);
   1296    ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
   1297    blk_dec_in_flight(blk);
   1298
   1299    return ret;
   1300}
   1301
   1302int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
   1303                                unsigned int bytes, QEMUIOVector *qiov,
   1304                                BdrvRequestFlags flags)
   1305{
   1306    return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
   1307}
   1308
   1309typedef struct BlkRwCo {
   1310    BlockBackend *blk;
   1311    int64_t offset;
   1312    void *iobuf;
   1313    int ret;
   1314    BdrvRequestFlags flags;
   1315} BlkRwCo;
   1316
   1317static void blk_read_entry(void *opaque)
   1318{
   1319    BlkRwCo *rwco = opaque;
   1320    QEMUIOVector *qiov = rwco->iobuf;
   1321
   1322    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
   1323                              qiov, rwco->flags);
   1324    aio_wait_kick();
   1325}
   1326
   1327static void blk_write_entry(void *opaque)
   1328{
   1329    BlkRwCo *rwco = opaque;
   1330    QEMUIOVector *qiov = rwco->iobuf;
   1331
   1332    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
   1333                                    qiov, 0, rwco->flags);
   1334    aio_wait_kick();
   1335}
   1336
   1337static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
   1338                   int64_t bytes, CoroutineEntry co_entry,
   1339                   BdrvRequestFlags flags)
   1340{
   1341    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
   1342    BlkRwCo rwco = {
   1343        .blk    = blk,
   1344        .offset = offset,
   1345        .iobuf  = &qiov,
   1346        .flags  = flags,
   1347        .ret    = NOT_DONE,
   1348    };
   1349
   1350    blk_inc_in_flight(blk);
   1351    if (qemu_in_coroutine()) {
   1352        /* Fast-path if already in coroutine context */
   1353        co_entry(&rwco);
   1354    } else {
   1355        Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
   1356        bdrv_coroutine_enter(blk_bs(blk), co);
   1357        BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
   1358    }
   1359    blk_dec_in_flight(blk);
   1360
   1361    return rwco.ret;
   1362}
   1363
   1364int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
   1365                      int bytes, BdrvRequestFlags flags)
   1366{
   1367    return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
   1368                   flags | BDRV_REQ_ZERO_WRITE);
   1369}
   1370
   1371int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
   1372{
   1373    return bdrv_make_zero(blk->root, flags);
   1374}
   1375
   1376void blk_inc_in_flight(BlockBackend *blk)
   1377{
   1378    qatomic_inc(&blk->in_flight);
   1379}
   1380
   1381void blk_dec_in_flight(BlockBackend *blk)
   1382{
   1383    qatomic_dec(&blk->in_flight);
   1384    aio_wait_kick();
   1385}
   1386
   1387static void error_callback_bh(void *opaque)
   1388{
   1389    struct BlockBackendAIOCB *acb = opaque;
   1390
   1391    blk_dec_in_flight(acb->blk);
   1392    acb->common.cb(acb->common.opaque, acb->ret);
   1393    qemu_aio_unref(acb);
   1394}
   1395
   1396BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
   1397                                  BlockCompletionFunc *cb,
   1398                                  void *opaque, int ret)
   1399{
   1400    struct BlockBackendAIOCB *acb;
   1401
   1402    blk_inc_in_flight(blk);
   1403    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
   1404    acb->blk = blk;
   1405    acb->ret = ret;
   1406
   1407    replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
   1408                                     error_callback_bh, acb);
   1409    return &acb->common;
   1410}
   1411
   1412typedef struct BlkAioEmAIOCB {
   1413    BlockAIOCB common;
   1414    BlkRwCo rwco;
   1415    int bytes;
   1416    bool has_returned;
   1417} BlkAioEmAIOCB;
   1418
   1419static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
   1420{
   1421    BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
   1422
   1423    return blk_get_aio_context(acb->rwco.blk);
   1424}
   1425
   1426static const AIOCBInfo blk_aio_em_aiocb_info = {
   1427    .aiocb_size         = sizeof(BlkAioEmAIOCB),
   1428    .get_aio_context    = blk_aio_em_aiocb_get_aio_context,
   1429};
   1430
   1431static void blk_aio_complete(BlkAioEmAIOCB *acb)
   1432{
   1433    if (acb->has_returned) {
   1434        acb->common.cb(acb->common.opaque, acb->rwco.ret);
   1435        blk_dec_in_flight(acb->rwco.blk);
   1436        qemu_aio_unref(acb);
   1437    }
   1438}
   1439
   1440static void blk_aio_complete_bh(void *opaque)
   1441{
   1442    BlkAioEmAIOCB *acb = opaque;
   1443    assert(acb->has_returned);
   1444    blk_aio_complete(acb);
   1445}
   1446
   1447static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
   1448                                void *iobuf, CoroutineEntry co_entry,
   1449                                BdrvRequestFlags flags,
   1450                                BlockCompletionFunc *cb, void *opaque)
   1451{
   1452    BlkAioEmAIOCB *acb;
   1453    Coroutine *co;
   1454
   1455    blk_inc_in_flight(blk);
   1456    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
   1457    acb->rwco = (BlkRwCo) {
   1458        .blk    = blk,
   1459        .offset = offset,
   1460        .iobuf  = iobuf,
   1461        .flags  = flags,
   1462        .ret    = NOT_DONE,
   1463    };
   1464    acb->bytes = bytes;
   1465    acb->has_returned = false;
   1466
   1467    co = qemu_coroutine_create(co_entry, acb);
   1468    bdrv_coroutine_enter(blk_bs(blk), co);
   1469
   1470    acb->has_returned = true;
   1471    if (acb->rwco.ret != NOT_DONE) {
   1472        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
   1473                                         blk_aio_complete_bh, acb);
   1474    }
   1475
   1476    return &acb->common;
   1477}
   1478
   1479static void blk_aio_read_entry(void *opaque)
   1480{
   1481    BlkAioEmAIOCB *acb = opaque;
   1482    BlkRwCo *rwco = &acb->rwco;
   1483    QEMUIOVector *qiov = rwco->iobuf;
   1484
   1485    assert(qiov->size == acb->bytes);
   1486    rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
   1487                              qiov, rwco->flags);
   1488    blk_aio_complete(acb);
   1489}
   1490
   1491static void blk_aio_write_entry(void *opaque)
   1492{
   1493    BlkAioEmAIOCB *acb = opaque;
   1494    BlkRwCo *rwco = &acb->rwco;
   1495    QEMUIOVector *qiov = rwco->iobuf;
   1496
   1497    assert(!qiov || qiov->size == acb->bytes);
   1498    rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
   1499                                    qiov, 0, rwco->flags);
   1500    blk_aio_complete(acb);
   1501}
   1502
   1503BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
   1504                                  int count, BdrvRequestFlags flags,
   1505                                  BlockCompletionFunc *cb, void *opaque)
   1506{
   1507    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
   1508                        flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
   1509}
   1510
   1511int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
   1512{
   1513    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
   1514    if (ret < 0) {
   1515        return ret;
   1516    }
   1517    return count;
   1518}
   1519
   1520int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
   1521               BdrvRequestFlags flags)
   1522{
   1523    int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
   1524                      flags);
   1525    if (ret < 0) {
   1526        return ret;
   1527    }
   1528    return count;
   1529}
   1530
   1531int64_t blk_getlength(BlockBackend *blk)
   1532{
   1533    if (!blk_is_available(blk)) {
   1534        return -ENOMEDIUM;
   1535    }
   1536
   1537    return bdrv_getlength(blk_bs(blk));
   1538}
   1539
   1540void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
   1541{
   1542    if (!blk_bs(blk)) {
   1543        *nb_sectors_ptr = 0;
   1544    } else {
   1545        bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
   1546    }
   1547}
   1548
   1549int64_t blk_nb_sectors(BlockBackend *blk)
   1550{
   1551    if (!blk_is_available(blk)) {
   1552        return -ENOMEDIUM;
   1553    }
   1554
   1555    return bdrv_nb_sectors(blk_bs(blk));
   1556}
   1557
   1558BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
   1559                           QEMUIOVector *qiov, BdrvRequestFlags flags,
   1560                           BlockCompletionFunc *cb, void *opaque)
   1561{
   1562    return blk_aio_prwv(blk, offset, qiov->size, qiov,
   1563                        blk_aio_read_entry, flags, cb, opaque);
   1564}
   1565
   1566BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
   1567                            QEMUIOVector *qiov, BdrvRequestFlags flags,
   1568                            BlockCompletionFunc *cb, void *opaque)
   1569{
   1570    return blk_aio_prwv(blk, offset, qiov->size, qiov,
   1571                        blk_aio_write_entry, flags, cb, opaque);
   1572}
   1573
   1574void blk_aio_cancel(BlockAIOCB *acb)
   1575{
   1576    bdrv_aio_cancel(acb);
   1577}
   1578
   1579void blk_aio_cancel_async(BlockAIOCB *acb)
   1580{
   1581    bdrv_aio_cancel_async(acb);
   1582}
   1583
   1584/* To be called between exactly one pair of blk_inc/dec_in_flight() */
   1585static int coroutine_fn
   1586blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
   1587{
   1588    blk_wait_while_drained(blk);
   1589
   1590    if (!blk_is_available(blk)) {
   1591        return -ENOMEDIUM;
   1592    }
   1593
   1594    return bdrv_co_ioctl(blk_bs(blk), req, buf);
   1595}
   1596
   1597static void blk_ioctl_entry(void *opaque)
   1598{
   1599    BlkRwCo *rwco = opaque;
   1600    QEMUIOVector *qiov = rwco->iobuf;
   1601
   1602    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
   1603    aio_wait_kick();
   1604}
   1605
   1606int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
   1607{
   1608    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
   1609}
   1610
   1611static void blk_aio_ioctl_entry(void *opaque)
   1612{
   1613    BlkAioEmAIOCB *acb = opaque;
   1614    BlkRwCo *rwco = &acb->rwco;
   1615
   1616    rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
   1617
   1618    blk_aio_complete(acb);
   1619}
   1620
   1621BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
   1622                          BlockCompletionFunc *cb, void *opaque)
   1623{
   1624    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
   1625}
   1626
   1627/* To be called between exactly one pair of blk_inc/dec_in_flight() */
   1628static int coroutine_fn
   1629blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
   1630{
   1631    int ret;
   1632
   1633    blk_wait_while_drained(blk);
   1634
   1635    ret = blk_check_byte_request(blk, offset, bytes);
   1636    if (ret < 0) {
   1637        return ret;
   1638    }
   1639
   1640    return bdrv_co_pdiscard(blk->root, offset, bytes);
   1641}
   1642
   1643static void blk_aio_pdiscard_entry(void *opaque)
   1644{
   1645    BlkAioEmAIOCB *acb = opaque;
   1646    BlkRwCo *rwco = &acb->rwco;
   1647
   1648    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
   1649    blk_aio_complete(acb);
   1650}
   1651
   1652BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
   1653                             int64_t offset, int bytes,
   1654                             BlockCompletionFunc *cb, void *opaque)
   1655{
   1656    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
   1657                        cb, opaque);
   1658}
   1659
   1660int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
   1661{
   1662    int ret;
   1663
   1664    blk_inc_in_flight(blk);
   1665    ret = blk_do_pdiscard(blk, offset, bytes);
   1666    blk_dec_in_flight(blk);
   1667
   1668    return ret;
   1669}
   1670
   1671static void blk_pdiscard_entry(void *opaque)
   1672{
   1673    BlkRwCo *rwco = opaque;
   1674    QEMUIOVector *qiov = rwco->iobuf;
   1675
   1676    rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
   1677    aio_wait_kick();
   1678}
   1679
   1680int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
   1681{
   1682    return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
   1683}
   1684
   1685/* To be called between exactly one pair of blk_inc/dec_in_flight() */
   1686static int coroutine_fn blk_do_flush(BlockBackend *blk)
   1687{
   1688    blk_wait_while_drained(blk);
   1689
   1690    if (!blk_is_available(blk)) {
   1691        return -ENOMEDIUM;
   1692    }
   1693
   1694    return bdrv_co_flush(blk_bs(blk));
   1695}
   1696
   1697static void blk_aio_flush_entry(void *opaque)
   1698{
   1699    BlkAioEmAIOCB *acb = opaque;
   1700    BlkRwCo *rwco = &acb->rwco;
   1701
   1702    rwco->ret = blk_do_flush(rwco->blk);
   1703    blk_aio_complete(acb);
   1704}
   1705
   1706BlockAIOCB *blk_aio_flush(BlockBackend *blk,
   1707                          BlockCompletionFunc *cb, void *opaque)
   1708{
   1709    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
   1710}
   1711
   1712int coroutine_fn blk_co_flush(BlockBackend *blk)
   1713{
   1714    int ret;
   1715
   1716    blk_inc_in_flight(blk);
   1717    ret = blk_do_flush(blk);
   1718    blk_dec_in_flight(blk);
   1719
   1720    return ret;
   1721}
   1722
   1723static void blk_flush_entry(void *opaque)
   1724{
   1725    BlkRwCo *rwco = opaque;
   1726    rwco->ret = blk_do_flush(rwco->blk);
   1727    aio_wait_kick();
   1728}
   1729
   1730int blk_flush(BlockBackend *blk)
   1731{
   1732    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
   1733}
   1734
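/*
 * Hypothetical usage sketch (the example_* names are illustrative only):
 * the asynchronous variants above complete through a BlockCompletionFunc,
 * which receives 0 on success or a negative errno.
 */
static void example_flush_done(void *opaque, int ret)
{
    if (ret < 0) {
        /* e.g. -ENOMEDIUM while no medium is available, or a backend error */
    }
}

static void example_start_flush(BlockBackend *blk)
{
    blk_aio_flush(blk, example_flush_done, NULL);
}
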
   1735void blk_drain(BlockBackend *blk)
   1736{
   1737    BlockDriverState *bs = blk_bs(blk);
   1738
   1739    if (bs) {
   1740        bdrv_drained_begin(bs);
   1741    }
   1742
   1743    /* We may have -ENOMEDIUM completions in flight */
   1744    AIO_WAIT_WHILE(blk_get_aio_context(blk),
   1745                   qatomic_mb_read(&blk->in_flight) > 0);
   1746
   1747    if (bs) {
   1748        bdrv_drained_end(bs);
   1749    }
   1750}
   1751
   1752void blk_drain_all(void)
   1753{
   1754    BlockBackend *blk = NULL;
   1755
   1756    bdrv_drain_all_begin();
   1757
   1758    while ((blk = blk_all_next(blk)) != NULL) {
   1759        AioContext *ctx = blk_get_aio_context(blk);
   1760
   1761        aio_context_acquire(ctx);
   1762
   1763        /* We may have -ENOMEDIUM completions in flight */
   1764        AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0);
   1765
   1766        aio_context_release(ctx);
   1767    }
   1768
   1769    bdrv_drain_all_end();
   1770}
   1771
   1772void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
   1773                      BlockdevOnError on_write_error)
   1774{
   1775    blk->on_read_error = on_read_error;
   1776    blk->on_write_error = on_write_error;
   1777}
   1778
   1779BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
   1780{
   1781    return is_read ? blk->on_read_error : blk->on_write_error;
   1782}
   1783
   1784BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
   1785                                      int error)
   1786{
   1787    BlockdevOnError on_err = blk_get_on_error(blk, is_read);
   1788
   1789    switch (on_err) {
   1790    case BLOCKDEV_ON_ERROR_ENOSPC:
   1791        return (error == ENOSPC) ?
   1792               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
   1793    case BLOCKDEV_ON_ERROR_STOP:
   1794        return BLOCK_ERROR_ACTION_STOP;
   1795    case BLOCKDEV_ON_ERROR_REPORT:
   1796        return BLOCK_ERROR_ACTION_REPORT;
   1797    case BLOCKDEV_ON_ERROR_IGNORE:
   1798        return BLOCK_ERROR_ACTION_IGNORE;
   1799    case BLOCKDEV_ON_ERROR_AUTO:
   1800    default:
   1801        abort();
   1802    }
   1803}
   1804
   1805static void send_qmp_error_event(BlockBackend *blk,
   1806                                 BlockErrorAction action,
   1807                                 bool is_read, int error)
   1808{
   1809    IoOperationType optype;
   1810    BlockDriverState *bs = blk_bs(blk);
   1811
   1812    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
   1813    qapi_event_send_block_io_error(blk_name(blk), !!bs,
   1814                                   bs ? bdrv_get_node_name(bs) : NULL, optype,
   1815                                   action, blk_iostatus_is_enabled(blk),
   1816                                   error == ENOSPC, strerror(error));
   1817}
   1818
   1819/* This is done by device models because, while the block layer knows
   1820 * about the error, it does not know whether an operation comes from
   1821 * the device or the block layer (from a job, for example).
   1822 */
   1823void blk_error_action(BlockBackend *blk, BlockErrorAction action,
   1824                      bool is_read, int error)
   1825{
   1826    assert(error >= 0);
   1827
   1828    if (action == BLOCK_ERROR_ACTION_STOP) {
   1829        /* First set the iostatus, so that "info block" returns an iostatus
   1830         * that matches the events raised so far (an additional error iostatus
   1831         * is fine, but not a lost one).
   1832         */
   1833        blk_iostatus_set_err(blk, error);
   1834
   1835        /* Then raise the request to stop the VM and emit the event.
   1836         * qemu_system_vmstop_request_prepare has two effects.  First,
   1837         * it ensures that the STOP event always comes after the
   1838         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
   1839         * observes the BLOCK_IO_ERROR event and issues a "cont" before the
   1840         * STOP event arrives, the VM will not stop.  In this case, vm_start()
   1841         * also ensures that the STOP/RESUME pair of events is emitted.
   1842         */
   1843        qemu_system_vmstop_request_prepare();
   1844        send_qmp_error_event(blk, action, is_read, error);
   1845        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
   1846    } else {
   1847        send_qmp_error_event(blk, action, is_read, error);
   1848    }
   1849}
   1850
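/*
 * Hypothetical sketch of the calling convention described above: a device
 * model maps a failed request to an action and then reports it (the
 * example_* names are illustrative only).  "error" is a positive errno.
 */
static void example_handle_rw_error(BlockBackend *blk, bool is_read, int error)
{
    BlockErrorAction action = blk_get_error_action(blk, is_read, error);

    blk_error_action(blk, action, is_read, error);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* keep the request around so it can be retried after "cont" */
    } else if (action == BLOCK_ERROR_ACTION_REPORT) {
        /* complete the request towards the guest with an error status */
    } else {
        /* BLOCK_ERROR_ACTION_IGNORE: complete the request as successful */
    }
}
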
   1851/*
   1852 * Returns true if the BlockBackend can support taking write permissions
   1853 * (because its root node is not read-only).
   1854 */
   1855bool blk_supports_write_perm(BlockBackend *blk)
   1856{
   1857    BlockDriverState *bs = blk_bs(blk);
   1858
   1859    if (bs) {
   1860        return !bdrv_is_read_only(bs);
   1861    } else {
   1862        return blk->root_state.open_flags & BDRV_O_RDWR;
   1863    }
   1864}
   1865
   1866/*
   1867 * Returns true if the BlockBackend can be written to in its current
   1868 * configuration (i.e. if write permission has been requested)
   1869 */
   1870bool blk_is_writable(BlockBackend *blk)
   1871{
   1872    return blk->perm & BLK_PERM_WRITE;
   1873}
   1874
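/*
 * Hypothetical sketch contrasting the two predicates above (example_* is
 * illustrative only): blk_supports_write_perm() says the root node is not
 * read-only; blk_is_writable() additionally requires that BLK_PERM_WRITE
 * was actually requested for this backend.
 */
static bool example_can_write_now(BlockBackend *blk)
{
    return blk_supports_write_perm(blk) && blk_is_writable(blk);
}
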
   1875bool blk_is_sg(BlockBackend *blk)
   1876{
   1877    BlockDriverState *bs = blk_bs(blk);
   1878
   1879    if (!bs) {
   1880        return false;
   1881    }
   1882
   1883    return bdrv_is_sg(bs);
   1884}
   1885
   1886bool blk_enable_write_cache(BlockBackend *blk)
   1887{
   1888    return blk->enable_write_cache;
   1889}
   1890
   1891void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
   1892{
   1893    blk->enable_write_cache = wce;
   1894}
   1895
   1896void blk_invalidate_cache(BlockBackend *blk, Error **errp)
   1897{
   1898    BlockDriverState *bs = blk_bs(blk);
   1899
   1900    if (!bs) {
   1901        error_setg(errp, "Device '%s' has no medium", blk->name);
   1902        return;
   1903    }
   1904
   1905    bdrv_invalidate_cache(bs, errp);
   1906}
   1907
   1908bool blk_is_inserted(BlockBackend *blk)
   1909{
   1910    BlockDriverState *bs = blk_bs(blk);
   1911
   1912    return bs && bdrv_is_inserted(bs);
   1913}
   1914
   1915bool blk_is_available(BlockBackend *blk)
   1916{
   1917    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
   1918}
   1919
   1920void blk_lock_medium(BlockBackend *blk, bool locked)
   1921{
   1922    BlockDriverState *bs = blk_bs(blk);
   1923
   1924    if (bs) {
   1925        bdrv_lock_medium(bs, locked);
   1926    }
   1927}
   1928
   1929void blk_eject(BlockBackend *blk, bool eject_flag)
   1930{
   1931    BlockDriverState *bs = blk_bs(blk);
   1932    char *id;
   1933
   1934    if (bs) {
   1935        bdrv_eject(bs, eject_flag);
   1936    }
   1937
   1938    /* Whether or not we ejected on the backend,
   1939     * the frontend experienced a tray event. */
   1940    id = blk_get_attached_dev_id(blk);
   1941    qapi_event_send_device_tray_moved(blk_name(blk), id,
   1942                                      eject_flag);
   1943    g_free(id);
   1944}
   1945
   1946int blk_get_flags(BlockBackend *blk)
   1947{
   1948    BlockDriverState *bs = blk_bs(blk);
   1949
   1950    if (bs) {
   1951        return bdrv_get_flags(bs);
   1952    } else {
   1953        return blk->root_state.open_flags;
   1954    }
   1955}
   1956
   1957/* Returns the minimum request alignment, in bytes; guaranteed nonzero */
   1958uint32_t blk_get_request_alignment(BlockBackend *blk)
   1959{
   1960    BlockDriverState *bs = blk_bs(blk);
   1961    return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
   1962}
   1963
   1964/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */
   1965uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
   1966{
   1967    BlockDriverState *bs = blk_bs(blk);
   1968    uint64_t max = INT_MAX;
   1969
   1970    if (bs) {
   1971        max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
   1972        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
   1973    }
   1974    return ROUND_DOWN(max, blk_get_request_alignment(blk));
   1975}
   1976
   1977/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
   1978uint32_t blk_get_max_transfer(BlockBackend *blk)
   1979{
   1980    BlockDriverState *bs = blk_bs(blk);
   1981    uint32_t max = INT_MAX;
   1982
   1983    if (bs) {
   1984        max = MIN_NON_ZERO(max, bs->bl.max_transfer);
   1985    }
   1986    return ROUND_DOWN(max, blk_get_request_alignment(blk));
   1987}
   1988
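/*
 * Hypothetical sketch: clamping a guest request length to the limits
 * reported above (example_* is illustrative only).  Note that
 * blk_get_max_transfer() is already rounded down to the request alignment.
 */
static uint32_t example_clamp_len(BlockBackend *blk, uint32_t len)
{
    uint32_t align = blk_get_request_alignment(blk);
    uint32_t max = blk_get_max_transfer(blk);

    return MIN(ROUND_DOWN(len, align), max);
}
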
   1989int blk_get_max_hw_iov(BlockBackend *blk)
   1990{
   1991    return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
   1992                        blk->root->bs->bl.max_iov);
   1993}
   1994
   1995int blk_get_max_iov(BlockBackend *blk)
   1996{
   1997    return blk->root->bs->bl.max_iov;
   1998}
   1999
   2000void blk_set_guest_block_size(BlockBackend *blk, int align)
   2001{
   2002    blk->guest_block_size = align;
   2003}
   2004
   2005void *blk_try_blockalign(BlockBackend *blk, size_t size)
   2006{
   2007    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
   2008}
   2009
   2010void *blk_blockalign(BlockBackend *blk, size_t size)
   2011{
   2012    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
   2013}
   2014
   2015bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
   2016{
   2017    BlockDriverState *bs = blk_bs(blk);
   2018
   2019    if (!bs) {
   2020        return false;
   2021    }
   2022
   2023    return bdrv_op_is_blocked(bs, op, errp);
   2024}
   2025
   2026void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
   2027{
   2028    BlockDriverState *bs = blk_bs(blk);
   2029
   2030    if (bs) {
   2031        bdrv_op_unblock(bs, op, reason);
   2032    }
   2033}
   2034
   2035void blk_op_block_all(BlockBackend *blk, Error *reason)
   2036{
   2037    BlockDriverState *bs = blk_bs(blk);
   2038
   2039    if (bs) {
   2040        bdrv_op_block_all(bs, reason);
   2041    }
   2042}
   2043
   2044void blk_op_unblock_all(BlockBackend *blk, Error *reason)
   2045{
   2046    BlockDriverState *bs = blk_bs(blk);
   2047
   2048    if (bs) {
   2049        bdrv_op_unblock_all(bs, reason);
   2050    }
   2051}
   2052
   2053AioContext *blk_get_aio_context(BlockBackend *blk)
   2054{
   2055    BlockDriverState *bs = blk_bs(blk);
   2056
   2057    if (bs) {
   2058        AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
   2059        assert(ctx == blk->ctx);
   2060    }
   2061
   2062    return blk->ctx;
   2063}
   2064
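/*
 * Hypothetical sketch (example_* is illustrative only): code running outside
 * the backend's home AioContext typically brackets synchronous calls with the
 * context lock, as blk_drain_all() and blk_commit_all() in this file do.
 */
static int example_flush_from_main_loop(BlockBackend *blk)
{
    AioContext *ctx = blk_get_aio_context(blk);
    int ret;

    aio_context_acquire(ctx);
    ret = blk_flush(blk);
    aio_context_release(ctx);

    return ret;
}
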
   2065static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
   2066{
   2067    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
   2068    return blk_get_aio_context(blk_acb->blk);
   2069}
   2070
   2071static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
   2072                                  bool update_root_node, Error **errp)
   2073{
   2074    BlockDriverState *bs = blk_bs(blk);
   2075    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
   2076    int ret;
   2077
   2078    if (bs) {
   2079        if (update_root_node) {
   2080            ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
   2081                                                 errp);
   2082            if (ret < 0) {
   2083                return ret;
   2084            }
   2085        }
   2086        if (tgm->throttle_state) {
   2087            bdrv_drained_begin(bs);
   2088            throttle_group_detach_aio_context(tgm);
   2089            throttle_group_attach_aio_context(tgm, new_context);
   2090            bdrv_drained_end(bs);
   2091        }
   2092    }
   2093
   2094    blk->ctx = new_context;
   2095    return 0;
   2096}
   2097
   2098int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
   2099                        Error **errp)
   2100{
   2101    return blk_do_set_aio_context(blk, new_context, true, errp);
   2102}
   2103
   2104static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
   2105                                     GSList **ignore, Error **errp)
   2106{
   2107    BlockBackend *blk = child->opaque;
   2108
   2109    if (blk->allow_aio_context_change) {
   2110        return true;
   2111    }
   2112
   2113    /* Only manually created BlockBackends that are not attached to anything
   2114     * can change their AioContext without updating their user. */
   2115    if (!blk->name || blk->dev) {
   2116        /* TODO Add BB name/QOM path */
   2117        error_setg(errp, "Cannot change iothread of active block backend");
   2118        return false;
   2119    }
   2120
   2121    return true;
   2122}
   2123
   2124static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
   2125                                 GSList **ignore)
   2126{
   2127    BlockBackend *blk = child->opaque;
   2128    blk_do_set_aio_context(blk, ctx, false, &error_abort);
   2129}
   2130
   2131void blk_add_aio_context_notifier(BlockBackend *blk,
   2132        void (*attached_aio_context)(AioContext *new_context, void *opaque),
   2133        void (*detach_aio_context)(void *opaque), void *opaque)
   2134{
   2135    BlockBackendAioNotifier *notifier;
   2136    BlockDriverState *bs = blk_bs(blk);
   2137
   2138    notifier = g_new(BlockBackendAioNotifier, 1);
   2139    notifier->attached_aio_context = attached_aio_context;
   2140    notifier->detach_aio_context = detach_aio_context;
   2141    notifier->opaque = opaque;
   2142    QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
   2143
   2144    if (bs) {
   2145        bdrv_add_aio_context_notifier(bs, attached_aio_context,
   2146                                      detach_aio_context, opaque);
   2147    }
   2148}
   2149
   2150void blk_remove_aio_context_notifier(BlockBackend *blk,
   2151                                     void (*attached_aio_context)(AioContext *,
   2152                                                                  void *),
   2153                                     void (*detach_aio_context)(void *),
   2154                                     void *opaque)
   2155{
   2156    BlockBackendAioNotifier *notifier;
   2157    BlockDriverState *bs = blk_bs(blk);
   2158
   2159    if (bs) {
   2160        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
   2161                                         detach_aio_context, opaque);
   2162    }
   2163
   2164    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
   2165        if (notifier->attached_aio_context == attached_aio_context &&
   2166            notifier->detach_aio_context == detach_aio_context &&
   2167            notifier->opaque == opaque) {
   2168            QLIST_REMOVE(notifier, list);
   2169            g_free(notifier);
   2170            return;
   2171        }
   2172    }
   2173
   2174    abort();
   2175}
   2176
   2177void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
   2178{
   2179    notifier_list_add(&blk->remove_bs_notifiers, notify);
   2180}
   2181
   2182void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
   2183{
   2184    notifier_list_add(&blk->insert_bs_notifiers, notify);
   2185}
   2186
   2187void blk_io_plug(BlockBackend *blk)
   2188{
   2189    BlockDriverState *bs = blk_bs(blk);
   2190
   2191    if (bs) {
   2192        bdrv_io_plug(bs);
   2193    }
   2194}
   2195
   2196void blk_io_unplug(BlockBackend *blk)
   2197{
   2198    BlockDriverState *bs = blk_bs(blk);
   2199
   2200    if (bs) {
   2201        bdrv_io_unplug(bs);
   2202    }
   2203}
   2204
   2205BlockAcctStats *blk_get_stats(BlockBackend *blk)
   2206{
   2207    return &blk->stats;
   2208}
   2209
   2210void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
   2211                  BlockCompletionFunc *cb, void *opaque)
   2212{
   2213    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
   2214}
   2215
   2216int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
   2217                                      int bytes, BdrvRequestFlags flags)
   2218{
   2219    return blk_co_pwritev(blk, offset, bytes, NULL,
   2220                          flags | BDRV_REQ_ZERO_WRITE);
   2221}
   2222
   2223int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
   2224                          int count)
   2225{
   2226    return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
   2227                   BDRV_REQ_WRITE_COMPRESSED);
   2228}
   2229
   2230int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
   2231                 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
   2232{
   2233    if (!blk_is_available(blk)) {
   2234        error_setg(errp, "No medium inserted");
   2235        return -ENOMEDIUM;
   2236    }
   2237
   2238    return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
   2239}
   2240
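/*
 * Hypothetical sketch (example_* is illustrative only): growing the medium
 * without preallocating and without insisting on an exact on-disk size.
 */
static int example_grow(BlockBackend *blk, int64_t new_size, Error **errp)
{
    return blk_truncate(blk, new_size, false, PREALLOC_MODE_OFF, 0, errp);
}
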
   2241int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
   2242                     int64_t pos, int size)
   2243{
   2244    int ret;
   2245
   2246    if (!blk_is_available(blk)) {
   2247        return -ENOMEDIUM;
   2248    }
   2249
   2250    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
   2251    if (ret < 0) {
   2252        return ret;
   2253    }
   2254
   2255    if (ret == size && !blk->enable_write_cache) {
   2256        ret = bdrv_flush(blk_bs(blk));
   2257    }
   2258
   2259    return ret < 0 ? ret : size;
   2260}
   2261
   2262int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
   2263{
   2264    if (!blk_is_available(blk)) {
   2265        return -ENOMEDIUM;
   2266    }
   2267
   2268    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
   2269}
   2270
   2271int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
   2272{
   2273    if (!blk_is_available(blk)) {
   2274        return -ENOMEDIUM;
   2275    }
   2276
   2277    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
   2278}
   2279
   2280int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
   2281{
   2282    if (!blk_is_available(blk)) {
   2283        return -ENOMEDIUM;
   2284    }
   2285
   2286    return bdrv_probe_geometry(blk_bs(blk), geo);
   2287}
   2288
   2289/*
   2290 * Updates the BlockBackendRootState object with data from the currently
   2291 * attached BlockDriverState.
   2292 */
   2293void blk_update_root_state(BlockBackend *blk)
   2294{
   2295    assert(blk->root);
   2296
   2297    blk->root_state.open_flags    = blk->root->bs->open_flags;
   2298    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
   2299}
   2300
   2301/*
   2302 * Returns the detect-zeroes setting to be used for bdrv_open() of a
   2303 * BlockDriverState which is supposed to inherit the root state.
   2304 */
   2305bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
   2306{
   2307    return blk->root_state.detect_zeroes;
   2308}
   2309
   2310/*
   2311 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
   2312 * supposed to inherit the root state.
   2313 */
   2314int blk_get_open_flags_from_root_state(BlockBackend *blk)
   2315{
   2316    return blk->root_state.open_flags;
   2317}
   2318
   2319BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
   2320{
   2321    return &blk->root_state;
   2322}
   2323
   2324int blk_commit_all(void)
   2325{
   2326    BlockBackend *blk = NULL;
   2327
   2328    while ((blk = blk_all_next(blk)) != NULL) {
   2329        AioContext *aio_context = blk_get_aio_context(blk);
   2330        BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));
   2331
   2332        aio_context_acquire(aio_context);
   2333        if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
   2334            int ret;
   2335
   2336            ret = bdrv_commit(unfiltered_bs);
   2337            if (ret < 0) {
   2338                aio_context_release(aio_context);
   2339                return ret;
   2340            }
   2341        }
   2342        aio_context_release(aio_context);
   2343    }
   2344    return 0;
   2345}
   2346
   2347
   2348/* throttling disk I/O limits */
   2349void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
   2350{
   2351    throttle_group_config(&blk->public.throttle_group_member, cfg);
   2352}
   2353
   2354void blk_io_limits_disable(BlockBackend *blk)
   2355{
   2356    BlockDriverState *bs = blk_bs(blk);
   2357    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
   2358    assert(tgm->throttle_state);
   2359    if (bs) {
   2360        bdrv_drained_begin(bs);
   2361    }
   2362    throttle_group_unregister_tgm(tgm);
   2363    if (bs) {
   2364        bdrv_drained_end(bs);
   2365    }
   2366}
   2367
   2368/* should be called before blk_set_io_limits if a limit is set */
   2369void blk_io_limits_enable(BlockBackend *blk, const char *group)
   2370{
   2371    assert(!blk->public.throttle_group_member.throttle_state);
   2372    throttle_group_register_tgm(&blk->public.throttle_group_member,
   2373                                group, blk_get_aio_context(blk));
   2374}
   2375
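/*
 * Hypothetical sketch of the ordering noted above (example_* and
 * "limits-group" are illustrative only): join the throttle group first,
 * then apply the configuration.
 */
static void example_enable_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    blk_io_limits_enable(blk, "limits-group");
    blk_set_io_limits(blk, cfg);
}
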
   2376void blk_io_limits_update_group(BlockBackend *blk, const char *group)
   2377{
   2378    /* this BB is not part of any group */
   2379    if (!blk->public.throttle_group_member.throttle_state) {
   2380        return;
   2381    }
   2382
   2383    /* this BB is part of the same group as the one we want */
   2384    if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
   2385                group)) {
   2386        return;
   2387    }
   2388
   2389    /* need to change the group this BB belongs to */
   2390    blk_io_limits_disable(blk);
   2391    blk_io_limits_enable(blk, group);
   2392}
   2393
   2394static void blk_root_drained_begin(BdrvChild *child)
   2395{
   2396    BlockBackend *blk = child->opaque;
   2397    ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
   2398
   2399    if (++blk->quiesce_counter == 1) {
   2400        if (blk->dev_ops && blk->dev_ops->drained_begin) {
   2401            blk->dev_ops->drained_begin(blk->dev_opaque);
   2402        }
   2403    }
   2404
   2405    /* Note that blk->root may not be accessible here yet if we are just
   2406     * attaching to a BlockDriverState that is drained. Use child instead. */
   2407
   2408    if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
   2409        throttle_group_restart_tgm(tgm);
   2410    }
   2411}
   2412
   2413static bool blk_root_drained_poll(BdrvChild *child)
   2414{
   2415    BlockBackend *blk = child->opaque;
   2416    bool busy = false;
   2417    assert(blk->quiesce_counter);
   2418
   2419    if (blk->dev_ops && blk->dev_ops->drained_poll) {
   2420        busy = blk->dev_ops->drained_poll(blk->dev_opaque);
   2421    }
   2422    return busy || !!blk->in_flight;
   2423}
   2424
   2425static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
   2426{
   2427    BlockBackend *blk = child->opaque;
   2428    assert(blk->quiesce_counter);
   2429
   2430    assert(blk->public.throttle_group_member.io_limits_disabled);
   2431    qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
   2432
   2433    if (--blk->quiesce_counter == 0) {
   2434        if (blk->dev_ops && blk->dev_ops->drained_end) {
   2435            blk->dev_ops->drained_end(blk->dev_opaque);
   2436        }
   2437        while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
   2438            /* Resume all queued requests */
   2439        }
   2440    }
   2441}
   2442
   2443void blk_register_buf(BlockBackend *blk, void *host, size_t size)
   2444{
   2445    bdrv_register_buf(blk_bs(blk), host, size);
   2446}
   2447
   2448void blk_unregister_buf(BlockBackend *blk, void *host)
   2449{
   2450    bdrv_unregister_buf(blk_bs(blk), host);
   2451}
   2452
   2453int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
   2454                                   BlockBackend *blk_out, int64_t off_out,
   2455                                   int bytes, BdrvRequestFlags read_flags,
   2456                                   BdrvRequestFlags write_flags)
   2457{
   2458    int r;
   2459    r = blk_check_byte_request(blk_in, off_in, bytes);
   2460    if (r) {
   2461        return r;
   2462    }
   2463    r = blk_check_byte_request(blk_out, off_out, bytes);
   2464    if (r) {
   2465        return r;
   2466    }
   2467    return bdrv_co_copy_range(blk_in->root, off_in,
   2468                              blk_out->root, off_out,
   2469                              bytes, read_flags, write_flags);
   2470}
   2471
   2472const BdrvChild *blk_root(BlockBackend *blk)
   2473{
   2474    return blk->root;
   2475}
   2476
   2477int blk_make_empty(BlockBackend *blk, Error **errp)
   2478{
   2479    if (!blk_is_available(blk)) {
   2480        error_setg(errp, "No medium inserted");
   2481        return -ENOMEDIUM;
   2482    }
   2483
   2484    return bdrv_make_empty(blk->root, errp);
   2485}