cachepc-qemu

Fork of AMDESE/qemu with changes for the cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

virtio.c (111805B)


/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "migration/qemu-file-types.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/dma.h"
#include "sysemu/runstate.h"
#include "standard-headers/linux/virtio_ids.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * This is the x86 page size, used as the default by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingPackedDesc {
    uint64_t addr;
    uint32_t len;
    uint16_t id;
    uint16_t flags;
} VRingPackedDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;

typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;
    MemoryRegionCache desc;
    MemoryRegionCache avail;
    MemoryRegionCache used;
} VRingMemoryRegionCaches;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
    VRingMemoryRegionCaches *caches;
} VRing;

typedef struct VRingPackedDescEvent {
    uint16_t off_wrap;
    uint16_t flags;
} VRingPackedDescEvent;

struct VirtQueue
{
    VRing vring;
    VirtQueueElement *used_elems;

    /* Next head to pop */
    uint16_t last_avail_idx;
    bool last_avail_wrap_counter;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;
    bool shadow_avail_wrap_counter;

    uint16_t used_idx;
    bool used_wrap_counter;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleAIOOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    bool host_notifier_enabled;
    QLIST_ENTRY(VirtQueue) node;
};

/* Called within call_rcu().  */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
    assert(caches != NULL);
    address_space_cache_destroy(&caches->desc);
    address_space_cache_destroy(&caches->avail);
    address_space_cache_destroy(&caches->used);
    g_free(caches);
}

static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;

    caches = qatomic_read(&vq->vring.caches);
    qatomic_rcu_set(&vq->vring.caches, NULL);
    if (caches) {
        call_rcu(caches, virtio_free_region_cache, rcu);
    }
}

static void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];
    VRingMemoryRegionCaches *old = vq->vring.caches;
    VRingMemoryRegionCaches *new = NULL;
    hwaddr addr, size;
    int64_t len;
    bool packed;

    addr = vq->vring.desc;
    if (!addr) {
        goto out_no_cache;
    }
    new = g_new0(VRingMemoryRegionCaches, 1);
    size = virtio_queue_get_desc_size(vdev, n);
    packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
    len = address_space_cache_init(&new->desc, vdev->dma_as,
                                   addr, size, packed);
    if (len < size) {
        virtio_error(vdev, "Cannot map desc");
        goto err_desc;
    }

    size = virtio_queue_get_used_size(vdev, n);
    len = address_space_cache_init(&new->used, vdev->dma_as,
                                   vq->vring.used, size, true);
    if (len < size) {
        virtio_error(vdev, "Cannot map used");
        goto err_used;
    }

    size = virtio_queue_get_avail_size(vdev, n);
    len = address_space_cache_init(&new->avail, vdev->dma_as,
                                   vq->vring.avail, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map avail");
        goto err_avail;
    }

    qatomic_rcu_set(&vq->vring.caches, new);
    if (old) {
        call_rcu(old, virtio_free_region_cache, rcu);
    }
    return;

err_avail:
    address_space_cache_destroy(&new->avail);
err_used:
    address_space_cache_destroy(&new->used);
err_desc:
    address_space_cache_destroy(&new->desc);
out_no_cache:
    g_free(new);
    virtio_virtqueue_reset_region_cache(vq);
}

/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->num || !vring->desc || !vring->align) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
    virtio_init_region_cache(vdev, n);
}
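
/*
 * Layout illustration (editorial note, not part of the original file): for
 * a split ring with num = 256 and align = VIRTIO_PCI_VRING_ALIGN, the
 * addresses computed above work out to
 *
 *   avail = desc + 256 * 16                      (4096 bytes of descriptors)
 *   used  = vring_align(avail + 4 + 2 * 256, 4096)
 *         = vring_align(avail + 516, 4096)
 *
 * i.e. the used ring starts on the next 4 KiB boundary after the avail
 * ring, giving the consumer/producer split described above.
 */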

/* Called within rcu_read_lock().  */
static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                                  MemoryRegionCache *cache, int i)
{
    address_space_read_cached(cache, i * sizeof(VRingDesc),
                              desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static void vring_packed_event_read(VirtIODevice *vdev,
                                    MemoryRegionCache *cache,
                                    VRingPackedDescEvent *e)
{
    hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
    hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);

    address_space_read_cached(cache, off_flags, &e->flags,
                              sizeof(e->flags));
    /* Make sure flags is seen before off_wrap */
    smp_rmb();
    address_space_read_cached(cache, off_off, &e->off_wrap,
                              sizeof(e->off_wrap));
    virtio_tswap16s(vdev, &e->off_wrap);
    virtio_tswap16s(vdev, &e->flags);
}

static void vring_packed_off_wrap_write(VirtIODevice *vdev,
                                        MemoryRegionCache *cache,
                                        uint16_t off_wrap)
{
    hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);

    virtio_tswap16s(vdev, &off_wrap);
    address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
    address_space_cache_invalidate(cache, off, sizeof(off_wrap));
}

static void vring_packed_flags_write(VirtIODevice *vdev,
                                     MemoryRegionCache *cache, uint16_t flags)
{
    hwaddr off = offsetof(VRingPackedDescEvent, flags);

    virtio_tswap16s(vdev, &flags);
    address_space_write_cached(cache, off, &flags, sizeof(flags));
    address_space_cache_invalidate(cache, off, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
    return qatomic_rcu_read(&vq->vring.caches);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, flags);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, idx);

    if (!caches) {
        return 0;
    }

    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
    return vq->shadow_avail_idx;
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingAvail, ring[i]);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}

/* Called within rcu_read_lock().  */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

/* Called within rcu_read_lock().  */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, ring[i]);

    if (!caches) {
        return;
    }

    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
}

/* Called within rcu_read_lock().  */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (!caches) {
        return 0;
    }

    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}

/* Called within rcu_read_lock().  */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, idx);

    if (caches) {
        virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
        address_space_cache_invalidate(&caches->used, pa, sizeof(val));
    }

    vq->used_idx = val;
}

/* Called within rcu_read_lock().  */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags;

    if (!caches) {
        return;
    }

    flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}

/* Called within rcu_read_lock().  */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches;
    hwaddr pa;
    if (!vq->notification) {
        return;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    pa = offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}

static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
    RCU_READ_LOCK_GUARD();

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
{
    uint16_t off_wrap;
    VRingPackedDescEvent e;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();
    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_event_read(vq->vdev, &caches->used, &e);

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

bool virtio_queue_get_notification(VirtQueue *vq)
{
    return vq->notification;
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    if (!vq->vring.desc) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtio_queue_packed_set_notification(vq, enable);
    } else {
        virtio_queue_split_set_notification(vq, enable);
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

static void vring_packed_desc_read_flags(VirtIODevice *vdev,
                                         uint16_t *flags,
                                         MemoryRegionCache *cache,
                                         int i)
{
    address_space_read_cached(cache,
                              i * sizeof(VRingPackedDesc) +
                              offsetof(VRingPackedDesc, flags),
                              flags, sizeof(*flags));
    virtio_tswap16s(vdev, flags);
}

static void vring_packed_desc_read(VirtIODevice *vdev,
                                   VRingPackedDesc *desc,
                                   MemoryRegionCache *cache,
                                   int i, bool strict_order)
{
    hwaddr off = i * sizeof(VRingPackedDesc);

    vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);

    if (strict_order) {
        /* Make sure flags is read before the remaining fields. */
        smp_rmb();
    }

    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
                              &desc->addr, sizeof(desc->addr));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
                              &desc->id, sizeof(desc->id));
    address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
                              &desc->len, sizeof(desc->len));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap16s(vdev, &desc->id);
    virtio_tswap32s(vdev, &desc->len);
}

static void vring_packed_desc_write_data(VirtIODevice *vdev,
                                         VRingPackedDesc *desc,
                                         MemoryRegionCache *cache,
                                         int i)
{
    hwaddr off_id = i * sizeof(VRingPackedDesc) +
                    offsetof(VRingPackedDesc, id);
    hwaddr off_len = i * sizeof(VRingPackedDesc) +
                    offsetof(VRingPackedDesc, len);

    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->id);
    address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
    address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
    address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
    address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
}

static void vring_packed_desc_write_flags(VirtIODevice *vdev,
                                          VRingPackedDesc *desc,
                                          MemoryRegionCache *cache,
                                          int i)
{
    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);

    virtio_tswap16s(vdev, &desc->flags);
    address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
    address_space_cache_invalidate(cache, off, sizeof(desc->flags));
}

static void vring_packed_desc_write(VirtIODevice *vdev,
                                    VRingPackedDesc *desc,
                                    MemoryRegionCache *cache,
                                    int i, bool strict_order)
{
    vring_packed_desc_write_data(vdev, desc, cache, i);
    if (strict_order) {
        /* Make sure data is written before flags. */
        smp_wmb();
    }
    vring_packed_desc_write_flags(vdev, desc, cache, i);
}

static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
{
    bool avail, used;

    avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
    used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
    return (avail != used) && (avail == wrap_counter);
}
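
/*
 * Example (editorial note, not part of the original file): with
 * wrap_counter == true, a descriptor whose flags have AVAIL set and USED
 * clear gives avail = 1, used = 0, so avail != used and avail ==
 * wrap_counter hold and the descriptor is available to the device.  Once
 * the device writes it back with both AVAIL and USED set, avail == used
 * and the test fails until the driver's next pass around the ring flips
 * the wrap counter.
 */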

/* Fetch avail_idx from VQ memory only when we really need to know if
 * the guest has added some buffers.
 * Called within rcu_read_lock().  */
static int virtio_queue_empty_rcu(VirtQueue *vq)
{
    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static int virtio_queue_split_empty(VirtQueue *vq)
{
    bool empty;

    if (virtio_device_disabled(vq->vdev)) {
        return 1;
    }

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    RCU_READ_LOCK_GUARD();
    empty = vring_avail_idx(vq) == vq->last_avail_idx;
    return empty;
}

/* Called within rcu_read_lock().  */
static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
{
    struct VRingPackedDesc desc;
    VRingMemoryRegionCaches *cache;

    if (unlikely(!vq->vring.desc)) {
        return 1;
    }

    cache = vring_get_region_caches(vq);
    if (!cache) {
        return 1;
    }

    vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
                                 vq->last_avail_idx);

    return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
}

static int virtio_queue_packed_empty(VirtQueue *vq)
{
    RCU_READ_LOCK_GUARD();
    return virtio_queue_packed_empty_rcu(vq);
}

int virtio_queue_empty(VirtQueue *vq)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        return virtio_queue_packed_empty(vq);
    } else {
        return virtio_queue_split_empty(vq);
    }
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    AddressSpace *dma_as = vq->vdev->dma_as;
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
                         elem->in_sg[i].iov_len,
                         DMA_DIRECTION_FROM_DEVICE, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
                         elem->out_sg[i].iov_len,
                         DMA_DIRECTION_TO_DEVICE,
                         elem->out_sg[i].iov_len);
    }
}

/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue.  This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse -= elem->ndescs;
    virtqueue_unmap_sg(vq, elem, len);
}

static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
{
    vq->last_avail_idx -= num;
}

static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
{
    if (vq->last_avail_idx < num) {
        vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
        vq->last_avail_wrap_counter ^= 1;
    } else {
        vq->last_avail_idx -= num;
    }
}
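
/*
 * Example (editorial note, not part of the original file): with
 * vring.num = 256, last_avail_idx = 1 and num = 3, the packed index would
 * underflow, so it wraps to 256 + 1 - 3 = 254 and last_avail_wrap_counter
 * is flipped; the split variant above relies on plain 16-bit wrap-around
 * instead, since the split avail index is free-running.
 */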

/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, 1);
    } else {
        virtqueue_split_rewind(vq, 1);
    }

    virtqueue_detach_element(vq, elem, len);
}

/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }

    vq->inuse -= num;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_rewind(vq, num);
    } else {
        virtqueue_split_rewind(vq, num);
    }
    return true;
}

static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
                                  unsigned int len, unsigned int idx)
{
    vq->used_elems[idx].index = elem->index;
    vq->used_elems[idx].len = len;
    vq->used_elems[idx].ndescs = elem->ndescs;
}

static void virtqueue_packed_fill_desc(VirtQueue *vq,
                                       const VirtQueueElement *elem,
                                       unsigned int idx,
                                       bool strict_order)
{
    uint16_t head;
    VRingMemoryRegionCaches *caches;
    VRingPackedDesc desc = {
        .id = elem->index,
        .len = elem->len,
    };
    bool wrap_counter = vq->used_wrap_counter;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    head = vq->used_idx + idx;
    if (head >= vq->vring.num) {
        head -= vq->vring.num;
        wrap_counter ^= 1;
    }
    if (wrap_counter) {
        desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
    } else {
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
        desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        return;
    }

    vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
}

/* Called within rcu_read_lock().  */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (virtio_device_disabled(vq->vdev)) {
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_fill(vq, elem, len, idx);
    } else {
        virtqueue_split_fill(vq, elem, len, idx);
    }
}

/* Called within rcu_read_lock().  */
static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}
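
/*
 * Example (editorial note, not part of the original file): the final check
 * fires when signalled_used falls inside the window (old, new], i.e. the
 * used index has wrapped past the last value we signalled on.  With
 * old = 0xfffe, count = 4 and signalled_used = 0xffff, new = 0x0002, so
 * (int16_t)(new - signalled_used) = 3 and (uint16_t)(new - old) = 4;
 * 3 < 4, signalled_used_valid is cleared, and the next notify decision
 * cannot rely on the stale value.
 */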

static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
{
    unsigned int i, ndescs = 0;

    if (unlikely(!vq->vring.desc)) {
        return;
    }

    for (i = 1; i < count; i++) {
        virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
        ndescs += vq->used_elems[i].ndescs;
    }
    virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
    ndescs += vq->used_elems[0].ndescs;

    vq->inuse -= ndescs;
    vq->used_idx += ndescs;
    if (vq->used_idx >= vq->vring.num) {
        vq->used_idx -= vq->vring.num;
        vq->used_wrap_counter ^= 1;
    }
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    if (virtio_device_disabled(vq->vdev)) {
        vq->inuse -= count;
        return;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_flush(vq, count);
    } else {
        virtqueue_split_flush(vq, count);
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    RCU_READ_LOCK_GUARD();
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
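
/*
 * Typical device-side usage (editorial sketch, not part of this file;
 * error handling omitted):
 *
 *     VirtQueueElement *elem;
 *
 *     elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (elem) {
 *         ... read from elem->out_sg, write 'len' bytes into elem->in_sg ...
 *         virtqueue_push(vq, elem, len);
 *         virtio_notify(vdev, vq);
 *         g_free(elem);
 *     }
 */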

/* Called within rcu_read_lock().  */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

/* Called within rcu_read_lock().  */
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};

static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                          MemoryRegionCache *desc_cache,
                                          unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_split_desc_read(vdev, desc, desc_cache, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}

/* Called within rcu_read_lock().  */
static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
                            unsigned int *in_bytes, unsigned int *out_bytes,
                            unsigned max_in_bytes, unsigned max_out_bytes,
                            VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int max, idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    int64_t len = 0;
    int rc;

    idx = vq->last_avail_idx;
    total_bufs = in_total = out_total = 0;

    max = vq->vring.num;

    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        MemoryRegionCache *desc_cache = &caches->desc;
        unsigned int num_bufs;
        VRingDesc desc;
        unsigned int i;

        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        vring_split_desc_read(vdev, &desc, desc_cache, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingDesc);
            num_bufs = i = 0;
            vring_split_desc_read(vdev, &desc, desc_cache, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
        } else {
            total_bufs = num_bufs;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

static int virtqueue_packed_read_next_desc(VirtQueue *vq,
                                           VRingPackedDesc *desc,
                                           MemoryRegionCache *desc_cache,
                                           unsigned int max,
                                           unsigned int *next,
                                           bool indirect)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    ++*next;
    if (*next == max) {
        if (indirect) {
            return VIRTQUEUE_READ_DESC_DONE;
        } else {
            (*next) -= vq->vring.num;
        }
    }

    vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
    return VIRTQUEUE_READ_DESC_MORE;
}

/* Called within rcu_read_lock().  */
static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
                                             unsigned int *in_bytes,
                                             unsigned int *out_bytes,
                                             unsigned max_in_bytes,
                                             unsigned max_out_bytes,
                                             VRingMemoryRegionCaches *caches)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int max, idx;
    unsigned int total_bufs, in_total, out_total;
    MemoryRegionCache *desc_cache;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    int64_t len = 0;
    VRingPackedDesc desc;
    bool wrap_counter;

    idx = vq->last_avail_idx;
    wrap_counter = vq->last_avail_wrap_counter;
    total_bufs = in_total = out_total = 0;

    max = vq->vring.num;

    for (;;) {
        unsigned int num_bufs = total_bufs;
        unsigned int i = idx;
        int rc;

        desc_cache = &caches->desc;
        vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
        if (!is_desc_avail(desc.flags, wrap_counter)) {
            break;
        }

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingPackedDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingPackedDesc);
            num_bufs = i = 0;
            vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
                                                 &i, desc_cache ==
                                                 &indirect_desc_cache);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            total_bufs++;
            idx++;
        } else {
            idx += num_bufs - total_bufs;
            total_bufs = num_bufs;
        }

        if (idx >= vq->vring.num) {
            idx -= vq->vring.num;
            wrap_counter ^= 1;
        }
    }

    /* Record the index and wrap counter for a kick we want */
    vq->shadow_avail_idx = idx;
    vq->shadow_avail_wrap_counter = wrap_counter;
done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    uint16_t desc_size;
    VRingMemoryRegionCaches *caches;

    RCU_READ_LOCK_GUARD();

    if (unlikely(!vq->vring.desc)) {
        goto err;
    }

    caches = vring_get_region_caches(vq);
    if (!caches) {
        goto err;
    }

    desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                                sizeof(VRingPackedDesc) : sizeof(VRingDesc);
    if (caches->desc.len < vq->vring.num * desc_size) {
        virtio_error(vq->vdev, "Cannot map descriptor ring");
        goto err;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
                                         max_in_bytes, max_out_bytes,
                                         caches);
    } else {
        virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
                                        max_in_bytes, max_out_bytes,
                                        caches);
    }

    return;
err:
    if (in_bytes) {
        *in_bytes = 0;
    }
    if (out_bytes) {
        *out_bytes = 0;
    }
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

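    /*
     * Editorial note: dma_memory_map() may map less than the 'len' it was
     * asked for (e.g. when the range spans multiple memory regions), so a
     * single descriptor can expand into several iov entries in this loop.
     */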
    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
                                              is_write ?
                                              DMA_DIRECTION_FROM_DEVICE :
                                              DMA_DIRECTION_TO_DEVICE);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}

/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i;

    for (i = 0; i < out_num + in_num; i++) {
        int is_write = i >= out_num;

        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
        iov++;
    }
}

static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                hwaddr *addr, unsigned int num_sg,
                                bool is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = dma_memory_map(vdev->dma_as,
                                        addr[i], &len, is_write ?
                                        DMA_DIRECTION_FROM_DEVICE :
                                        DMA_DIRECTION_TO_DEVICE);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
                                                                        false);
}

static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
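
/*
 * Layout example (editorial note, not part of the original file): the
 * element header is followed by the in_addr[], out_addr[], in_sg[] and
 * out_sg[] arrays in a single allocation.  For sz = sizeof(VirtQueueElement),
 * out_num = 1 and in_num = 1 on a 64-bit host this is roughly
 *
 *   [VirtQueueElement][in_addr: 8][out_addr: 8][in_sg: 16][out_sg: 16]
 *
 * with each array start aligned up to the alignment of its element type.
 */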

static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are neither input nor output buffers. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
   1543
   1544static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
   1545{
   1546    unsigned int i, max;
   1547    VRingMemoryRegionCaches *caches;
   1548    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
   1549    MemoryRegionCache *desc_cache;
   1550    int64_t len;
   1551    VirtIODevice *vdev = vq->vdev;
   1552    VirtQueueElement *elem = NULL;
   1553    unsigned out_num, in_num, elem_entries;
   1554    hwaddr addr[VIRTQUEUE_MAX_SIZE];
   1555    struct iovec iov[VIRTQUEUE_MAX_SIZE];
   1556    VRingPackedDesc desc;
   1557    uint16_t id;
   1558    int rc;
   1559
   1560    RCU_READ_LOCK_GUARD();
   1561    if (virtio_queue_packed_empty_rcu(vq)) {
   1562        goto done;
   1563    }
   1564
    1565    /* When we start there are neither input nor output buffers. */
   1566    out_num = in_num = elem_entries = 0;
   1567
   1568    max = vq->vring.num;
   1569
   1570    if (vq->inuse >= vq->vring.num) {
   1571        virtio_error(vdev, "Virtqueue size exceeded");
   1572        goto done;
   1573    }
   1574
   1575    i = vq->last_avail_idx;
   1576
   1577    caches = vring_get_region_caches(vq);
   1578    if (!caches) {
   1579        virtio_error(vdev, "Region caches not initialized");
   1580        goto done;
   1581    }
   1582
   1583    if (caches->desc.len < max * sizeof(VRingDesc)) {
   1584        virtio_error(vdev, "Cannot map descriptor ring");
   1585        goto done;
   1586    }
   1587
   1588    desc_cache = &caches->desc;
   1589    vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
   1590    id = desc.id;
   1591    if (desc.flags & VRING_DESC_F_INDIRECT) {
   1592        if (desc.len % sizeof(VRingPackedDesc)) {
   1593            virtio_error(vdev, "Invalid size for indirect buffer table");
   1594            goto done;
   1595        }
   1596
   1597        /* loop over the indirect descriptor table */
   1598        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
   1599                                       desc.addr, desc.len, false);
   1600        desc_cache = &indirect_desc_cache;
   1601        if (len < desc.len) {
   1602            virtio_error(vdev, "Cannot map indirect buffer");
   1603            goto done;
   1604        }
   1605
   1606        max = desc.len / sizeof(VRingPackedDesc);
   1607        i = 0;
   1608        vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
   1609    }
   1610
   1611    /* Collect all the descriptors */
   1612    do {
   1613        bool map_ok;
   1614
   1615        if (desc.flags & VRING_DESC_F_WRITE) {
   1616            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
   1617                                        iov + out_num,
   1618                                        VIRTQUEUE_MAX_SIZE - out_num, true,
   1619                                        desc.addr, desc.len);
   1620        } else {
   1621            if (in_num) {
   1622                virtio_error(vdev, "Incorrect order for descriptors");
   1623                goto err_undo_map;
   1624            }
   1625            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
   1626                                        VIRTQUEUE_MAX_SIZE, false,
   1627                                        desc.addr, desc.len);
   1628        }
   1629        if (!map_ok) {
   1630            goto err_undo_map;
   1631        }
   1632
   1633        /* If we've got too many, that implies a descriptor loop. */
   1634        if (++elem_entries > max) {
   1635            virtio_error(vdev, "Looped descriptor");
   1636            goto err_undo_map;
   1637        }
   1638
   1639        rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
   1640                                             desc_cache ==
   1641                                             &indirect_desc_cache);
   1642    } while (rc == VIRTQUEUE_READ_DESC_MORE);
   1643
   1644    /* Now copy what we have collected and mapped */
   1645    elem = virtqueue_alloc_element(sz, out_num, in_num);
   1646    for (i = 0; i < out_num; i++) {
   1647        elem->out_addr[i] = addr[i];
   1648        elem->out_sg[i] = iov[i];
   1649    }
   1650    for (i = 0; i < in_num; i++) {
   1651        elem->in_addr[i] = addr[out_num + i];
   1652        elem->in_sg[i] = iov[out_num + i];
   1653    }
   1654
   1655    elem->index = id;
   1656    elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
   1657    vq->last_avail_idx += elem->ndescs;
   1658    vq->inuse += elem->ndescs;
   1659
   1660    if (vq->last_avail_idx >= vq->vring.num) {
   1661        vq->last_avail_idx -= vq->vring.num;
   1662        vq->last_avail_wrap_counter ^= 1;
   1663    }
   1664
   1665    vq->shadow_avail_idx = vq->last_avail_idx;
   1666    vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
   1667
   1668    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
   1669done:
   1670    address_space_cache_destroy(&indirect_desc_cache);
   1671
   1672    return elem;
   1673
   1674err_undo_map:
   1675    virtqueue_undo_map_desc(out_num, in_num, iov);
   1676    goto done;
   1677}
   1678
   1679void *virtqueue_pop(VirtQueue *vq, size_t sz)
   1680{
   1681    if (virtio_device_disabled(vq->vdev)) {
   1682        return NULL;
   1683    }
   1684
   1685    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
   1686        return virtqueue_packed_pop(vq, sz);
   1687    } else {
   1688        return virtqueue_split_pop(vq, sz);
   1689    }
   1690}
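
/*
 * Illustrative sketch, not part of the original file: a device typically
 * drains a virtqueue from its handle_output callback by popping elements,
 * processing the scatter-gather lists, and pushing each element back before
 * notifying the guest.  The example_ names below are hypothetical.
 */
#if 0
static void example_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;              /* queue empty (or device broken) */
        }
        /* read the request from elem->out_sg[0..out_num-1] and write any
         * response into elem->in_sg[0..in_num-1] ... */
        virtqueue_push(vq, elem, 0 /* bytes written to in_sg */);
        g_free(elem);
    }
    virtio_notify(vdev, vq);    /* honours interrupt suppression */
}
#endif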
   1691
   1692static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
   1693{
   1694    VRingMemoryRegionCaches *caches;
   1695    MemoryRegionCache *desc_cache;
   1696    unsigned int dropped = 0;
   1697    VirtQueueElement elem = {};
   1698    VirtIODevice *vdev = vq->vdev;
   1699    VRingPackedDesc desc;
   1700
   1701    RCU_READ_LOCK_GUARD();
   1702
   1703    caches = vring_get_region_caches(vq);
   1704    if (!caches) {
   1705        return 0;
   1706    }
   1707
   1708    desc_cache = &caches->desc;
   1709
   1710    virtio_queue_set_notification(vq, 0);
   1711
   1712    while (vq->inuse < vq->vring.num) {
   1713        unsigned int idx = vq->last_avail_idx;
   1714        /*
    1715         * Works like virtqueue_pop() but neither maps buffers
    1716         * nor allocates any memory.
   1717         */
   1718        vring_packed_desc_read(vdev, &desc, desc_cache,
    1719                               vq->last_avail_idx, true);
   1720        if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
   1721            break;
   1722        }
   1723        elem.index = desc.id;
   1724        elem.ndescs = 1;
   1725        while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
   1726                                               vq->vring.num, &idx, false)) {
   1727            ++elem.ndescs;
   1728        }
   1729        /*
   1730         * immediately push the element, nothing to unmap
   1731         * as both in_num and out_num are set to 0.
   1732         */
   1733        virtqueue_push(vq, &elem, 0);
   1734        dropped++;
   1735        vq->last_avail_idx += elem.ndescs;
   1736        if (vq->last_avail_idx >= vq->vring.num) {
   1737            vq->last_avail_idx -= vq->vring.num;
   1738            vq->last_avail_wrap_counter ^= 1;
   1739        }
   1740    }
   1741
   1742    return dropped;
   1743}
   1744
   1745static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
   1746{
   1747    unsigned int dropped = 0;
   1748    VirtQueueElement elem = {};
   1749    VirtIODevice *vdev = vq->vdev;
   1750    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
   1751
   1752    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
    1753        /* Works like virtqueue_pop() but neither maps buffers
    1754         * nor allocates any memory */
   1755        smp_rmb();
   1756        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
   1757            break;
   1758        }
   1759        vq->inuse++;
   1760        vq->last_avail_idx++;
   1761        if (fEventIdx) {
   1762            vring_set_avail_event(vq, vq->last_avail_idx);
   1763        }
   1764        /* immediately push the element, nothing to unmap
   1765         * as both in_num and out_num are set to 0 */
   1766        virtqueue_push(vq, &elem, 0);
   1767        dropped++;
   1768    }
   1769
   1770    return dropped;
   1771}
   1772
   1773/* virtqueue_drop_all:
   1774 * @vq: The #VirtQueue
   1775 * Drops all queued buffers and indicates them to the guest
    1776 * as if they were completed. Useful when buffers cannot be
   1777 * processed but must be returned to the guest.
   1778 */
   1779unsigned int virtqueue_drop_all(VirtQueue *vq)
   1780{
   1781    struct VirtIODevice *vdev = vq->vdev;
   1782
    1783    if (virtio_device_disabled(vdev)) {
   1784        return 0;
   1785    }
   1786
   1787    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   1788        return virtqueue_packed_drop_all(vq);
   1789    } else {
   1790        return virtqueue_split_drop_all(vq);
   1791    }
   1792}
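
/*
 * Illustrative sketch (hypothetical device code): a device that can no
 * longer service its queue, e.g. a network device whose backend went away,
 * can return every pending buffer and then notify the guest:
 */
#if 0
    if (!example_can_receive(dev)) {            /* hypothetical condition */
        if (virtqueue_drop_all(vq) > 0) {
            virtio_notify(vdev, vq);
        }
    }
#endif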
   1793
   1794/* Reading and writing a structure directly to QEMUFile is *awful*, but
   1795 * it is what QEMU has always done by mistake.  We can change it sooner
   1796 * or later by bumping the version number of the affected vm states.
    1797 * In the meantime, since the in-memory layout of VirtQueueElement
   1798 * has changed, we need to marshal to and from the layout that was
   1799 * used before the change.
   1800 */
   1801typedef struct VirtQueueElementOld {
   1802    unsigned int index;
   1803    unsigned int out_num;
   1804    unsigned int in_num;
   1805    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
   1806    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
   1807    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
   1808    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
   1809} VirtQueueElementOld;
   1810
   1811void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
   1812{
   1813    VirtQueueElement *elem;
   1814    VirtQueueElementOld data;
   1815    int i;
   1816
   1817    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
   1818
   1819    /* TODO: teach all callers that this can fail, and return failure instead
   1820     * of asserting here.
   1821     * This is just one thing (there are probably more) that must be
   1822     * fixed before we can allow NDEBUG compilation.
   1823     */
   1824    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
   1825    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
   1826
   1827    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
   1828    elem->index = data.index;
   1829
   1830    for (i = 0; i < elem->in_num; i++) {
   1831        elem->in_addr[i] = data.in_addr[i];
   1832    }
   1833
   1834    for (i = 0; i < elem->out_num; i++) {
   1835        elem->out_addr[i] = data.out_addr[i];
   1836    }
   1837
   1838    for (i = 0; i < elem->in_num; i++) {
   1839        /* Base is overwritten by virtqueue_map.  */
   1840        elem->in_sg[i].iov_base = 0;
   1841        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
   1842    }
   1843
   1844    for (i = 0; i < elem->out_num; i++) {
   1845        /* Base is overwritten by virtqueue_map.  */
   1846        elem->out_sg[i].iov_base = 0;
   1847        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
   1848    }
   1849
   1850    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   1851        qemu_get_be32s(f, &elem->ndescs);
   1852    }
   1853
   1854    virtqueue_map(vdev, elem);
   1855    return elem;
   1856}
   1857
   1858void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
   1859                                VirtQueueElement *elem)
   1860{
   1861    VirtQueueElementOld data;
   1862    int i;
   1863
   1864    memset(&data, 0, sizeof(data));
   1865    data.index = elem->index;
   1866    data.in_num = elem->in_num;
   1867    data.out_num = elem->out_num;
   1868
   1869    for (i = 0; i < elem->in_num; i++) {
   1870        data.in_addr[i] = elem->in_addr[i];
   1871    }
   1872
   1873    for (i = 0; i < elem->out_num; i++) {
   1874        data.out_addr[i] = elem->out_addr[i];
   1875    }
   1876
   1877    for (i = 0; i < elem->in_num; i++) {
   1878        /* Base is overwritten by virtqueue_map when loading.  Do not
   1879         * save it, as it would leak the QEMU address space layout.  */
   1880        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
   1881    }
   1882
   1883    for (i = 0; i < elem->out_num; i++) {
   1884        /* Do not save iov_base as above.  */
   1885        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
   1886    }
   1887
   1888    if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   1889        qemu_put_be32s(f, &elem->ndescs);
   1890    }
   1891
   1892    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
   1893}
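
/*
 * Illustrative sketch (hypothetical): a device that holds a popped element
 * across migration saves and restores it with the pair above from its
 * VirtioDeviceClass save/load hooks; "s->elem" is assumed device state.
 */
#if 0
static void example_save(VirtIODevice *vdev, QEMUFile *f)
{
    qemu_put_be32(f, s->elem != NULL);
    if (s->elem) {
        qemu_put_virtqueue_element(vdev, f, s->elem);
    }
}

static int example_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    if (qemu_get_be32(f)) {
        s->elem = qemu_get_virtqueue_element(vdev, f,
                                             sizeof(VirtQueueElement));
    }
    return 0;
}
#endif
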
   1894
   1895/* virtio device */
   1896static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
   1897{
   1898    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   1899    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   1900
   1901    if (virtio_device_disabled(vdev)) {
   1902        return;
   1903    }
   1904
   1905    if (k->notify) {
   1906        k->notify(qbus->parent, vector);
   1907    }
   1908}
   1909
   1910void virtio_update_irq(VirtIODevice *vdev)
   1911{
   1912    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
   1913}
   1914
   1915static int virtio_validate_features(VirtIODevice *vdev)
   1916{
   1917    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   1918
   1919    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
   1920        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
   1921        return -EFAULT;
   1922    }
   1923
   1924    if (k->validate_features) {
   1925        return k->validate_features(vdev);
   1926    } else {
   1927        return 0;
   1928    }
   1929}
   1930
   1931int virtio_set_status(VirtIODevice *vdev, uint8_t val)
   1932{
   1933    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   1934    trace_virtio_set_status(vdev, val);
   1935
   1936    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   1937        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
   1938            val & VIRTIO_CONFIG_S_FEATURES_OK) {
   1939            int ret = virtio_validate_features(vdev);
   1940
   1941            if (ret) {
   1942                return ret;
   1943            }
   1944        }
   1945    }
   1946
   1947    if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
   1948        (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
   1949        virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
   1950    }
   1951
   1952    if (k->set_status) {
   1953        k->set_status(vdev, val);
   1954    }
   1955    vdev->status = val;
   1956
   1957    return 0;
   1958}
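
/*
 * Illustrative sketch: transports feed guest status writes through
 * virtio_set_status().  Under VIRTIO 1.0 the guest sets ACKNOWLEDGE and
 * DRIVER, negotiates features, then sets FEATURES_OK (validated above) and
 * finally DRIVER_OK, which starts the device:
 */
#if 0
    /* e.g. in a transport's status-register write handler: */
    if (virtio_set_status(vdev, val & 0xff) != 0) {
        /* FEATURES_OK was refused; the guest re-reads the status register
         * and sees that the bit did not stick. */
    }
#endif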
   1959
   1960static enum virtio_device_endian virtio_default_endian(void)
   1961{
   1962    if (target_words_bigendian()) {
   1963        return VIRTIO_DEVICE_ENDIAN_BIG;
   1964    } else {
   1965        return VIRTIO_DEVICE_ENDIAN_LITTLE;
   1966    }
   1967}
   1968
   1969static enum virtio_device_endian virtio_current_cpu_endian(void)
   1970{
   1971    if (cpu_virtio_is_big_endian(current_cpu)) {
   1972        return VIRTIO_DEVICE_ENDIAN_BIG;
   1973    } else {
   1974        return VIRTIO_DEVICE_ENDIAN_LITTLE;
   1975    }
   1976}
   1977
   1978void virtio_reset(void *opaque)
   1979{
   1980    VirtIODevice *vdev = opaque;
   1981    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   1982    int i;
   1983
   1984    virtio_set_status(vdev, 0);
   1985    if (current_cpu) {
   1986        /* Guest initiated reset */
   1987        vdev->device_endian = virtio_current_cpu_endian();
   1988    } else {
   1989        /* System reset */
   1990        vdev->device_endian = virtio_default_endian();
   1991    }
   1992
   1993    if (k->reset) {
   1994        k->reset(vdev);
   1995    }
   1996
   1997    vdev->start_on_kick = false;
   1998    vdev->started = false;
   1999    vdev->broken = false;
   2000    vdev->guest_features = 0;
   2001    vdev->queue_sel = 0;
   2002    vdev->status = 0;
   2003    vdev->disabled = false;
   2004    qatomic_set(&vdev->isr, 0);
   2005    vdev->config_vector = VIRTIO_NO_VECTOR;
   2006    virtio_notify_vector(vdev, vdev->config_vector);
   2007
    2008    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   2009        vdev->vq[i].vring.desc = 0;
   2010        vdev->vq[i].vring.avail = 0;
   2011        vdev->vq[i].vring.used = 0;
   2012        vdev->vq[i].last_avail_idx = 0;
   2013        vdev->vq[i].shadow_avail_idx = 0;
   2014        vdev->vq[i].used_idx = 0;
   2015        vdev->vq[i].last_avail_wrap_counter = true;
   2016        vdev->vq[i].shadow_avail_wrap_counter = true;
   2017        vdev->vq[i].used_wrap_counter = true;
   2018        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
   2019        vdev->vq[i].signalled_used = 0;
   2020        vdev->vq[i].signalled_used_valid = false;
   2021        vdev->vq[i].notification = true;
   2022        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
   2023        vdev->vq[i].inuse = 0;
   2024        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
   2025    }
   2026}
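
/*
 * Illustrative sketch: transports invoke virtio_reset() when the guest
 * writes zero to the status register (virtio-mmio, for instance, does the
 * equivalent of this):
 */
#if 0
    if (val == 0) {
        virtio_reset(vdev);
    } else {
        virtio_set_status(vdev, val);
    }
#endif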
   2027
   2028uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
   2029{
   2030    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2031    uint8_t val;
   2032
   2033    if (addr + sizeof(val) > vdev->config_len) {
   2034        return (uint32_t)-1;
   2035    }
   2036
   2037    k->get_config(vdev, vdev->config);
   2038
   2039    val = ldub_p(vdev->config + addr);
   2040    return val;
   2041}
   2042
   2043uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
   2044{
   2045    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2046    uint16_t val;
   2047
   2048    if (addr + sizeof(val) > vdev->config_len) {
   2049        return (uint32_t)-1;
   2050    }
   2051
   2052    k->get_config(vdev, vdev->config);
   2053
   2054    val = lduw_p(vdev->config + addr);
   2055    return val;
   2056}
   2057
   2058uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
   2059{
   2060    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2061    uint32_t val;
   2062
   2063    if (addr + sizeof(val) > vdev->config_len) {
   2064        return (uint32_t)-1;
   2065    }
   2066
   2067    k->get_config(vdev, vdev->config);
   2068
   2069    val = ldl_p(vdev->config + addr);
   2070    return val;
   2071}
   2072
   2073void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
   2074{
   2075    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2076    uint8_t val = data;
   2077
   2078    if (addr + sizeof(val) > vdev->config_len) {
   2079        return;
   2080    }
   2081
   2082    stb_p(vdev->config + addr, val);
   2083
   2084    if (k->set_config) {
   2085        k->set_config(vdev, vdev->config);
   2086    }
   2087}
   2088
   2089void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
   2090{
   2091    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2092    uint16_t val = data;
   2093
   2094    if (addr + sizeof(val) > vdev->config_len) {
   2095        return;
   2096    }
   2097
   2098    stw_p(vdev->config + addr, val);
   2099
   2100    if (k->set_config) {
   2101        k->set_config(vdev, vdev->config);
   2102    }
   2103}
   2104
   2105void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
   2106{
   2107    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2108    uint32_t val = data;
   2109
   2110    if (addr + sizeof(val) > vdev->config_len) {
   2111        return;
   2112    }
   2113
   2114    stl_p(vdev->config + addr, val);
   2115
   2116    if (k->set_config) {
   2117        k->set_config(vdev, vdev->config);
   2118    }
   2119}
   2120
   2121uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
   2122{
   2123    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2124    uint8_t val;
   2125
   2126    if (addr + sizeof(val) > vdev->config_len) {
   2127        return (uint32_t)-1;
   2128    }
   2129
   2130    k->get_config(vdev, vdev->config);
   2131
   2132    val = ldub_p(vdev->config + addr);
   2133    return val;
   2134}
   2135
   2136uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
   2137{
   2138    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2139    uint16_t val;
   2140
   2141    if (addr + sizeof(val) > vdev->config_len) {
   2142        return (uint32_t)-1;
   2143    }
   2144
   2145    k->get_config(vdev, vdev->config);
   2146
   2147    val = lduw_le_p(vdev->config + addr);
   2148    return val;
   2149}
   2150
   2151uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
   2152{
   2153    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2154    uint32_t val;
   2155
   2156    if (addr + sizeof(val) > vdev->config_len) {
   2157        return (uint32_t)-1;
   2158    }
   2159
   2160    k->get_config(vdev, vdev->config);
   2161
   2162    val = ldl_le_p(vdev->config + addr);
   2163    return val;
   2164}
   2165
   2166void virtio_config_modern_writeb(VirtIODevice *vdev,
   2167                                 uint32_t addr, uint32_t data)
   2168{
   2169    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2170    uint8_t val = data;
   2171
   2172    if (addr + sizeof(val) > vdev->config_len) {
   2173        return;
   2174    }
   2175
   2176    stb_p(vdev->config + addr, val);
   2177
   2178    if (k->set_config) {
   2179        k->set_config(vdev, vdev->config);
   2180    }
   2181}
   2182
   2183void virtio_config_modern_writew(VirtIODevice *vdev,
   2184                                 uint32_t addr, uint32_t data)
   2185{
   2186    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2187    uint16_t val = data;
   2188
   2189    if (addr + sizeof(val) > vdev->config_len) {
   2190        return;
   2191    }
   2192
   2193    stw_le_p(vdev->config + addr, val);
   2194
   2195    if (k->set_config) {
   2196        k->set_config(vdev, vdev->config);
   2197    }
   2198}
   2199
   2200void virtio_config_modern_writel(VirtIODevice *vdev,
   2201                                 uint32_t addr, uint32_t data)
   2202{
   2203    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2204    uint32_t val = data;
   2205
   2206    if (addr + sizeof(val) > vdev->config_len) {
   2207        return;
   2208    }
   2209
   2210    stl_le_p(vdev->config + addr, val);
   2211
   2212    if (k->set_config) {
   2213        k->set_config(vdev, vdev->config);
   2214    }
   2215}
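
/*
 * Illustrative sketch: a transport dispatches guest accesses to the
 * device-specific configuration window onto these helpers, choosing the
 * legacy (guest-endian) or modern (always little-endian) variants according
 * to the negotiated protocol.  example_config_read is hypothetical:
 */
#if 0
static uint64_t example_config_read(VirtIODevice *vdev, hwaddr addr,
                                    unsigned size)
{
    switch (size) {
    case 1:
        return virtio_config_modern_readb(vdev, addr);
    case 2:
        return virtio_config_modern_readw(vdev, addr);
    case 4:
        return virtio_config_modern_readl(vdev, addr);
    default:
        return UINT64_MAX;      /* out-of-range access */
    }
}
#endif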
   2216
   2217void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
   2218{
   2219    if (!vdev->vq[n].vring.num) {
   2220        return;
   2221    }
   2222    vdev->vq[n].vring.desc = addr;
   2223    virtio_queue_update_rings(vdev, n);
   2224}
   2225
   2226hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
   2227{
   2228    return vdev->vq[n].vring.desc;
   2229}
   2230
   2231void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
   2232                            hwaddr avail, hwaddr used)
   2233{
   2234    if (!vdev->vq[n].vring.num) {
   2235        return;
   2236    }
   2237    vdev->vq[n].vring.desc = desc;
   2238    vdev->vq[n].vring.avail = avail;
   2239    vdev->vq[n].vring.used = used;
   2240    virtio_init_region_cache(vdev, n);
   2241}
   2242
   2243void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
   2244{
    2245    /* Don't allow the guest to flip a queue between existent and
    2246     * nonexistent states, or to set it to an invalid size.
   2247     */
   2248    if (!!num != !!vdev->vq[n].vring.num ||
   2249        num > VIRTQUEUE_MAX_SIZE ||
   2250        num < 0) {
   2251        return;
   2252    }
   2253    vdev->vq[n].vring.num = num;
   2254}
   2255
   2256VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
   2257{
   2258    return QLIST_FIRST(&vdev->vector_queues[vector]);
   2259}
   2260
   2261VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
   2262{
   2263    return QLIST_NEXT(vq, node);
   2264}
   2265
   2266int virtio_queue_get_num(VirtIODevice *vdev, int n)
   2267{
   2268    return vdev->vq[n].vring.num;
   2269}
   2270
   2271int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
   2272{
   2273    return vdev->vq[n].vring.num_default;
   2274}
   2275
   2276int virtio_get_num_queues(VirtIODevice *vdev)
   2277{
   2278    int i;
   2279
   2280    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   2281        if (!virtio_queue_get_num(vdev, i)) {
   2282            break;
   2283        }
   2284    }
   2285
   2286    return i;
   2287}
   2288
   2289void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
   2290{
   2291    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   2292    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   2293
   2294    /* virtio-1 compliant devices cannot change the alignment */
   2295    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   2296        error_report("tried to modify queue alignment for virtio-1 device");
   2297        return;
   2298    }
   2299    /* Check that the transport told us it was going to do this
   2300     * (so a buggy transport will immediately assert rather than
   2301     * silently failing to migrate this state)
   2302     */
   2303    assert(k->has_variable_vring_alignment);
   2304
   2305    if (align) {
   2306        vdev->vq[n].vring.align = align;
   2307        virtio_queue_update_rings(vdev, n);
   2308    }
   2309}
   2310
   2311static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
   2312{
   2313    bool ret = false;
   2314
   2315    if (vq->vring.desc && vq->handle_aio_output) {
   2316        VirtIODevice *vdev = vq->vdev;
   2317
   2318        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
   2319        ret = vq->handle_aio_output(vdev, vq);
   2320
   2321        if (unlikely(vdev->start_on_kick)) {
   2322            virtio_set_started(vdev, true);
   2323        }
   2324    }
   2325
   2326    return ret;
   2327}
   2328
   2329static void virtio_queue_notify_vq(VirtQueue *vq)
   2330{
   2331    if (vq->vring.desc && vq->handle_output) {
   2332        VirtIODevice *vdev = vq->vdev;
   2333
   2334        if (unlikely(vdev->broken)) {
   2335            return;
   2336        }
   2337
   2338        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
   2339        vq->handle_output(vdev, vq);
   2340
   2341        if (unlikely(vdev->start_on_kick)) {
   2342            virtio_set_started(vdev, true);
   2343        }
   2344    }
   2345}
   2346
   2347void virtio_queue_notify(VirtIODevice *vdev, int n)
   2348{
   2349    VirtQueue *vq = &vdev->vq[n];
   2350
   2351    if (unlikely(!vq->vring.desc || vdev->broken)) {
   2352        return;
   2353    }
   2354
   2355    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
   2356    if (vq->host_notifier_enabled) {
   2357        event_notifier_set(&vq->host_notifier);
   2358    } else if (vq->handle_output) {
   2359        vq->handle_output(vdev, vq);
   2360
   2361        if (unlikely(vdev->start_on_kick)) {
   2362            virtio_set_started(vdev, true);
   2363        }
   2364    }
   2365}
   2366
   2367uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
   2368{
   2369    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
   2370        VIRTIO_NO_VECTOR;
   2371}
   2372
   2373void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
   2374{
   2375    VirtQueue *vq = &vdev->vq[n];
   2376
   2377    if (n < VIRTIO_QUEUE_MAX) {
   2378        if (vdev->vector_queues &&
   2379            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
   2380            QLIST_REMOVE(vq, node);
   2381        }
   2382        vdev->vq[n].vector = vector;
   2383        if (vdev->vector_queues &&
   2384            vector != VIRTIO_NO_VECTOR) {
   2385            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
   2386        }
   2387    }
   2388}
   2389
   2390VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
   2391                            VirtIOHandleOutput handle_output)
   2392{
   2393    int i;
   2394
   2395    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   2396        if (vdev->vq[i].vring.num == 0)
   2397            break;
   2398    }
   2399
   2400    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
   2401        abort();
   2402
   2403    vdev->vq[i].vring.num = queue_size;
   2404    vdev->vq[i].vring.num_default = queue_size;
   2405    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
   2406    vdev->vq[i].handle_output = handle_output;
   2407    vdev->vq[i].handle_aio_output = NULL;
   2408    vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
   2409                                       queue_size);
   2410
   2411    return &vdev->vq[i];
   2412}
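
/*
 * Illustrative sketch: queues are created from a device's realize hook and
 * torn down again on unrealize; size and handler are device-specific, and
 * the names below are hypothetical:
 */
#if 0
    /* realize: */
    s->vq = virtio_add_queue(vdev, 256, example_handle_output);
    /* unrealize: */
    virtio_delete_queue(s->vq);     /* or virtio_del_queue(vdev, n) */
#endif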
   2413
   2414void virtio_delete_queue(VirtQueue *vq)
   2415{
   2416    vq->vring.num = 0;
   2417    vq->vring.num_default = 0;
   2418    vq->handle_output = NULL;
   2419    vq->handle_aio_output = NULL;
   2420    g_free(vq->used_elems);
   2421    vq->used_elems = NULL;
   2422    virtio_virtqueue_reset_region_cache(vq);
   2423}
   2424
   2425void virtio_del_queue(VirtIODevice *vdev, int n)
   2426{
   2427    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
   2428        abort();
   2429    }
   2430
   2431    virtio_delete_queue(&vdev->vq[n]);
   2432}
   2433
   2434static void virtio_set_isr(VirtIODevice *vdev, int value)
   2435{
   2436    uint8_t old = qatomic_read(&vdev->isr);
   2437
   2438    /* Do not write ISR if it does not change, so that its cacheline remains
   2439     * shared in the common case where the guest does not read it.
   2440     */
   2441    if ((old & value) != value) {
   2442        qatomic_or(&vdev->isr, value);
   2443    }
   2444}
   2445
   2446/* Called within rcu_read_lock(). */
   2447static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
   2448{
   2449    uint16_t old, new;
   2450    bool v;
   2451    /* We need to expose used array entries before checking used event. */
   2452    smp_mb();
    2453    /* Always notify when queue is empty (if the feature was acknowledged) */
   2454    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
   2455        !vq->inuse && virtio_queue_empty(vq)) {
   2456        return true;
   2457    }
   2458
   2459    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
   2460        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
   2461    }
   2462
   2463    v = vq->signalled_used_valid;
   2464    vq->signalled_used_valid = true;
   2465    old = vq->signalled_used;
   2466    new = vq->signalled_used = vq->used_idx;
   2467    return !v || vring_need_event(vring_get_used_event(vq), new, old);
   2468}
   2469
   2470static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
   2471                                    uint16_t off_wrap, uint16_t new,
   2472                                    uint16_t old)
   2473{
   2474    int off = off_wrap & ~(1 << 15);
   2475
   2476    if (wrap != off_wrap >> 15) {
   2477        off -= vq->vring.num;
   2478    }
   2479
   2480    return vring_need_event(off, new, old);
   2481}
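
/*
 * For reference, vring_need_event() (standard-headers/linux/virtio_ring.h)
 * decides whether event_idx fell inside the window of newly used entries,
 * using free-running 16-bit arithmetic:
 *
 *     need = (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old);
 *
 * Worked example: old = 10, new = 13 (entries 10..12 were just used).
 *   event_idx = 11: (13 - 11 - 1) = 1 < (13 - 10) = 3   -> notify
 *   event_idx = 13: (13 - 13 - 1) = 0xffff, not < 3     -> suppress
 * i.e. a notification fires iff event_idx lies in [old, new).  The packed
 * variant above additionally strips the wrap bit (bit 15) from off_wrap and
 * rebases the offset by the ring size when the wrap counters differ.
 */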
   2482
   2483/* Called within rcu_read_lock(). */
   2484static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
   2485{
   2486    VRingPackedDescEvent e;
   2487    uint16_t old, new;
   2488    bool v;
   2489    VRingMemoryRegionCaches *caches;
   2490
   2491    caches = vring_get_region_caches(vq);
   2492    if (!caches) {
   2493        return false;
   2494    }
   2495
   2496    vring_packed_event_read(vdev, &caches->avail, &e);
   2497
   2498    old = vq->signalled_used;
   2499    new = vq->signalled_used = vq->used_idx;
   2500    v = vq->signalled_used_valid;
   2501    vq->signalled_used_valid = true;
   2502
   2503    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
   2504        return false;
   2505    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
   2506        return true;
   2507    }
   2508
   2509    return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
   2510                                         e.off_wrap, new, old);
   2511}
   2512
   2513/* Called within rcu_read_lock().  */
   2514static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
   2515{
   2516    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   2517        return virtio_packed_should_notify(vdev, vq);
   2518    } else {
   2519        return virtio_split_should_notify(vdev, vq);
   2520    }
   2521}
   2522
   2523void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
   2524{
   2525    WITH_RCU_READ_LOCK_GUARD() {
   2526        if (!virtio_should_notify(vdev, vq)) {
   2527            return;
   2528        }
   2529    }
   2530
   2531    trace_virtio_notify_irqfd(vdev, vq);
   2532
   2533    /*
   2534     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
    2535     * Windows drivers included in virtio-win 1.8.0 (circa 2015) are
   2536     * incorrectly polling this bit during crashdump and hibernation
   2537     * in MSI mode, causing a hang if this bit is never updated.
   2538     * Recent releases of Windows do not really shut down, but rather
   2539     * log out and hibernate to make the next startup faster.  Hence,
   2540     * this manifested as a more serious hang during shutdown with
    2541     * netkvm driver loaded, than an "arbitrary" hang during hibernation.
   2542     * Next driver release from 2016 fixed this problem, so working around it
    2543     * is not a must, but it's easy to do, so let's do it here.
   2544     *
   2545     * Note: it's safe to update ISR from any thread as it was switched
   2546     * to an atomic operation.
   2547     */
   2548    virtio_set_isr(vq->vdev, 0x1);
   2549    event_notifier_set(&vq->guest_notifier);
   2550}
   2551
   2552static void virtio_irq(VirtQueue *vq)
   2553{
   2554    virtio_set_isr(vq->vdev, 0x1);
   2555    virtio_notify_vector(vq->vdev, vq->vector);
   2556}
   2557
   2558void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
   2559{
   2560    WITH_RCU_READ_LOCK_GUARD() {
   2561        if (!virtio_should_notify(vdev, vq)) {
   2562            return;
   2563        }
   2564    }
   2565
   2566    trace_virtio_notify(vdev, vq);
   2567    virtio_irq(vq);
   2568}
   2569
   2570void virtio_notify_config(VirtIODevice *vdev)
   2571{
   2572    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
   2573        return;
   2574
   2575    virtio_set_isr(vdev, 0x3);
   2576    vdev->generation++;
   2577    virtio_notify_vector(vdev, vdev->config_vector);
   2578}
   2579
   2580static bool virtio_device_endian_needed(void *opaque)
   2581{
   2582    VirtIODevice *vdev = opaque;
   2583
   2584    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
   2585    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   2586        return vdev->device_endian != virtio_default_endian();
   2587    }
   2588    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
   2589    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
   2590}
   2591
   2592static bool virtio_64bit_features_needed(void *opaque)
   2593{
   2594    VirtIODevice *vdev = opaque;
   2595
   2596    return (vdev->host_features >> 32) != 0;
   2597}
   2598
   2599static bool virtio_virtqueue_needed(void *opaque)
   2600{
   2601    VirtIODevice *vdev = opaque;
   2602
   2603    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
   2604}
   2605
   2606static bool virtio_packed_virtqueue_needed(void *opaque)
   2607{
   2608    VirtIODevice *vdev = opaque;
   2609
   2610    return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
   2611}
   2612
   2613static bool virtio_ringsize_needed(void *opaque)
   2614{
   2615    VirtIODevice *vdev = opaque;
   2616    int i;
   2617
   2618    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   2619        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
   2620            return true;
   2621        }
   2622    }
   2623    return false;
   2624}
   2625
   2626static bool virtio_extra_state_needed(void *opaque)
   2627{
   2628    VirtIODevice *vdev = opaque;
   2629    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   2630    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   2631
   2632    return k->has_extra_state &&
   2633        k->has_extra_state(qbus->parent);
   2634}
   2635
   2636static bool virtio_broken_needed(void *opaque)
   2637{
   2638    VirtIODevice *vdev = opaque;
   2639
   2640    return vdev->broken;
   2641}
   2642
   2643static bool virtio_started_needed(void *opaque)
   2644{
   2645    VirtIODevice *vdev = opaque;
   2646
   2647    return vdev->started;
   2648}
   2649
   2650static bool virtio_disabled_needed(void *opaque)
   2651{
   2652    VirtIODevice *vdev = opaque;
   2653
   2654    return vdev->disabled;
   2655}
   2656
   2657static const VMStateDescription vmstate_virtqueue = {
   2658    .name = "virtqueue_state",
   2659    .version_id = 1,
   2660    .minimum_version_id = 1,
   2661    .fields = (VMStateField[]) {
   2662        VMSTATE_UINT64(vring.avail, struct VirtQueue),
   2663        VMSTATE_UINT64(vring.used, struct VirtQueue),
   2664        VMSTATE_END_OF_LIST()
   2665    }
   2666};
   2667
   2668static const VMStateDescription vmstate_packed_virtqueue = {
   2669    .name = "packed_virtqueue_state",
   2670    .version_id = 1,
   2671    .minimum_version_id = 1,
   2672    .fields = (VMStateField[]) {
   2673        VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
   2674        VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
   2675        VMSTATE_UINT16(used_idx, struct VirtQueue),
   2676        VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
   2677        VMSTATE_UINT32(inuse, struct VirtQueue),
   2678        VMSTATE_END_OF_LIST()
   2679    }
   2680};
   2681
   2682static const VMStateDescription vmstate_virtio_virtqueues = {
   2683    .name = "virtio/virtqueues",
   2684    .version_id = 1,
   2685    .minimum_version_id = 1,
   2686    .needed = &virtio_virtqueue_needed,
   2687    .fields = (VMStateField[]) {
   2688        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
   2689                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
   2690        VMSTATE_END_OF_LIST()
   2691    }
   2692};
   2693
   2694static const VMStateDescription vmstate_virtio_packed_virtqueues = {
   2695    .name = "virtio/packed_virtqueues",
   2696    .version_id = 1,
   2697    .minimum_version_id = 1,
   2698    .needed = &virtio_packed_virtqueue_needed,
   2699    .fields = (VMStateField[]) {
   2700        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
   2701                      VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
   2702        VMSTATE_END_OF_LIST()
   2703    }
   2704};
   2705
   2706static const VMStateDescription vmstate_ringsize = {
   2707    .name = "ringsize_state",
   2708    .version_id = 1,
   2709    .minimum_version_id = 1,
   2710    .fields = (VMStateField[]) {
   2711        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
   2712        VMSTATE_END_OF_LIST()
   2713    }
   2714};
   2715
   2716static const VMStateDescription vmstate_virtio_ringsize = {
   2717    .name = "virtio/ringsize",
   2718    .version_id = 1,
   2719    .minimum_version_id = 1,
   2720    .needed = &virtio_ringsize_needed,
   2721    .fields = (VMStateField[]) {
   2722        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
   2723                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
   2724        VMSTATE_END_OF_LIST()
   2725    }
   2726};
   2727
   2728static int get_extra_state(QEMUFile *f, void *pv, size_t size,
   2729                           const VMStateField *field)
   2730{
   2731    VirtIODevice *vdev = pv;
   2732    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   2733    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   2734
   2735    if (!k->load_extra_state) {
   2736        return -1;
   2737    } else {
   2738        return k->load_extra_state(qbus->parent, f);
   2739    }
   2740}
   2741
   2742static int put_extra_state(QEMUFile *f, void *pv, size_t size,
   2743                           const VMStateField *field, JSONWriter *vmdesc)
   2744{
   2745    VirtIODevice *vdev = pv;
   2746    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   2747    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   2748
   2749    k->save_extra_state(qbus->parent, f);
   2750    return 0;
   2751}
   2752
   2753static const VMStateInfo vmstate_info_extra_state = {
   2754    .name = "virtqueue_extra_state",
   2755    .get = get_extra_state,
   2756    .put = put_extra_state,
   2757};
   2758
   2759static const VMStateDescription vmstate_virtio_extra_state = {
   2760    .name = "virtio/extra_state",
   2761    .version_id = 1,
   2762    .minimum_version_id = 1,
   2763    .needed = &virtio_extra_state_needed,
   2764    .fields = (VMStateField[]) {
   2765        {
   2766            .name         = "extra_state",
   2767            .version_id   = 0,
   2768            .field_exists = NULL,
   2769            .size         = 0,
   2770            .info         = &vmstate_info_extra_state,
   2771            .flags        = VMS_SINGLE,
   2772            .offset       = 0,
   2773        },
   2774        VMSTATE_END_OF_LIST()
   2775    }
   2776};
   2777
   2778static const VMStateDescription vmstate_virtio_device_endian = {
   2779    .name = "virtio/device_endian",
   2780    .version_id = 1,
   2781    .minimum_version_id = 1,
   2782    .needed = &virtio_device_endian_needed,
   2783    .fields = (VMStateField[]) {
   2784        VMSTATE_UINT8(device_endian, VirtIODevice),
   2785        VMSTATE_END_OF_LIST()
   2786    }
   2787};
   2788
   2789static const VMStateDescription vmstate_virtio_64bit_features = {
   2790    .name = "virtio/64bit_features",
   2791    .version_id = 1,
   2792    .minimum_version_id = 1,
   2793    .needed = &virtio_64bit_features_needed,
   2794    .fields = (VMStateField[]) {
   2795        VMSTATE_UINT64(guest_features, VirtIODevice),
   2796        VMSTATE_END_OF_LIST()
   2797    }
   2798};
   2799
   2800static const VMStateDescription vmstate_virtio_broken = {
   2801    .name = "virtio/broken",
   2802    .version_id = 1,
   2803    .minimum_version_id = 1,
   2804    .needed = &virtio_broken_needed,
   2805    .fields = (VMStateField[]) {
   2806        VMSTATE_BOOL(broken, VirtIODevice),
   2807        VMSTATE_END_OF_LIST()
   2808    }
   2809};
   2810
   2811static const VMStateDescription vmstate_virtio_started = {
   2812    .name = "virtio/started",
   2813    .version_id = 1,
   2814    .minimum_version_id = 1,
   2815    .needed = &virtio_started_needed,
   2816    .fields = (VMStateField[]) {
   2817        VMSTATE_BOOL(started, VirtIODevice),
   2818        VMSTATE_END_OF_LIST()
   2819    }
   2820};
   2821
   2822static const VMStateDescription vmstate_virtio_disabled = {
   2823    .name = "virtio/disabled",
   2824    .version_id = 1,
   2825    .minimum_version_id = 1,
   2826    .needed = &virtio_disabled_needed,
   2827    .fields = (VMStateField[]) {
   2828        VMSTATE_BOOL(disabled, VirtIODevice),
   2829        VMSTATE_END_OF_LIST()
   2830    }
   2831};
   2832
   2833static const VMStateDescription vmstate_virtio = {
   2834    .name = "virtio",
   2835    .version_id = 1,
   2836    .minimum_version_id = 1,
   2837    .minimum_version_id_old = 1,
   2838    .fields = (VMStateField[]) {
   2839        VMSTATE_END_OF_LIST()
   2840    },
   2841    .subsections = (const VMStateDescription*[]) {
   2842        &vmstate_virtio_device_endian,
   2843        &vmstate_virtio_64bit_features,
   2844        &vmstate_virtio_virtqueues,
   2845        &vmstate_virtio_ringsize,
   2846        &vmstate_virtio_broken,
   2847        &vmstate_virtio_extra_state,
   2848        &vmstate_virtio_started,
   2849        &vmstate_virtio_packed_virtqueues,
   2850        &vmstate_virtio_disabled,
   2851        NULL
   2852    }
   2853};
   2854
   2855int virtio_save(VirtIODevice *vdev, QEMUFile *f)
   2856{
   2857    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   2858    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   2859    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
   2860    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
   2861    int i;
   2862
   2863    if (k->save_config) {
   2864        k->save_config(qbus->parent, f);
   2865    }
   2866
   2867    qemu_put_8s(f, &vdev->status);
   2868    qemu_put_8s(f, &vdev->isr);
   2869    qemu_put_be16s(f, &vdev->queue_sel);
   2870    qemu_put_be32s(f, &guest_features_lo);
   2871    qemu_put_be32(f, vdev->config_len);
   2872    qemu_put_buffer(f, vdev->config, vdev->config_len);
   2873
   2874    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   2875        if (vdev->vq[i].vring.num == 0)
   2876            break;
   2877    }
   2878
   2879    qemu_put_be32(f, i);
   2880
   2881    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   2882        if (vdev->vq[i].vring.num == 0)
   2883            break;
   2884
   2885        qemu_put_be32(f, vdev->vq[i].vring.num);
   2886        if (k->has_variable_vring_alignment) {
   2887            qemu_put_be32(f, vdev->vq[i].vring.align);
   2888        }
   2889        /*
   2890         * Save desc now, the rest of the ring addresses are saved in
   2891         * subsections for VIRTIO-1 devices.
   2892         */
   2893        qemu_put_be64(f, vdev->vq[i].vring.desc);
   2894        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
   2895        if (k->save_queue) {
   2896            k->save_queue(qbus->parent, i, f);
   2897        }
   2898    }
   2899
   2900    if (vdc->save != NULL) {
   2901        vdc->save(vdev, f);
   2902    }
   2903
   2904    if (vdc->vmsd) {
   2905        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
   2906        if (ret) {
   2907            return ret;
   2908        }
   2909    }
   2910
   2911    /* Subsections */
   2912    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
   2913}
   2914
   2915/* A wrapper for use as a VMState .put function */
   2916static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
   2917                              const VMStateField *field, JSONWriter *vmdesc)
   2918{
   2919    return virtio_save(VIRTIO_DEVICE(opaque), f);
   2920}
   2921
   2922/* A wrapper for use as a VMState .get function */
   2923static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
   2924                             const VMStateField *field)
   2925{
   2926    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
   2927    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
   2928
   2929    return virtio_load(vdev, f, dc->vmsd->version_id);
   2930}
   2931
    2932const VMStateInfo virtio_vmstate_info = {
   2933    .name = "virtio",
   2934    .get = virtio_device_get,
   2935    .put = virtio_device_put,
   2936};
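
/*
 * Illustrative sketch: devices rarely use this VMStateInfo directly; the
 * VMSTATE_VIRTIO_DEVICE field macro from hw/virtio/virtio.h wraps it, so a
 * device's migration description usually reduces to something like:
 */
#if 0
static const VMStateDescription vmstate_virtio_example = {
    .name = "virtio-example",       /* hypothetical device name */
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};
#endif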
   2937
   2938static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
   2939{
   2940    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
   2941    bool bad = (val & ~(vdev->host_features)) != 0;
   2942
   2943    val &= vdev->host_features;
   2944    if (k->set_features) {
   2945        k->set_features(vdev, val);
   2946    }
   2947    vdev->guest_features = val;
   2948    return bad ? -1 : 0;
   2949}
   2950
   2951int virtio_set_features(VirtIODevice *vdev, uint64_t val)
   2952{
   2953    int ret;
   2954    /*
   2955     * The driver must not attempt to set features after feature negotiation
   2956     * has finished.
   2957     */
   2958    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
   2959        return -EINVAL;
   2960    }
   2961    ret = virtio_set_features_nocheck(vdev, val);
   2962    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
   2963        /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches.  */
   2964        int i;
   2965        for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   2966            if (vdev->vq[i].vring.num != 0) {
   2967                virtio_init_region_cache(vdev, i);
   2968            }
   2969        }
   2970    }
   2971    if (!ret) {
   2972        if (!virtio_device_started(vdev, vdev->status) &&
   2973            !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   2974            vdev->start_on_kick = true;
   2975        }
   2976    }
   2977    return ret;
   2978}
   2979
   2980size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
   2981                                      uint64_t host_features)
   2982{
   2983    size_t config_size = 0;
   2984    int i;
   2985
   2986    for (i = 0; feature_sizes[i].flags != 0; i++) {
   2987        if (host_features & feature_sizes[i].flags) {
   2988            config_size = MAX(feature_sizes[i].end, config_size);
   2989        }
   2990    }
   2991
   2992    return config_size;
   2993}
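
/*
 * Illustrative sketch (hypothetical feature table): callers pass an array
 * terminated by a zero .flags entry; each entry records how much config
 * space a feature requires, and the result is the maximum over the features
 * actually offered.  endof() is the helper used for this in
 * hw/virtio/virtio.h; the example_ names are made up:
 */
#if 0
static const VirtIOFeature example_feature_sizes[] = {
    {.flags = 1ULL << EXAMPLE_F_EXTENDED,
     .end = endof(struct example_config, extended_field)},
    {}
};

static size_t example_config_size(VirtIODevice *vdev)
{
    return virtio_feature_get_config_size(example_feature_sizes,
                                          vdev->host_features);
}
#endif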
   2994
   2995int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
   2996{
   2997    int i, ret;
   2998    int32_t config_len;
   2999    uint32_t num;
   3000    uint32_t features;
   3001    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3002    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   3003    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
   3004
   3005    /*
   3006     * We poison the endianness to ensure it does not get used before
   3007     * subsections have been loaded.
   3008     */
   3009    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
   3010
   3011    if (k->load_config) {
   3012        ret = k->load_config(qbus->parent, f);
   3013        if (ret)
   3014            return ret;
   3015    }
   3016
   3017    qemu_get_8s(f, &vdev->status);
   3018    qemu_get_8s(f, &vdev->isr);
   3019    qemu_get_be16s(f, &vdev->queue_sel);
   3020    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
   3021        return -1;
   3022    }
   3023    qemu_get_be32s(f, &features);
   3024
   3025    /*
   3026     * Temporarily set guest_features low bits - needed by
    3027     * the virtio-net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
    3028     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
    3029     *
    3030     * Note: devices should always test host features in the future - don't
    3031     * create new dependencies like this.
   3032     */
   3033    vdev->guest_features = features;
   3034
   3035    config_len = qemu_get_be32(f);
   3036
   3037    /*
   3038     * There are cases where the incoming config can be bigger or smaller
   3039     * than what we have; so load what we have space for, and skip
   3040     * any excess that's in the stream.
   3041     */
   3042    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
   3043
   3044    while (config_len > vdev->config_len) {
   3045        qemu_get_byte(f);
   3046        config_len--;
   3047    }
   3048
   3049    num = qemu_get_be32(f);
   3050
   3051    if (num > VIRTIO_QUEUE_MAX) {
   3052        error_report("Invalid number of virtqueues: 0x%x", num);
   3053        return -1;
   3054    }
   3055
   3056    for (i = 0; i < num; i++) {
   3057        vdev->vq[i].vring.num = qemu_get_be32(f);
   3058        if (k->has_variable_vring_alignment) {
   3059            vdev->vq[i].vring.align = qemu_get_be32(f);
   3060        }
   3061        vdev->vq[i].vring.desc = qemu_get_be64(f);
   3062        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
   3063        vdev->vq[i].signalled_used_valid = false;
   3064        vdev->vq[i].notification = true;
   3065
   3066        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
   3067            error_report("VQ %d address 0x0 "
   3068                         "inconsistent with Host index 0x%x",
   3069                         i, vdev->vq[i].last_avail_idx);
   3070            return -1;
   3071        }
   3072        if (k->load_queue) {
   3073            ret = k->load_queue(qbus->parent, i, f);
   3074            if (ret)
   3075                return ret;
   3076        }
   3077    }
   3078
   3079    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
   3080
   3081    if (vdc->load != NULL) {
   3082        ret = vdc->load(vdev, f, version_id);
   3083        if (ret) {
   3084            return ret;
   3085        }
   3086    }
   3087
   3088    if (vdc->vmsd) {
   3089        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
   3090        if (ret) {
   3091            return ret;
   3092        }
   3093    }
   3094
   3095    /* Subsections */
   3096    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
   3097    if (ret) {
   3098        return ret;
   3099    }
   3100
   3101    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
   3102        vdev->device_endian = virtio_default_endian();
   3103    }
   3104
   3105    if (virtio_64bit_features_needed(vdev)) {
   3106        /*
    3107         * Subsection load filled vdev->guest_features.  Run them
    3108         * through virtio_set_features_nocheck() to sanity-check them
    3109         * against host_features.
   3110         */
   3111        uint64_t features64 = vdev->guest_features;
   3112        if (virtio_set_features_nocheck(vdev, features64) < 0) {
   3113            error_report("Features 0x%" PRIx64 " unsupported. "
   3114                         "Allowed features: 0x%" PRIx64,
   3115                         features64, vdev->host_features);
   3116            return -1;
   3117        }
   3118    } else {
   3119        if (virtio_set_features_nocheck(vdev, features) < 0) {
   3120            error_report("Features 0x%x unsupported. "
   3121                         "Allowed features: 0x%" PRIx64,
   3122                         features, vdev->host_features);
   3123            return -1;
   3124        }
   3125    }
   3126
   3127    if (!virtio_device_started(vdev, vdev->status) &&
   3128        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   3129        vdev->start_on_kick = true;
   3130    }
   3131
   3132    RCU_READ_LOCK_GUARD();
   3133    for (i = 0; i < num; i++) {
   3134        if (vdev->vq[i].vring.desc) {
   3135            uint16_t nheads;
   3136
   3137            /*
   3138             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
   3139             * only the region cache needs to be set up.  Legacy devices need
   3140             * to calculate used and avail ring addresses based on the desc
   3141             * address.
   3142             */
   3143            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   3144                virtio_init_region_cache(vdev, i);
   3145            } else {
   3146                virtio_queue_update_rings(vdev, i);
   3147            }
   3148
   3149            if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   3150                vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
   3151                vdev->vq[i].shadow_avail_wrap_counter =
   3152                                        vdev->vq[i].last_avail_wrap_counter;
   3153                continue;
   3154            }
   3155
   3156            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
    3157            /* Check the guest isn't doing strange things with descriptor numbers. */
   3158            if (nheads > vdev->vq[i].vring.num) {
   3159                virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
   3160                             "inconsistent with Host index 0x%x: delta 0x%x",
   3161                             i, vdev->vq[i].vring.num,
   3162                             vring_avail_idx(&vdev->vq[i]),
   3163                             vdev->vq[i].last_avail_idx, nheads);
   3164                vdev->vq[i].used_idx = 0;
   3165                vdev->vq[i].shadow_avail_idx = 0;
   3166                vdev->vq[i].inuse = 0;
   3167                continue;
   3168            }
   3169            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
   3170            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
   3171
   3172            /*
   3173             * Some devices migrate VirtQueueElements that have been popped
   3174             * from the avail ring but not yet returned to the used ring.
    3175             * Since the maximum ring size is < UINT16_MAX, the 16-bit
    3176             * subtraction (which wraps modulo UINT16_MAX + 1) is safe.
   3177             */
   3178            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
   3179                                vdev->vq[i].used_idx);
   3180            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
   3181                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
   3182                             "used_idx 0x%x",
   3183                             i, vdev->vq[i].vring.num,
   3184                             vdev->vq[i].last_avail_idx,
   3185                             vdev->vq[i].used_idx);
   3186                return -1;
   3187            }
   3188        }
   3189    }
   3190
   3191    if (vdc->post_load) {
   3192        ret = vdc->post_load(vdev);
   3193        if (ret) {
   3194            return ret;
   3195        }
   3196    }
   3197
   3198    return 0;
   3199}
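
       /*
        * A note on the arithmetic above (a sketch, not part of the device
        * model): the in-flight count relies on well-defined unsigned 16-bit
        * wraparound.  A minimal standalone illustration, kept out of the
        * build:
        */
       #if 0
       /* With last_avail_idx == 0x0002 and used_idx == 0xfffe, four elements
        * were popped but not yet returned: (uint16_t)(0x0002 - 0xfffe) == 4. */
       static uint16_t vring_inuse_sketch(uint16_t last_avail_idx,
                                          uint16_t used_idx)
       {
           return (uint16_t)(last_avail_idx - used_idx);
       }
       #endif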
   3200
   3201void virtio_cleanup(VirtIODevice *vdev)
   3202{
   3203    qemu_del_vm_change_state_handler(vdev->vmstate);
   3204}
   3205
   3206static void virtio_vmstate_change(void *opaque, bool running, RunState state)
   3207{
   3208    VirtIODevice *vdev = opaque;
   3209    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3210    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   3211    bool backend_run = running && virtio_device_started(vdev, vdev->status);
   3212    vdev->vm_running = running;
   3213
   3214    if (backend_run) {
   3215        virtio_set_status(vdev, vdev->status);
   3216    }
   3217
   3218    if (k->vmstate_change) {
   3219        k->vmstate_change(qbus->parent, backend_run);
   3220    }
   3221
   3222    if (!backend_run) {
   3223        virtio_set_status(vdev, vdev->status);
   3224    }
   3225}
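
       /*
        * Ordering note: when the VM resumes, the device status is restored
        * before the transport's vmstate_change hook runs, so the backend
        * sees a fully configured device; on stop the hook runs first, so
        * the backend quiesces before the status update.
        */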
   3226
   3227void virtio_instance_init_common(Object *proxy_obj, void *data,
   3228                                 size_t vdev_size, const char *vdev_name)
   3229{
   3230    DeviceState *vdev = data;
   3231
   3232    object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
   3233                                       vdev_size, vdev_name, &error_abort,
   3234                                       NULL);
   3235    qdev_alias_all_properties(vdev, proxy_obj);
   3236}
   3237
   3238void virtio_init(VirtIODevice *vdev, const char *name,
   3239                 uint16_t device_id, size_t config_size)
   3240{
   3241    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3242    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   3243    int i;
   3244    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
   3245
   3246    if (nvectors) {
   3247        vdev->vector_queues =
   3248            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
   3249    }
   3250
   3251    vdev->start_on_kick = false;
   3252    vdev->started = false;
   3253    vdev->device_id = device_id;
   3254    vdev->status = 0;
   3255    qatomic_set(&vdev->isr, 0);
   3256    vdev->queue_sel = 0;
   3257    vdev->config_vector = VIRTIO_NO_VECTOR;
   3258    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
   3259    vdev->vm_running = runstate_is_running();
   3260    vdev->broken = false;
   3261    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   3262        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
   3263        vdev->vq[i].vdev = vdev;
   3264        vdev->vq[i].queue_index = i;
   3265        vdev->vq[i].host_notifier_enabled = false;
   3266    }
   3267
   3268    vdev->name = name;
   3269    vdev->config_len = config_size;
   3270    if (vdev->config_len) {
   3271        vdev->config = g_malloc0(config_size);
   3272    } else {
   3273        vdev->config = NULL;
   3274    }
   3275    vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
   3276            virtio_vmstate_change, vdev);
   3277    vdev->device_endian = virtio_default_endian();
   3278    vdev->use_guest_notifier_mask = true;
   3279}
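
       /*
        * A minimal sketch of how a device typically uses virtio_init() from
        * its realize hook (kept out of the build; MY_DEVICE_ID, struct
        * my_config and my_handle_output are illustrative names that are not
        * defined in this file):
        */
       #if 0
       static void my_device_realize_sketch(VirtIODevice *vdev)
       {
           /* Allocate config space and the VIRTIO_QUEUE_MAX queue array. */
           virtio_init(vdev, "my-device", MY_DEVICE_ID, sizeof(struct my_config));
           /* Add one 256-entry virtqueue; my_handle_output runs on kicks. */
           virtio_add_queue(vdev, 256, my_handle_output);
       }
       #endif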
   3280
   3281/*
   3282 * Only device types that predate the virtio standard support legacy
   3283 * mode; this includes device types not specified in the standard. All
   3284 * newer device types conform to the virtio standard only.
   3285 */
   3286bool virtio_legacy_allowed(VirtIODevice *vdev)
   3287{
   3288    switch (vdev->device_id) {
   3289    case VIRTIO_ID_NET:
   3290    case VIRTIO_ID_BLOCK:
   3291    case VIRTIO_ID_CONSOLE:
   3292    case VIRTIO_ID_RNG:
   3293    case VIRTIO_ID_BALLOON:
   3294    case VIRTIO_ID_RPMSG:
   3295    case VIRTIO_ID_SCSI:
   3296    case VIRTIO_ID_9P:
   3297    case VIRTIO_ID_RPROC_SERIAL:
   3298    case VIRTIO_ID_CAIF:
   3299        return true;
   3300    default:
   3301        return false;
   3302    }
   3303}
   3304
   3305bool virtio_legacy_check_disabled(VirtIODevice *vdev)
   3306{
   3307    return vdev->disable_legacy_check;
   3308}
   3309
   3310hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
   3311{
   3312    return vdev->vq[n].vring.desc;
   3313}
   3314
   3315bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
   3316{
   3317    return virtio_queue_get_desc_addr(vdev, n) != 0;
   3318}
   3319
   3320bool virtio_queue_enabled(VirtIODevice *vdev, int n)
   3321{
   3322    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3323    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   3324
   3325    if (k->queue_enabled) {
   3326        return k->queue_enabled(qbus->parent, n);
   3327    }
   3328    return virtio_queue_enabled_legacy(vdev, n);
   3329}
   3330
   3331hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
   3332{
   3333    return vdev->vq[n].vring.avail;
   3334}
   3335
   3336hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
   3337{
   3338    return vdev->vq[n].vring.used;
   3339}
   3340
   3341hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
   3342{
   3343    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
   3344}
   3345
   3346hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
   3347{
   3348    int s;
   3349
   3350    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   3351        return sizeof(struct VRingPackedDescEvent);
   3352    }
   3353
   3354    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
   3355    return offsetof(VRingAvail, ring) +
   3356        sizeof(uint16_t) * vdev->vq[n].vring.num + s;
   3357}
   3358
   3359hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
   3360{
   3361    int s;
   3362
   3363    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   3364        return sizeof(struct VRingPackedDescEvent);
   3365    }
   3366
   3367    s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
   3368    return offsetof(VRingUsed, ring) +
   3369        sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
   3370}
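
       /*
        * Worked example for the split layout: with vring.num == 256 and
        * VIRTIO_RING_F_EVENT_IDX negotiated, the avail ring occupies
        * 4 + 2 * 256 + 2 = 518 bytes (flags, idx, ring[], used_event) and
        * the used ring occupies 4 + 8 * 256 + 2 = 2054 bytes (flags, idx,
        * ring[], avail_event).
        */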
   3371
   3372static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
   3373                                                           int n)
   3374{
   3375    unsigned int avail, used;
   3376
   3377    avail = vdev->vq[n].last_avail_idx;
   3378    avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
   3379
   3380    used = vdev->vq[n].used_idx;
   3381    used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
   3382
   3383    return avail | used << 16;
   3384}
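
       /*
        * Layout of the 32-bit value produced above and consumed by the
        * packed "set" variant below:
        *
        *   bits  0-14  last_avail_idx
        *   bit     15  last_avail_wrap_counter
        *   bits 16-30  used_idx
        *   bit     31  used_wrap_counter
        */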
   3385
   3386static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
   3387                                                      int n)
   3388{
   3389    return vdev->vq[n].last_avail_idx;
   3390}
   3391
   3392unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
   3393{
   3394    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   3395        return virtio_queue_packed_get_last_avail_idx(vdev, n);
   3396    } else {
   3397        return virtio_queue_split_get_last_avail_idx(vdev, n);
   3398    }
   3399}
   3400
   3401static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
   3402                                                   int n, unsigned int idx)
   3403{
   3404    struct VirtQueue *vq = &vdev->vq[n];
   3405
   3406    vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
   3407    vq->last_avail_wrap_counter =
   3408        vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
   3409    idx >>= 16;
   3410    vq->used_idx = idx & 0x7fff;
   3411    vq->used_wrap_counter = !!(idx & 0x8000);
   3412}
   3413
   3414static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
   3415                                                  int n, unsigned int idx)
   3416{
   3417        vdev->vq[n].last_avail_idx = idx;
   3418        vdev->vq[n].shadow_avail_idx = idx;
   3419}
   3420
   3421void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
   3422                                     unsigned int idx)
   3423{
   3424    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   3425        virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
   3426    } else {
   3427        virtio_queue_split_set_last_avail_idx(vdev, n, idx);
   3428    }
   3429}
   3430
   3431static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
   3432                                                       int n)
   3433{
   3434    /* The packed ring keeps no index in shared memory to restore from */
   3435    return;
   3436}
   3437
   3438static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
   3439                                                      int n)
   3440{
   3441    RCU_READ_LOCK_GUARD();
   3442    if (vdev->vq[n].vring.desc) {
   3443        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
   3444        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
   3445    }
   3446}
   3447
   3448void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
   3449{
   3450    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   3451        virtio_queue_packed_restore_last_avail_idx(vdev, n);
   3452    } else {
   3453        virtio_queue_split_restore_last_avail_idx(vdev, n);
   3454    }
   3455}
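
       /*
        * For split rings the device-written used index is the only reference
        * left in guest memory, so restoring rolls last_avail_idx back to it:
        * elements popped but not yet marked used will be popped again.
        */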
   3456
   3457static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
   3458{
   3459    /* used_idx was already restored by the packed set_last_avail_idx() */
   3460    return;
   3461}
   3462
   3463static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
   3464{
   3465    RCU_READ_LOCK_GUARD();
   3466    if (vdev->vq[n].vring.desc) {
   3467        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
   3468    }
   3469}
   3470
   3471void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
   3472{
   3473    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
   3474        virtio_queue_packed_update_used_idx(vdev, n);
   3475    } else {
   3476        virtio_queue_split_update_used_idx(vdev, n);
   3477    }
   3478}
   3479
   3480void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
   3481{
   3482    vdev->vq[n].signalled_used_valid = false;
   3483}
   3484
   3485VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
   3486{
   3487    return vdev->vq + n;
   3488}
   3489
   3490uint16_t virtio_get_queue_index(VirtQueue *vq)
   3491{
   3492    return vq->queue_index;
   3493}
   3494
   3495static void virtio_queue_guest_notifier_read(EventNotifier *n)
   3496{
   3497    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
   3498    if (event_notifier_test_and_clear(n)) {
   3499        virtio_irq(vq);
   3500    }
   3501}
   3502
   3503void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
   3504                                                bool with_irqfd)
   3505{
   3506    if (assign && !with_irqfd) {
   3507        event_notifier_set_handler(&vq->guest_notifier,
   3508                                   virtio_queue_guest_notifier_read);
   3509    } else {
   3510        event_notifier_set_handler(&vq->guest_notifier, NULL);
   3511    }
   3512    if (!assign) {
   3513        /* Test and clear the notifier before closing it,
   3514         * in case the poll callback didn't have time to run. */
   3515        virtio_queue_guest_notifier_read(&vq->guest_notifier);
   3516    }
   3517}
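
       /*
        * When with_irqfd is true the guest notifier is consumed directly by
        * the hypervisor (e.g. a KVM irqfd), so no userspace handler is
        * installed; otherwise the handler above turns notifier events into
        * virtio_irq() calls.
        */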
   3518
   3519EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
   3520{
   3521    return &vq->guest_notifier;
   3522}
   3523
   3524static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
   3525{
   3526    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
   3527    if (event_notifier_test_and_clear(n)) {
   3528        virtio_queue_notify_aio_vq(vq);
   3529    }
   3530}
   3531
   3532static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
   3533{
   3534    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
   3535
   3536    virtio_queue_set_notification(vq, 0);
   3537}
   3538
   3539static bool virtio_queue_host_notifier_aio_poll(void *opaque)
   3540{
   3541    EventNotifier *n = opaque;
   3542    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
   3543
   3544    if (!vq->vring.desc || virtio_queue_empty(vq)) {
   3545        return false;
   3546    }
   3547
   3548    return virtio_queue_notify_aio_vq(vq);
   3549}
   3550
   3551static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
   3552{
   3553    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
   3554
   3555    /* Caller polls once more after this to catch requests that race with us */
   3556    virtio_queue_set_notification(vq, 1);
   3557}
   3558
   3559void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
   3560                                                VirtIOHandleAIOOutput handle_output)
   3561{
   3562    if (handle_output) {
   3563        vq->handle_aio_output = handle_output;
   3564        aio_set_event_notifier(ctx, &vq->host_notifier, true,
   3565                               virtio_queue_host_notifier_aio_read,
   3566                               virtio_queue_host_notifier_aio_poll);
   3567        aio_set_event_notifier_poll(ctx, &vq->host_notifier,
   3568                                    virtio_queue_host_notifier_aio_poll_begin,
   3569                                    virtio_queue_host_notifier_aio_poll_end);
   3570    } else {
   3571        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
   3572        /* Test and clear the notifier after disabling the event,
   3573         * in case the poll callback didn't have time to run. */
   3574        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
   3575        vq->handle_aio_output = NULL;
   3576    }
   3577}
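
       /*
        * The poll_begin/poll_end hooks above suppress virtqueue
        * notifications while the AioContext is busy-polling: the ring is
        * inspected directly, so guest kicks would only add overhead.
        * Notifications are re-enabled in poll_end and the caller polls once
        * more to catch requests that raced with re-enabling.
        */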
   3578
   3579void virtio_queue_host_notifier_read(EventNotifier *n)
   3580{
   3581    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
   3582    if (event_notifier_test_and_clear(n)) {
   3583        virtio_queue_notify_vq(vq);
   3584    }
   3585}
   3586
   3587EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
   3588{
   3589    return &vq->host_notifier;
   3590}
   3591
   3592void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
   3593{
   3594    vq->host_notifier_enabled = enabled;
   3595}
   3596
   3597int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
   3598                                      MemoryRegion *mr, bool assign)
   3599{
   3600    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3601    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
   3602
   3603    if (k->set_host_notifier_mr) {
   3604        return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
   3605    }
   3606
   3607    return -1;
   3608}
   3609
   3610void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
   3611{
   3612    g_free(vdev->bus_name);
   3613    vdev->bus_name = g_strdup(bus_name);
   3614}
   3615
   3616void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
   3617{
   3618    va_list ap;
   3619
   3620    va_start(ap, fmt);
   3621    error_vreport(fmt, ap);
   3622    va_end(ap);
   3623
   3624    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   3625        vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
   3626        virtio_notify_config(vdev);
   3627    }
   3628
   3629    vdev->broken = true;
   3630}
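
       /*
        * Typical use (a sketch; "idx" is an illustrative variable):
        *
        *     virtio_error(vdev, "descriptor index %u out of range", idx);
        *
        * For VIRTIO 1.0 devices this sets NEEDS_RESET and raises a config
        * interrupt; legacy devices are only marked broken internally.
        */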
   3631
   3632static void virtio_memory_listener_commit(MemoryListener *listener)
   3633{
   3634    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
   3635    int i;
   3636
   3637    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   3638        if (vdev->vq[i].vring.num == 0) {
   3639            break;
   3640        }
   3641        virtio_init_region_cache(vdev, i);
   3642    }
   3643}
   3644
   3645static void virtio_device_realize(DeviceState *dev, Error **errp)
   3646{
   3647    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
   3648    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
   3649    Error *err = NULL;
   3650
   3651    /* Devices should either use vmsd or the load/save methods */
   3652    assert(!vdc->vmsd || !vdc->load);
   3653
   3654    if (vdc->realize != NULL) {
   3655        vdc->realize(dev, &err);
   3656        if (err != NULL) {
   3657            error_propagate(errp, err);
   3658            return;
   3659        }
   3660    }
   3661
   3662    virtio_bus_device_plugged(vdev, &err);
   3663    if (err != NULL) {
   3664        error_propagate(errp, err);
   3665        vdc->unrealize(dev);
   3666        return;
   3667    }
   3668
   3669    vdev->listener.commit = virtio_memory_listener_commit;
   3670    vdev->listener.name = "virtio";
   3671    memory_listener_register(&vdev->listener, vdev->dma_as);
   3672}
   3673
   3674static void virtio_device_unrealize(DeviceState *dev)
   3675{
   3676    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
   3677    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
   3678
   3679    memory_listener_unregister(&vdev->listener);
   3680    virtio_bus_device_unplugged(vdev);
   3681
   3682    if (vdc->unrealize != NULL) {
   3683        vdc->unrealize(dev);
   3684    }
   3685
   3686    g_free(vdev->bus_name);
   3687    vdev->bus_name = NULL;
   3688}
   3689
   3690static void virtio_device_free_virtqueues(VirtIODevice *vdev)
   3691{
   3692    int i;
   3693    if (!vdev->vq) {
   3694        return;
   3695    }
   3696
   3697    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   3698        if (vdev->vq[i].vring.num == 0) {
   3699            break;
   3700        }
   3701        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
   3702    }
   3703    g_free(vdev->vq);
   3704}
   3705
   3706static void virtio_device_instance_finalize(Object *obj)
   3707{
   3708    VirtIODevice *vdev = VIRTIO_DEVICE(obj);
   3709
   3710    virtio_device_free_virtqueues(vdev);
   3711
   3712    g_free(vdev->config);
   3713    g_free(vdev->vector_queues);
   3714}
   3715
   3716static Property virtio_properties[] = {
   3717    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
   3718    DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
   3719    DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
   3720    DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
   3721                     disable_legacy_check, false),
   3722    DEFINE_PROP_END_OF_LIST(),
   3723};
   3724
   3725static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
   3726{
   3727    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
   3728    int i, n, r, err;
   3729
   3730    /*
   3731     * Batch all the host notifiers in a single transaction to avoid
   3732     * quadratic time complexity in address_space_update_ioeventfds().
   3733     */
   3734    memory_region_transaction_begin();
   3735    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
   3736        VirtQueue *vq = &vdev->vq[n];
   3737        if (!virtio_queue_get_num(vdev, n)) {
   3738            continue;
   3739        }
   3740        r = virtio_bus_set_host_notifier(qbus, n, true);
   3741        if (r < 0) {
   3742            err = r;
   3743            goto assign_error;
   3744        }
   3745        event_notifier_set_handler(&vq->host_notifier,
   3746                                   virtio_queue_host_notifier_read);
   3747    }
   3748
   3749    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
   3750        /* Kick right away to begin processing requests already in vring */
   3751        VirtQueue *vq = &vdev->vq[n];
   3752        if (!vq->vring.num) {
   3753            continue;
   3754        }
   3755        event_notifier_set(&vq->host_notifier);
   3756    }
   3757    memory_region_transaction_commit();
   3758    return 0;
   3759
   3760assign_error:
   3761    i = n; /* save n for a second iteration after transaction is committed. */
   3762    while (--n >= 0) {
   3763        VirtQueue *vq = &vdev->vq[n];
   3764        if (!virtio_queue_get_num(vdev, n)) {
   3765            continue;
   3766        }
   3767
   3768        event_notifier_set_handler(&vq->host_notifier, NULL);
   3769        r = virtio_bus_set_host_notifier(qbus, n, false);
   3770        assert(r >= 0);
   3771    }
   3772    /*
   3773     * The transaction expects the ioeventfds to be open when it
   3774     * commits. Do it now, before the cleanup loop.
   3775     */
   3776    memory_region_transaction_commit();
   3777
   3778    while (--i >= 0) {
   3779        if (!virtio_queue_get_num(vdev, i)) {
   3780            continue;
   3781        }
   3782        virtio_bus_cleanup_host_notifier(qbus, i);
   3783    }
   3784    return err;
   3785}
   3786
   3787int virtio_device_start_ioeventfd(VirtIODevice *vdev)
   3788{
   3789    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3790    VirtioBusState *vbus = VIRTIO_BUS(qbus);
   3791
   3792    return virtio_bus_start_ioeventfd(vbus);
   3793}
   3794
   3795static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
   3796{
   3797    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
   3798    int n, r;
   3799
   3800    /*
   3801     * Batch all the host notifiers in a single transaction to avoid
   3802     * quadratic time complexity in address_space_update_ioeventfds().
   3803     */
   3804    memory_region_transaction_begin();
   3805    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
   3806        VirtQueue *vq = &vdev->vq[n];
   3807
   3808        if (!virtio_queue_get_num(vdev, n)) {
   3809            continue;
   3810        }
   3811        event_notifier_set_handler(&vq->host_notifier, NULL);
   3812        r = virtio_bus_set_host_notifier(qbus, n, false);
   3813        assert(r >= 0);
   3814    }
   3815    /*
   3816     * The transaction expects the ioeventfds to be open when it
   3817     * commits. Do it now, before the cleanup loop.
   3818     */
   3819    memory_region_transaction_commit();
   3820
   3821    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
   3822        if (!virtio_queue_get_num(vdev, n)) {
   3823            continue;
   3824        }
   3825        virtio_bus_cleanup_host_notifier(qbus, n);
   3826    }
   3827}
   3828
   3829int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
   3830{
   3831    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3832    VirtioBusState *vbus = VIRTIO_BUS(qbus);
   3833
   3834    return virtio_bus_grab_ioeventfd(vbus);
   3835}
   3836
   3837void virtio_device_release_ioeventfd(VirtIODevice *vdev)
   3838{
   3839    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3840    VirtioBusState *vbus = VIRTIO_BUS(qbus);
   3841
   3842    virtio_bus_release_ioeventfd(vbus);
   3843}
   3844
   3845static void virtio_device_class_init(ObjectClass *klass, void *data)
   3846{
   3847    /* Set the default value here. */
   3848    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
   3849    DeviceClass *dc = DEVICE_CLASS(klass);
   3850
   3851    dc->realize = virtio_device_realize;
   3852    dc->unrealize = virtio_device_unrealize;
   3853    dc->bus_type = TYPE_VIRTIO_BUS;
   3854    device_class_set_props(dc, virtio_properties);
   3855    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
   3856    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
   3857
   3858    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
   3859}
   3860
   3861bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
   3862{
   3863    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
   3864    VirtioBusState *vbus = VIRTIO_BUS(qbus);
   3865
   3866    return virtio_bus_ioeventfd_enabled(vbus);
   3867}
   3868
   3869static const TypeInfo virtio_device_info = {
   3870    .name = TYPE_VIRTIO_DEVICE,
   3871    .parent = TYPE_DEVICE,
   3872    .instance_size = sizeof(VirtIODevice),
   3873    .class_init = virtio_device_class_init,
   3874    .instance_finalize = virtio_device_instance_finalize,
   3875    .abstract = true,
   3876    .class_size = sizeof(VirtioDeviceClass),
   3877};
   3878
   3879static void virtio_register_types(void)
   3880{
   3881    type_register_static(&virtio_device_info);
   3882}
   3883
   3884type_init(virtio_register_types)