cachepc-qemu

Fork of AMDESE/qemu with changes for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

virtio-pci.c (73024B)


/*
 * Virtio PCI Bindings
 *
 * Copyright IBM, Corp. 2007
 * Copyright (c) 2009 CodeSourcery
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Paul Brook        <paul@codesourcery.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"

#include "exec/memop.h"
#include "standard-headers/linux/virtio_pci.h"
#include "hw/boards.h"
#include "hw/virtio/virtio.h"
#include "migration/qemu-file-types.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/loader.h"
#include "sysemu/kvm.h"
#include "virtio-pci.h"
#include "qemu/range.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/visitor.h"
#include "sysemu/replay.h"

#define VIRTIO_PCI_REGION_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_present(dev))

#undef VIRTIO_PCI_CONFIG

/* The remaining space is defined by each driver as the per-driver
 * configuration space */
#define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))
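
/*
 * Note: per the standard headers, VIRTIO_PCI_CONFIG_OFF() evaluates to 24
 * when MSI-X is on and 20 otherwise, since the legacy layout places the two
 * MSI-X vector registers ahead of the device-specific config.
 * VIRTIO_PCI_REGION_SIZE uses msix_present() because the BAR layout is fixed
 * at realize time, while VIRTIO_PCI_CONFIG_SIZE uses msix_enabled() because
 * the runtime offset shifts when the guest toggles MSI-X.
 */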

static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                               VirtIOPCIProxy *dev);
static void virtio_pci_reset(DeviceState *qdev);

/* virtio device */
/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
{
    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
}

/* DeviceState to VirtIOPCIProxy. Note: used on datapath,
 * be careful and test performance if you change this.
 */
static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
{
    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
}

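/*
 * Deliver a notification to the guest: an MSI-X message when MSI-X is
 * enabled, otherwise level-triggered INTx driven by bit 0 of the ISR.
 */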
static void virtio_pci_notify(DeviceState *d, uint16_t vector)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);

    if (msix_enabled(&proxy->pci_dev)) {
        msix_notify(&proxy->pci_dev, vector);
    } else {
        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
        pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1);
    }
}

static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    pci_device_save(&proxy->pci_dev, f);
    msix_save(&proxy->pci_dev, f);
    if (msix_present(&proxy->pci_dev)) {
        qemu_put_be16(f, vdev->config_vector);
    }
}

static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
    .name = "virtio_pci/modern_queue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(num, VirtIOPCIQueue),
        VMSTATE_UNUSED(1), /* enabled was stored as be16 */
        VMSTATE_BOOL(enabled, VirtIOPCIQueue),
        VMSTATE_UINT32_ARRAY(desc, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(avail, VirtIOPCIQueue, 2),
        VMSTATE_UINT32_ARRAY(used, VirtIOPCIQueue, 2),
        VMSTATE_END_OF_LIST()
    }
};

static bool virtio_pci_modern_state_needed(void *opaque)
{
    VirtIOPCIProxy *proxy = opaque;

    return virtio_pci_modern(proxy);
}

static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
    .name = "virtio_pci/modern_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_pci_modern_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
        VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
        VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
        VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
                             vmstate_virtio_pci_modern_queue_state,
                             VirtIOPCIQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_pci = {
    .name = "virtio_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_pci_modern_state_sub,
        NULL
    }
};

static bool virtio_pci_has_extra_state(DeviceState *d)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
}

static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL);
}

static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
}

static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (msix_present(&proxy->pci_dev)) {
        qemu_put_be16(f, virtio_queue_vector(vdev, n));
    }
}

static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    int ret;
    ret = pci_device_load(&proxy->pci_dev, f);
    if (ret) {
        return ret;
    }
    msix_unuse_all_vectors(&proxy->pci_dev);
    msix_load(&proxy->pci_dev, f);
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vdev->config_vector);
    } else {
        vdev->config_vector = VIRTIO_NO_VECTOR;
    }
    if (vdev->config_vector != VIRTIO_NO_VECTOR) {
        return msix_vector_use(&proxy->pci_dev, vdev->config_vector);
    }
    return 0;
}

static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    uint16_t vector;
    if (msix_present(&proxy->pci_dev)) {
        qemu_get_be16s(f, &vector);
    } else {
        vector = VIRTIO_NO_VECTOR;
    }
    virtio_queue_set_vector(vdev, n, vector);
    if (vector != VIRTIO_NO_VECTOR) {
        return msix_vector_use(&proxy->pci_dev, vector);
    }

    return 0;
}

static bool virtio_pci_ioeventfd_enabled(DeviceState *d)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0;
}

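/*
 * Stride between per-queue notification addresses in the modern notify
 * region. With the page-per-vq flag each queue gets its own 4 KiB page;
 * otherwise a packed 4-byte stride is used, which the guest learns as the
 * notify capability's notify_off_multiplier.
 */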
#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000

static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy)
{
    return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ?
        QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4;
}

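/*
 * Wire queue n's host notifier (ioeventfd) into the guest-visible
 * notification addresses. Modern notifications are distinguished by
 * address, so no datamatch is needed (and a zero-length "fast MMIO"
 * eventfd is used when KVM supports it); legacy and modern-PIO
 * notifications share one address and datamatch on the 2-byte queue index.
 */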
static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
                                       int n, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, n);
    bool legacy = virtio_pci_legacy(proxy);
    bool modern = virtio_pci_modern(proxy);
    bool fast_mmio = kvm_ioeventfd_any_length_enabled();
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    MemoryRegion *modern_mr = &proxy->notify.mr;
    MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr;
    MemoryRegion *legacy_mr = &proxy->bar;
    hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) *
                         virtio_get_queue_index(vq);
    hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY;

    if (assign) {
        if (modern) {
            if (fast_mmio) {
                memory_region_add_eventfd(modern_mr, modern_addr, 0,
                                          false, n, notifier);
            } else {
                memory_region_add_eventfd(modern_mr, modern_addr, 2,
                                          false, n, notifier);
            }
            if (modern_pio) {
                memory_region_add_eventfd(modern_notify_mr, 0, 2,
                                          true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_add_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    } else {
        if (modern) {
            if (fast_mmio) {
                memory_region_del_eventfd(modern_mr, modern_addr, 0,
                                          false, n, notifier);
            } else {
                memory_region_del_eventfd(modern_mr, modern_addr, 2,
                                          false, n, notifier);
            }
            if (modern_pio) {
                memory_region_del_eventfd(modern_notify_mr, 0, 2,
                                          true, n, notifier);
            }
        }
        if (legacy) {
            memory_region_del_eventfd(legacy_mr, legacy_addr, 2,
                                      true, n, notifier);
        }
    }
    return 0;
}

static void virtio_pci_start_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_start_ioeventfd(&proxy->bus);
}

static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
{
    virtio_bus_stop_ioeventfd(&proxy->bus);
}

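/*
 * Legacy ("virtio 0.9") I/O port layout served by the helpers below,
 * using the offsets from standard-headers/linux/virtio_pci.h:
 *   0x00 HOST_FEATURES, 0x04 GUEST_FEATURES, 0x08 QUEUE_PFN,
 *   0x0c QUEUE_NUM, 0x0e QUEUE_SEL, 0x10 QUEUE_NOTIFY, 0x12 STATUS,
 *   0x13 ISR (read clears); with MSI-X enabled, 0x14 CONFIG_VECTOR and
 *   0x16 QUEUE_VECTOR follow, and the device config moves accordingly.
 */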
static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    hwaddr pa;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        /* Guest does not negotiate properly?  We have to assume nothing. */
        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
            val = virtio_bus_get_vdev_bad_features(&proxy->bus);
        }
        virtio_set_features(vdev, val);
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (hwaddr)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        if (pa == 0) {
            virtio_pci_reset(DEVICE(proxy));
        } else {
            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_QUEUE_MAX) {
            vdev->queue_sel = val;
        }
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_QUEUE_MAX) {
            virtio_queue_notify(vdev, val);
        }
        break;
    case VIRTIO_PCI_STATUS:
        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
            virtio_pci_stop_ioeventfd(proxy);
        }

        virtio_set_status(vdev, val & 0xFF);

        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
            virtio_pci_start_ioeventfd(proxy);
        }

        if (vdev->status == 0) {
            virtio_pci_reset(DEVICE(proxy));
        }

        /* Linux before 2.6.34 drives the device without enabling
           the PCI device bus master bit. Enable it automatically
           for the guest. This is a PCI spec violation but so is
           initiating DMA with bus master bit clear. */
        if (val == (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER)) {
            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
                                     proxy->pci_dev.config[PCI_COMMAND] |
                                     PCI_COMMAND_MASTER, 1);
        }
        break;
    case VIRTIO_MSI_CONFIG_VECTOR:
        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
        /* Make it possible for the guest to discover that an error
         * took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        vdev->config_vector = val;
        break;
    case VIRTIO_MSI_QUEUE_VECTOR:
        msix_vector_unuse(&proxy->pci_dev,
                          virtio_queue_vector(vdev, vdev->queue_sel));
        /* Make it possible for the guest to discover that an error
         * took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
        break;
    default:
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: unexpected address 0x%x value 0x%x\n",
                      __func__, addr, val);
        break;
    }
}

static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t ret = 0xFFFFFFFF;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->host_features;
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->guest_features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = virtio_queue_get_addr(vdev, vdev->queue_sel)
              >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = qatomic_xchg(&vdev->isr, 0);
        pci_irq_deassert(&proxy->pci_dev);
        break;
    case VIRTIO_MSI_CONFIG_VECTOR:
        ret = vdev->config_vector;
        break;
    case VIRTIO_MSI_QUEUE_VECTOR:
        ret = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    default:
        break;
    }

    return ret;
}

static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    uint64_t val = 0;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    if (addr < config) {
        return virtio_ioport_read(proxy, addr);
    }
    addr -= config;

    switch (size) {
    case 1:
        val = virtio_config_readb(vdev, addr);
        break;
    case 2:
        val = virtio_config_readw(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        break;
    case 4:
        val = virtio_config_readl(vdev, addr);
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        break;
    }
    return val;
}

static void virtio_pci_config_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (vdev == NULL) {
        return;
    }

    if (addr < config) {
        virtio_ioport_write(proxy, addr, val);
        return;
    }
    addr -= config;
    /*
     * Virtio-PCI is odd. Ioports are LE but config space is target native
     * endian.
     */
    switch (size) {
    case 1:
        virtio_config_writeb(vdev, addr, val);
        break;
    case 2:
        if (virtio_is_big_endian(vdev)) {
            val = bswap16(val);
        }
        virtio_config_writew(vdev, addr, val);
        break;
    case 4:
        if (virtio_is_big_endian(vdev)) {
            val = bswap32(val);
        }
        virtio_config_writel(vdev, addr, val);
        break;
    }
}

static const MemoryRegionOps virtio_pci_config_ops = {
    .read = virtio_pci_config_read,
    .write = virtio_pci_config_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};

static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy,
                                                 hwaddr *off, int len)
{
    int i;
    VirtIOPCIRegion *reg;

    for (i = 0; i < ARRAY_SIZE(proxy->regs); ++i) {
        reg = &proxy->regs[i];
        if (*off >= reg->offset &&
            *off + len <= reg->offset + reg->size) {
            *off -= reg->offset;
            return &reg->mr;
        }
    }

    return NULL;
}

/* Below are generic functions to do memcpy from/to an address space,
 * without byteswaps, with input validation.
 *
 * As regular address_space_* APIs all do some kind of byteswap at least for
 * some host/target combinations, we are forced to explicitly convert to a
 * known-endianness integer value.
 * It doesn't really matter which endian format to go through, so the code
 * below selects the endian that causes the least amount of work on the given
 * host.
 *
 * Note: host pointer must be aligned.
 */
static void virtio_address_space_write(VirtIOPCIProxy *proxy, hwaddr addr,
                                       const uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    switch (len) {
    case 1:
        val = pci_get_byte(buf);
        break;
    case 2:
        val = pci_get_word(buf);
        break;
    case 4:
        val = pci_get_long(buf);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        return;
    }
    memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
                                 MEMTXATTRS_UNSPECIFIED);
}

static void virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr,
                                      uint8_t *buf, int len)
{
    uint64_t val;
    MemoryRegion *mr;

    /* address_space_* APIs assume an aligned address.
     * As address is under guest control, handle illegal values.
     */
    addr &= ~(len - 1);

    mr = virtio_address_space_lookup(proxy, &addr, len);
    if (!mr) {
        return;
    }

    /* Make sure caller aligned buf properly */
    assert(!(((uintptr_t)buf) & (len - 1)));

    memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
                                MEMTXATTRS_UNSPECIFIED);
    switch (len) {
    case 1:
        pci_set_byte(buf, val);
        break;
    case 2:
        pci_set_word(buf, val);
        break;
    case 4:
        pci_set_long(buf, val);
        break;
    default:
        /* As length is under guest control, handle illegal values. */
        break;
    }
}

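/*
 * Config space writes: besides the default handling, emulate the
 * VIRTIO_PCI_CAP_PCI_CFG window (a vendor capability through which the
 * guest can access any device BAR region via config space), and track the
 * bus master enable bit so DMA stops when the guest clears it.
 */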
static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
                                uint32_t val, int len)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    struct virtio_pci_cfg_cap *cfg;

    pci_default_write_config(pci_dev, address, val, len);

    if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
        pcie_cap_flr_write_config(pci_dev, address, val, len);
    }

    if (range_covers_byte(address, len, PCI_COMMAND)) {
        if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
            virtio_set_disabled(vdev, true);
            virtio_pci_stop_ioeventfd(proxy);
            virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
        } else {
            virtio_set_disabled(vdev, false);
        }
    }

    if (proxy->config_cap &&
        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
                                                                  pci_cfg_data),
                       sizeof cfg->pci_cfg_data)) {
        uint32_t off;
        uint32_t len;

        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
        off = le32_to_cpu(cfg->cap.offset);
        len = le32_to_cpu(cfg->cap.length);

        if (len == 1 || len == 2 || len == 4) {
            assert(len <= sizeof cfg->pci_cfg_data);
            virtio_address_space_write(proxy, off, cfg->pci_cfg_data, len);
        }
    }
}

static uint32_t virtio_read_config(PCIDevice *pci_dev,
                                   uint32_t address, int len)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
    struct virtio_pci_cfg_cap *cfg;

    if (proxy->config_cap &&
        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
                                                                  pci_cfg_data),
                       sizeof cfg->pci_cfg_data)) {
        uint32_t off;
        uint32_t len;

        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
        off = le32_to_cpu(cfg->cap.offset);
        len = le32_to_cpu(cfg->cap.length);

        if (len == 1 || len == 2 || len == 4) {
            assert(len <= sizeof cfg->pci_cfg_data);
            virtio_address_space_read(proxy, off, cfg->pci_cfg_data, len);
        }
    }

    return pci_default_read_config(pci_dev, address, len);
}

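/*
 * KVM MSI routes are allocated once per MSI-X vector and refcounted in
 * vector_irqfd[], since several virtqueues may share a vector; the route
 * is only released when the last user drops it.
 */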
static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
                                        unsigned int queue_no,
                                        unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    if (irqfd->users == 0) {
        ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev);
        if (ret < 0) {
            return ret;
        }
        irqfd->virq = ret;
    }
    irqfd->users++;
    return 0;
}

static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
                                             unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    if (--irqfd->users == 0) {
        kvm_irqchip_release_virq(kvm_state, irqfd->virq);
    }
}

static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
                                    unsigned int queue_no,
                                    unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
}

static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
                                         unsigned int queue_no,
                                         unsigned int vector)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    int ret;

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
    assert(ret == 0);
}

static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
{
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    unsigned int vector;
    int ret, queue_no;

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            break;
        }
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
        if (ret < 0) {
            goto undo;
        }
        /* If guest supports masking, set up irqfd now.
         * Otherwise, delay until unmasked in the frontend.
         */
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
            if (ret < 0) {
                kvm_virtio_pci_vq_vector_release(proxy, vector);
                goto undo;
            }
        }
    }
    return 0;

undo:
    while (--queue_no >= 0) {
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
    return ret;
}

static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
{
    PCIDevice *dev = &proxy->pci_dev;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    unsigned int vector;
    int queue_no;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    for (queue_no = 0; queue_no < nvqs; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            break;
        }
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector >= msix_nr_vectors_allocated(dev)) {
            continue;
        }
        /* If guest supports masking, clean up irqfd now.
         * Otherwise, it was cleaned when masked in the frontend.
         */
        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
        }
        kvm_virtio_pci_vq_vector_release(proxy, vector);
    }
}

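/*
 * MSI-X mask/unmask callbacks. On unmask the MSI route is brought up to
 * date with the latest message and the irqfd is either unmasked via the
 * device's guest_notifier_mask() hook or attached on the spot; masking
 * does the reverse.
 */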
static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
                                       unsigned int queue_no,
                                       unsigned int vector,
                                       MSIMessage msg)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
    VirtIOIRQFD *irqfd;
    int ret = 0;

    if (proxy->vector_irqfd) {
        irqfd = &proxy->vector_irqfd[vector];
        if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
            ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
                                               &proxy->pci_dev);
            if (ret < 0) {
                return ret;
            }
            kvm_irqchip_commit_routes(kvm_state);
        }
    }

    /* If guest supports masking, irqfd is already setup, unmask it.
     * Otherwise, set it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, false);
        /* Test after unmasking to avoid losing events. */
        if (k->guest_notifier_pending &&
            k->guest_notifier_pending(vdev, queue_no)) {
            event_notifier_set(n);
        }
    } else {
        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
    }
    return ret;
}

static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
                                      unsigned int queue_no,
                                      unsigned int vector)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    /* If guest supports masking, keep irqfd but mask it.
     * Otherwise, clean it up now.
     */
    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
        k->guest_notifier_mask(vdev, queue_no, true);
    } else {
        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
    }
}

static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
                                    MSIMessage msg)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    int ret, index, unmasked = 0;

    while (vq) {
        index = virtio_get_queue_index(vq);
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
            if (ret < 0) {
                goto undo;
            }
            ++unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }

    return 0;

undo:
    vq = virtio_vector_first_queue(vdev, vector);
    while (vq && unmasked >= 0) {
        index = virtio_get_queue_index(vq);
        if (index < proxy->nvqs_with_notifiers) {
            virtio_pci_vq_vector_mask(proxy, index, vector);
            --unmasked;
        }
        vq = virtio_vector_next_queue(vq);
    }
    return ret;
}

static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
    int index;

    while (vq) {
        index = virtio_get_queue_index(vq);
        if (!virtio_queue_get_num(vdev, index)) {
            break;
        }
        if (index < proxy->nvqs_with_notifiers) {
            virtio_pci_vq_vector_mask(proxy, index, vector);
        }
        vq = virtio_vector_next_queue(vq);
    }
}

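/*
 * Called by the MSI-X core while a range of vectors is masked: latch any
 * interrupt that is pending (per the device hook or the raw event
 * notifier) into the MSI-X pending bit array so it fires on unmask.
 */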
static void virtio_pci_vector_poll(PCIDevice *dev,
                                   unsigned int vector_start,
                                   unsigned int vector_end)
{
    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int queue_no;
    unsigned int vector;
    EventNotifier *notifier;
    VirtQueue *vq;

    for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
        if (!virtio_queue_get_num(vdev, queue_no)) {
            break;
        }
        vector = virtio_queue_vector(vdev, queue_no);
        if (vector < vector_start || vector >= vector_end ||
            !msix_is_masked(dev, vector)) {
            continue;
        }
        vq = virtio_get_queue(vdev, queue_no);
        notifier = virtio_queue_get_guest_notifier(vq);
        if (k->guest_notifier_pending) {
            if (k->guest_notifier_pending(vdev, queue_no)) {
                msix_set_pending(dev, vector);
            }
        } else if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
}

static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
                                         bool with_irqfd)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    VirtQueue *vq = virtio_get_queue(vdev, n);
    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);

    if (assign) {
        int r = event_notifier_init(notifier, 0);
        if (r < 0) {
            return r;
        }
        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
    } else {
        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
        event_notifier_cleanup(notifier);
    }

    if (!msix_enabled(&proxy->pci_dev) &&
        vdev->use_guest_notifier_mask &&
        vdc->guest_notifier_mask) {
        vdc->guest_notifier_mask(vdev, n, !assign);
    }

    return 0;
}

static bool virtio_pci_query_guest_notifiers(DeviceState *d)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    return msix_enabled(&proxy->pci_dev);
}

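/*
 * (De)assign guest notifiers for the first nvqs virtqueues. Ordering
 * matters: MSI-X vector notifiers are torn down before the guest
 * notifiers they reference, and set up only after all guest notifiers
 * are in place; irqfds are used when MSI-X is routed through KVM.
 */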
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int r, n;
    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
        kvm_msi_via_irqfd_enabled();

    nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);

    /* When deassigning, pass a consistent nvqs value
     * to avoid leaking notifiers.
     */
    assert(assign || nvqs == proxy->nvqs_with_notifiers);

    proxy->nvqs_with_notifiers = nvqs;

    /* Must unset vector notifier while guest notifier is still assigned */
    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
        msix_unset_vector_notifiers(&proxy->pci_dev);
        if (proxy->vector_irqfd) {
            kvm_virtio_pci_vector_release(proxy, nvqs);
            g_free(proxy->vector_irqfd);
            proxy->vector_irqfd = NULL;
        }
    }

    for (n = 0; n < nvqs; n++) {
        if (!virtio_queue_get_num(vdev, n)) {
            break;
        }

        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd);
        if (r < 0) {
            goto assign_error;
        }
    }

    /* Must set vector notifier after guest notifier has been assigned */
    if ((with_irqfd || k->guest_notifier_mask) && assign) {
        if (with_irqfd) {
            proxy->vector_irqfd =
                g_malloc0(sizeof(*proxy->vector_irqfd) *
                          msix_nr_vectors_allocated(&proxy->pci_dev));
            r = kvm_virtio_pci_vector_use(proxy, nvqs);
            if (r < 0) {
                goto assign_error;
            }
        }
        r = msix_set_vector_notifiers(&proxy->pci_dev,
                                      virtio_pci_vector_unmask,
                                      virtio_pci_vector_mask,
                                      virtio_pci_vector_poll);
        if (r < 0) {
            goto notifiers_error;
        }
    }

    return 0;

notifiers_error:
    if (with_irqfd) {
        assert(assign);
        kvm_virtio_pci_vector_release(proxy, nvqs);
    }

assign_error:
    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
    assert(assign);
    while (--n >= 0) {
        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
    }
    return r;
}

static int virtio_pci_set_host_notifier_mr(DeviceState *d, int n,
                                           MemoryRegion *mr, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    int offset;

    if (n >= VIRTIO_QUEUE_MAX || !virtio_pci_modern(proxy) ||
        virtio_pci_queue_mem_mult(proxy) != memory_region_size(mr)) {
        return -1;
    }

    if (assign) {
        offset = virtio_pci_queue_mem_mult(proxy) * n;
        memory_region_add_subregion_overlap(&proxy->notify.mr, offset, mr, 1);
    } else {
        memory_region_del_subregion(&proxy->notify.mr, mr);
    }

    return 0;
}

static void virtio_pci_vmstate_change(DeviceState *d, bool running)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (running) {
        /* Old QEMU versions did not set bus master enable on status write.
         * Detect DRIVER set and enable it.
         */
        if ((proxy->flags & VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION) &&
            (vdev->status & VIRTIO_CONFIG_S_DRIVER) &&
            !(proxy->pci_dev.config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
            pci_default_write_config(&proxy->pci_dev, PCI_COMMAND,
                                     proxy->pci_dev.config[PCI_COMMAND] |
                                     PCI_COMMAND_MASTER, 1);
        }
        virtio_pci_start_ioeventfd(proxy);
    } else {
        virtio_pci_stop_ioeventfd(proxy);
    }
}

/*
 * virtio-pci: This is the PCIDevice which has a virtio-pci-bus.
 */

static int virtio_pci_query_nvectors(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);

    return proxy->nvectors;
}

static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    PCIDevice *dev = &proxy->pci_dev;

    return pci_get_address_space(dev);
}

static bool virtio_pci_iommu_enabled(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    PCIDevice *dev = &proxy->pci_dev;
    AddressSpace *dma_as = pci_device_iommu_address_space(dev);

    if (dma_as == &address_space_memory) {
        return false;
    }

    return true;
}

static bool virtio_pci_queue_enabled(DeviceState *d, int n)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return proxy->vqs[n].enabled;
    }

    return virtio_queue_enabled_legacy(vdev, n);
}

static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                  struct virtio_pci_cap *cap)
{
    PCIDevice *dev = &proxy->pci_dev;
    int offset;

    offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, 0,
                                cap->cap_len, &error_abort);

    assert(cap->cap_len >= sizeof *cap);
    memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
           cap->cap_len - PCI_CAP_FLAGS);

    return offset;
}

static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t val = 0;
    int i;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        val = proxy->dfselect;
        break;
    case VIRTIO_PCI_COMMON_DF:
        if (proxy->dfselect <= 1) {
            VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

            val = (vdev->host_features & ~vdc->legacy_features) >>
                (32 * proxy->dfselect);
        }
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        val = proxy->gfselect;
        break;
    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            val = proxy->guest_features[proxy->gfselect];
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        val = vdev->config_vector;
        break;
    case VIRTIO_PCI_COMMON_NUMQ:
        for (i = 0; i < VIRTIO_QUEUE_MAX; ++i) {
            if (virtio_queue_get_num(vdev, i)) {
                val = i + 1;
            }
        }
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        val = vdev->status;
        break;
    case VIRTIO_PCI_COMMON_CFGGENERATION:
        val = vdev->generation;
        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        val = virtio_queue_get_num(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        val = virtio_queue_vector(vdev, vdev->queue_sel);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        val = proxy->vqs[vdev->queue_sel].enabled;
        break;
    case VIRTIO_PCI_COMMON_Q_NOFF:
        /* Simply map queues in order */
        val = vdev->queue_sel;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        val = proxy->vqs[vdev->queue_sel].desc[0];
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        val = proxy->vqs[vdev->queue_sel].desc[1];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        val = proxy->vqs[vdev->queue_sel].avail[0];
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        val = proxy->vqs[vdev->queue_sel].avail[1];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        val = proxy->vqs[vdev->queue_sel].used[0];
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        val = proxy->vqs[vdev->queue_sel].used[1];
        break;
    default:
        val = 0;
    }

    return val;
}

static void virtio_pci_common_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (vdev == NULL) {
        return;
    }

    switch (addr) {
    case VIRTIO_PCI_COMMON_DFSELECT:
        proxy->dfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GFSELECT:
        proxy->gfselect = val;
        break;
    case VIRTIO_PCI_COMMON_GF:
        if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
            proxy->guest_features[proxy->gfselect] = val;
            virtio_set_features(vdev,
                                (((uint64_t)proxy->guest_features[1]) << 32) |
                                proxy->guest_features[0]);
        }
        break;
    case VIRTIO_PCI_COMMON_MSIX:
        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
        /* Make it possible for the guest to discover that an error
         * took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        vdev->config_vector = val;
        break;
    case VIRTIO_PCI_COMMON_STATUS:
        if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) {
            virtio_pci_stop_ioeventfd(proxy);
        }

        virtio_set_status(vdev, val & 0xFF);

        if (val & VIRTIO_CONFIG_S_DRIVER_OK) {
            virtio_pci_start_ioeventfd(proxy);
        }

        if (vdev->status == 0) {
            virtio_pci_reset(DEVICE(proxy));
        }

        break;
    case VIRTIO_PCI_COMMON_Q_SELECT:
        if (val < VIRTIO_QUEUE_MAX) {
            vdev->queue_sel = val;
        }
        break;
    case VIRTIO_PCI_COMMON_Q_SIZE:
        proxy->vqs[vdev->queue_sel].num = val;
        virtio_queue_set_num(vdev, vdev->queue_sel,
                             proxy->vqs[vdev->queue_sel].num);
        break;
    case VIRTIO_PCI_COMMON_Q_MSIX:
        msix_vector_unuse(&proxy->pci_dev,
                          virtio_queue_vector(vdev, vdev->queue_sel));
        /* Make it possible for the guest to discover that an error
         * took place. */
        if (msix_vector_use(&proxy->pci_dev, val) < 0) {
            val = VIRTIO_NO_VECTOR;
        }
        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
        break;
    case VIRTIO_PCI_COMMON_Q_ENABLE:
        if (val == 1) {
            virtio_queue_set_num(vdev, vdev->queue_sel,
                                 proxy->vqs[vdev->queue_sel].num);
            virtio_queue_set_rings(vdev, vdev->queue_sel,
                       ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].desc[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].avail[0],
                       ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 |
                       proxy->vqs[vdev->queue_sel].used[0]);
            proxy->vqs[vdev->queue_sel].enabled = 1;
        } else {
            virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val);
        }
        break;
    case VIRTIO_PCI_COMMON_Q_DESCLO:
        proxy->vqs[vdev->queue_sel].desc[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_DESCHI:
        proxy->vqs[vdev->queue_sel].desc[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILLO:
        proxy->vqs[vdev->queue_sel].avail[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_AVAILHI:
        proxy->vqs[vdev->queue_sel].avail[1] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDLO:
        proxy->vqs[vdev->queue_sel].used[0] = val;
        break;
    case VIRTIO_PCI_COMMON_Q_USEDHI:
        proxy->vqs[vdev->queue_sel].used[1] = val;
        break;
    default:
        break;
    }
}

static uint64_t virtio_pci_notify_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    if (virtio_bus_get_device(&proxy->bus) == NULL) {
        return UINT64_MAX;
    }

    return 0;
}

static void virtio_pci_notify_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);

    if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
        virtio_queue_notify(vdev, queue);
    }
}

static void virtio_pci_notify_write_pio(void *opaque, hwaddr addr,
                                        uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    unsigned queue = val;

    if (vdev != NULL && queue < VIRTIO_QUEUE_MAX) {
        virtio_queue_notify(vdev, queue);
    }
}

static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint64_t val;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    val = qatomic_xchg(&vdev->isr, 0);
    pci_irq_deassert(&proxy->pci_dev);
    return val;
}

static void virtio_pci_isr_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
}

static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint64_t val;

    if (vdev == NULL) {
        return UINT64_MAX;
    }

    switch (size) {
    case 1:
        val = virtio_config_modern_readb(vdev, addr);
        break;
    case 2:
        val = virtio_config_modern_readw(vdev, addr);
        break;
    case 4:
        val = virtio_config_modern_readl(vdev, addr);
        break;
    default:
        val = 0;
        break;
    }
    return val;
}

static void virtio_pci_device_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    if (vdev == NULL) {
        return;
    }

    switch (size) {
    case 1:
        virtio_config_modern_writeb(vdev, addr, val);
        break;
    case 2:
        virtio_config_modern_writew(vdev, addr, val);
        break;
    case 4:
        virtio_config_modern_writel(vdev, addr, val);
        break;
    }
}

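/*
 * Create the five virtio-1.0 MemoryRegions (common, isr, device-specific
 * config, notify, and the optional notify-pio variant). All are
 * little-endian, as required by the modern virtio spec, and each is later
 * exposed to the guest through its own vendor capability.
 */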
   1471static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
   1472                                           const char *vdev_name)
   1473{
   1474    static const MemoryRegionOps common_ops = {
   1475        .read = virtio_pci_common_read,
   1476        .write = virtio_pci_common_write,
   1477        .impl = {
   1478            .min_access_size = 1,
   1479            .max_access_size = 4,
   1480        },
   1481        .endianness = DEVICE_LITTLE_ENDIAN,
   1482    };
   1483    static const MemoryRegionOps isr_ops = {
   1484        .read = virtio_pci_isr_read,
   1485        .write = virtio_pci_isr_write,
   1486        .impl = {
   1487            .min_access_size = 1,
   1488            .max_access_size = 4,
   1489        },
   1490        .endianness = DEVICE_LITTLE_ENDIAN,
   1491    };
   1492    static const MemoryRegionOps device_ops = {
   1493        .read = virtio_pci_device_read,
   1494        .write = virtio_pci_device_write,
   1495        .impl = {
   1496            .min_access_size = 1,
   1497            .max_access_size = 4,
   1498        },
   1499        .endianness = DEVICE_LITTLE_ENDIAN,
   1500    };
   1501    static const MemoryRegionOps notify_ops = {
   1502        .read = virtio_pci_notify_read,
   1503        .write = virtio_pci_notify_write,
   1504        .impl = {
   1505            .min_access_size = 1,
   1506            .max_access_size = 4,
   1507        },
   1508        .endianness = DEVICE_LITTLE_ENDIAN,
   1509    };
   1510    static const MemoryRegionOps notify_pio_ops = {
   1511        .read = virtio_pci_notify_read,
   1512        .write = virtio_pci_notify_write_pio,
   1513        .impl = {
   1514            .min_access_size = 1,
   1515            .max_access_size = 4,
   1516        },
   1517        .endianness = DEVICE_LITTLE_ENDIAN,
   1518    };
   1519    g_autoptr(GString) name = g_string_new(NULL);
   1520
   1521    g_string_printf(name, "virtio-pci-common-%s", vdev_name);
   1522    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
   1523                          &common_ops,
   1524                          proxy,
   1525                          name->str,
   1526                          proxy->common.size);
   1527
   1528    g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
   1529    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
   1530                          &isr_ops,
   1531                          proxy,
   1532                          name->str,
   1533                          proxy->isr.size);
   1534
   1535    g_string_printf(name, "virtio-pci-device-%s", vdev_name);
   1536    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
   1537                          &device_ops,
   1538                          proxy,
   1539                          name->str,
   1540                          proxy->device.size);
   1541
   1542    g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
   1543    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
   1544                          &notify_ops,
   1545                          proxy,
   1546                          name->str,
   1547                          proxy->notify.size);
   1548
   1549    g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
   1550    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
   1551                          &notify_pio_ops,
   1552                          proxy,
   1553                          name->str,
   1554                          proxy->notify_pio.size);
   1555}
   1556
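/*
 * Map a region into the given BAR and advertise it to the guest via a
 * vendor-specific PCI capability. The capability's offset and length
 * fields are stored little-endian, as the virtio spec requires.
 */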
   1557static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy,
   1558                                         VirtIOPCIRegion *region,
   1559                                         struct virtio_pci_cap *cap,
   1560                                         MemoryRegion *mr,
   1561                                         uint8_t bar)
   1562{
   1563    memory_region_add_subregion(mr, region->offset, &region->mr);
   1564
   1565    cap->cfg_type = region->type;
   1566    cap->bar = bar;
   1567    cap->offset = cpu_to_le32(region->offset);
   1568    cap->length = cpu_to_le32(region->size);
   1569    virtio_pci_add_mem_cap(proxy, cap);
   1570
   1571}
   1572
   1573static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy,
   1574                                             VirtIOPCIRegion *region,
   1575                                             struct virtio_pci_cap *cap)
   1576{
   1577    virtio_pci_modern_region_map(proxy, region, cap,
   1578                                 &proxy->modern_bar, proxy->modern_mem_bar_idx);
   1579}
   1580
   1581static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy,
   1582                                            VirtIOPCIRegion *region,
   1583                                            struct virtio_pci_cap *cap)
   1584{
   1585    virtio_pci_modern_region_map(proxy, region, cap,
   1586                                 &proxy->io_bar, proxy->modern_io_bar_idx);
   1587}
   1588
   1589static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy,
   1590                                               VirtIOPCIRegion *region)
   1591{
   1592    memory_region_del_subregion(&proxy->modern_bar,
   1593                                &region->mr);
   1594}
   1595
   1596static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy,
   1597                                              VirtIOPCIRegion *region)
   1598{
   1599    memory_region_del_subregion(&proxy->io_bar,
   1600                                &region->mr);
   1601}
   1602
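/*
 * Called by virtio-bus just before the device is plugged: modern
 * proxies advertise VIRTIO_F_VERSION_1 here. VIRTIO_F_BAD_FEATURE is
 * a bit a guest must never accept; seeing it acked tells us feature
 * negotiation in the driver is broken.
 */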
   1603static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
   1604{
   1605    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
   1606    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
   1607
   1608    if (virtio_pci_modern(proxy)) {
   1609        virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
   1610    }
   1611
   1612    virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
   1613}
   1614
   1615/* This is called by virtio-bus just after the device is plugged. */
   1616static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
   1617{
   1618    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
   1619    VirtioBusState *bus = &proxy->bus;
   1620    bool legacy = virtio_pci_legacy(proxy);
   1621    bool modern;
   1622    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
   1623    uint8_t *config;
   1624    uint32_t size;
   1625    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
   1626
   1627    /*
   1628     * Virtio capabilities present without
    1629     * VIRTIO_F_VERSION_1 confuse guests
   1630     */
   1631    if (!proxy->ignore_backend_features &&
   1632            !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) {
   1633        virtio_pci_disable_modern(proxy);
   1634
   1635        if (!legacy) {
   1636            error_setg(errp, "Device doesn't support modern mode, and legacy"
   1637                             " mode is disabled");
   1638            error_append_hint(errp, "Set disable-legacy to off\n");
   1639
   1640            return;
   1641        }
   1642    }
   1643
   1644    modern = virtio_pci_modern(proxy);
   1645
   1646    config = proxy->pci_dev.config;
   1647    if (proxy->class_code) {
   1648        pci_config_set_class(config, proxy->class_code);
   1649    }
   1650
   1651    if (legacy) {
   1652        if (!virtio_legacy_allowed(vdev)) {
   1653            /*
    1654             * To avoid migration issues, we allow legacy mode when the legacy
    1655             * check is disabled in old machine types (< 5.1).
   1656             */
   1657            if (virtio_legacy_check_disabled(vdev)) {
    1658                warn_report("device is modern-only, but legacy is "
    1659                            "allowed for backward compatibility");
   1660            } else {
   1661                error_setg(errp,
   1662                           "device is modern-only, use disable-legacy=on");
   1663                return;
   1664            }
   1665        }
   1666        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
    1667            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM is not supported by"
    1668                       " legacy or transitional devices");
    1669            return;
   1670        }
   1671        /*
   1672         * Legacy and transitional devices use specific subsystem IDs.
   1673         * Note that the subsystem vendor ID (config + PCI_SUBSYSTEM_VENDOR_ID)
   1674         * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default.
   1675         */
   1676        pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
   1677    } else {
   1678        /* pure virtio-1.0 */
   1679        pci_set_word(config + PCI_VENDOR_ID,
   1680                     PCI_VENDOR_ID_REDHAT_QUMRANET);
   1681        pci_set_word(config + PCI_DEVICE_ID,
   1682                     0x1040 + virtio_bus_get_vdev_id(bus));
   1683        pci_config_set_revision(config, 1);
   1684    }
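    /* All virtio-pci devices signal legacy interrupts on INTA#. */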
   1685    config[PCI_INTERRUPT_PIN] = 1;
   1686
   1687
   1688    if (modern) {
   1689        struct virtio_pci_cap cap = {
   1690            .cap_len = sizeof cap,
   1691        };
   1692        struct virtio_pci_notify_cap notify = {
   1693            .cap.cap_len = sizeof notify,
   1694            .notify_off_multiplier =
   1695                cpu_to_le32(virtio_pci_queue_mem_mult(proxy)),
   1696        };
   1697        struct virtio_pci_cfg_cap cfg = {
   1698            .cap.cap_len = sizeof cfg,
   1699            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
   1700        };
   1701        struct virtio_pci_notify_cap notify_pio = {
    1702            .cap.cap_len = sizeof notify_pio,
   1703            .notify_off_multiplier = cpu_to_le32(0x0),
   1704        };
   1705
   1706        struct virtio_pci_cfg_cap *cfg_mask;
   1707
   1708        virtio_pci_modern_regions_init(proxy, vdev->name);
   1709
   1710        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
   1711        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
   1712        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
   1713        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);
   1714
   1715        if (modern_pio) {
   1716            memory_region_init(&proxy->io_bar, OBJECT(proxy),
   1717                               "virtio-pci-io", 0x4);
   1718
   1719            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
   1720                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
   1721
   1722            virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
   1723                                            &notify_pio.cap);
   1724        }
   1725
   1726        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
   1727                         PCI_BASE_ADDRESS_SPACE_MEMORY |
   1728                         PCI_BASE_ADDRESS_MEM_PREFETCH |
   1729                         PCI_BASE_ADDRESS_MEM_TYPE_64,
   1730                         &proxy->modern_bar);
   1731
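        /*
         * Advertise the VIRTIO_PCI_CAP_PCI_CFG window and make its
         * bar/offset/length fields plus the data window guest-writable
         * in the config-space write mask, so a guest can reach the
         * modern regions through ordinary PCI config cycles.
         */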
   1732        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
   1733        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
   1734        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
   1735        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
   1736        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
   1737        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
   1738    }
   1739
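    /*
     * Try to place the MSI-X table in its own exclusive BAR; if that
     * fails (e.g. -ENOTSUP on hosts without MSI-X support), clear
     * nvectors and fall back to legacy INTx.
     */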
   1740    if (proxy->nvectors) {
   1741        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
   1742                                          proxy->msix_bar_idx, NULL);
   1743        if (err) {
    1744            /* Warn when a system that supports MSI-X can't initialize it */
    1745            if (err != -ENOTSUP) {
    1746                warn_report("unable to init MSI-X vectors to %" PRIu32,
    1747                            proxy->nvectors);
   1748            }
   1749            proxy->nvectors = 0;
   1750        }
   1751    }
   1752
   1753    proxy->pci_dev.config_write = virtio_write_config;
   1754    proxy->pci_dev.config_read = virtio_read_config;
   1755
   1756    if (legacy) {
   1757        size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
   1758            + virtio_bus_get_vdev_config_len(bus);
   1759        size = pow2ceil(size);
   1760
   1761        memory_region_init_io(&proxy->bar, OBJECT(proxy),
   1762                              &virtio_pci_config_ops,
   1763                              proxy, "virtio-pci", size);
   1764
   1765        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
   1766                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
   1767    }
   1768}
   1769
   1770static void virtio_pci_device_unplugged(DeviceState *d)
   1771{
   1772    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
   1773    bool modern = virtio_pci_modern(proxy);
   1774    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
   1775
   1776    virtio_pci_stop_ioeventfd(proxy);
   1777
   1778    if (modern) {
   1779        virtio_pci_modern_mem_region_unmap(proxy, &proxy->common);
   1780        virtio_pci_modern_mem_region_unmap(proxy, &proxy->isr);
   1781        virtio_pci_modern_mem_region_unmap(proxy, &proxy->device);
   1782        virtio_pci_modern_mem_region_unmap(proxy, &proxy->notify);
   1783        if (modern_pio) {
   1784            virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
   1785        }
   1786    }
   1787}
   1788
   1789static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
   1790{
   1791    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
   1792    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(pci_dev);
   1793    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
   1794                     !pci_bus_is_root(pci_get_bus(pci_dev));
   1795
   1796    if (kvm_enabled() && !kvm_has_many_ioeventfds()) {
   1797        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
   1798    }
   1799
   1800    /* fd-based ioevents can't be synchronized in record/replay */
   1801    if (replay_mode != REPLAY_MODE_NONE) {
   1802        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
   1803    }
   1804
   1805    /*
   1806     * virtio pci bar layout used by default.
   1807     * subclasses can re-arrange things if needed.
   1808     *
   1809     *   region 0   --  virtio legacy io bar
   1810     *   region 1   --  msi-x bar
   1811     *   region 2   --  virtio modern io bar (off by default)
   1812     *   region 4+5 --  virtio modern memory (64bit) bar
   1813     *
   1814     */
   1815    proxy->legacy_io_bar_idx  = 0;
   1816    proxy->msix_bar_idx       = 1;
   1817    proxy->modern_io_bar_idx  = 2;
   1818    proxy->modern_mem_bar_idx = 4;
   1819
   1820    proxy->common.offset = 0x0;
   1821    proxy->common.size = 0x1000;
   1822    proxy->common.type = VIRTIO_PCI_CAP_COMMON_CFG;
   1823
   1824    proxy->isr.offset = 0x1000;
   1825    proxy->isr.size = 0x1000;
   1826    proxy->isr.type = VIRTIO_PCI_CAP_ISR_CFG;
   1827
   1828    proxy->device.offset = 0x2000;
   1829    proxy->device.size = 0x1000;
   1830    proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG;
   1831
   1832    proxy->notify.offset = 0x3000;
   1833    proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX;
   1834    proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG;
   1835
   1836    proxy->notify_pio.offset = 0x0;
   1837    proxy->notify_pio.size = 0x4;
   1838    proxy->notify_pio.type = VIRTIO_PCI_CAP_NOTIFY_CFG;
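    /*
     * Resulting modern BAR layout: common at 0x0000, isr at 0x1000 and
     * device at 0x2000, each 0x1000 bytes, followed by notify at
     * 0x3000, sized to fit one notify area per possible queue.
     */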
   1839
   1840    /* subclasses can enforce modern, so do this unconditionally */
   1841    memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci",
   1842                       /* PCI BAR regions must be powers of 2 */
   1843                       pow2ceil(proxy->notify.offset + proxy->notify.size));
   1844
   1845    if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
   1846        proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
   1847    }
   1848
   1849    if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) {
    1850        error_setg(errp, "device cannot work when neither modern nor legacy"
    1851                   " mode is enabled");
   1852        error_append_hint(errp, "Set either disable-modern or disable-legacy"
   1853                          " to off\n");
   1854        return;
   1855    }
   1856
   1857    if (pcie_port && pci_is_express(pci_dev)) {
   1858        int pos;
   1859        uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE;
   1860
   1861        pos = pcie_endpoint_cap_init(pci_dev, 0);
   1862        assert(pos > 0);
   1863
   1864        pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
   1865                                 PCI_PM_SIZEOF, errp);
   1866        if (pos < 0) {
   1867            return;
   1868        }
   1869
   1870        pci_dev->exp.pm_cap = pos;
   1871
   1872        /*
   1873         * Indicates that this function complies with revision 1.2 of the
   1874         * PCI Power Management Interface Specification.
   1875         */
   1876        pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
   1877
   1878        if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
   1879            pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
   1880                          PCI_ERR_SIZEOF, NULL);
   1881            last_pcie_cap_offset += PCI_ERR_SIZEOF;
   1882        }
   1883
   1884        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
   1885            /* Init error enabling flags */
   1886            pcie_cap_deverr_init(pci_dev);
   1887        }
   1888
   1889        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_LNKCTL) {
   1890            /* Init Link Control Register */
   1891            pcie_cap_lnkctl_init(pci_dev);
   1892        }
   1893
   1894        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
   1895            /* Init Power Management Control Register */
   1896            pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
   1897                         PCI_PM_CTRL_STATE_MASK);
   1898        }
   1899
   1900        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
   1901            pcie_ats_init(pci_dev, last_pcie_cap_offset,
   1902                          proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
   1903            last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
   1904        }
   1905
   1906        if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) {
   1907            /* Set Function Level Reset capability bit */
   1908            pcie_cap_flr_init(pci_dev);
   1909        }
   1910    } else {
   1911        /*
   1912         * make future invocations of pci_is_express() return false
   1913         * and pci_config_size() return PCI_CONFIG_SPACE_SIZE.
   1914         */
   1915        pci_dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
   1916    }
   1917
   1918    virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
   1919    if (k->realize) {
   1920        k->realize(proxy, errp);
   1921    }
   1922}
   1923
   1924static void virtio_pci_exit(PCIDevice *pci_dev)
   1925{
   1926    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
   1927    bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
   1928                     !pci_bus_is_root(pci_get_bus(pci_dev));
   1929
   1930    msix_uninit_exclusive_bar(pci_dev);
   1931    if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
   1932        pci_is_express(pci_dev)) {
   1933        pcie_aer_exit(pci_dev);
   1934    }
   1935}
   1936
   1937static void virtio_pci_reset(DeviceState *qdev)
   1938{
   1939    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
   1940    VirtioBusState *bus = VIRTIO_BUS(&proxy->bus);
   1941    PCIDevice *dev = PCI_DEVICE(qdev);
   1942    int i;
   1943
   1944    virtio_pci_stop_ioeventfd(proxy);
   1945    virtio_bus_reset(bus);
   1946    msix_unuse_all_vectors(&proxy->pci_dev);
   1947
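    /*
     * Clear the per-queue proxy state: the enabled flag, the queue
     * size, and the lo/hi 32-bit halves of the desc/avail/used ring
     * addresses.
     */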
   1948    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
   1949        proxy->vqs[i].enabled = 0;
   1950        proxy->vqs[i].num = 0;
   1951        proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
   1952        proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
   1953        proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
   1954    }
   1955
   1956    if (pci_is_express(dev)) {
   1957        pcie_cap_deverr_reset(dev);
   1958        pcie_cap_lnkctl_reset(dev);
   1959
   1960        pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
   1961    }
   1962}
   1963
   1964static Property virtio_pci_properties[] = {
   1965    DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
   1966                    VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
   1967    DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
   1968                    VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
   1969    DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
   1970                    VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
   1971    DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
   1972                    VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
   1973    DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
   1974                    VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
   1975    DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
   1976                     ignore_backend_features, false),
   1977    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
   1978                    VIRTIO_PCI_FLAG_ATS_BIT, false),
   1979    DEFINE_PROP_BIT("x-ats-page-aligned", VirtIOPCIProxy, flags,
   1980                    VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED_BIT, true),
   1981    DEFINE_PROP_BIT("x-pcie-deverr-init", VirtIOPCIProxy, flags,
   1982                    VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, true),
   1983    DEFINE_PROP_BIT("x-pcie-lnkctl-init", VirtIOPCIProxy, flags,
   1984                    VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true),
   1985    DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags,
   1986                    VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
   1987    DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
   1988                    VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
   1989    DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
   1990                    VIRTIO_PCI_FLAG_AER_BIT, false),
   1991    DEFINE_PROP_END_OF_LIST(),
   1992};
   1993
   1994static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
   1995{
   1996    VirtioPCIClass *vpciklass = VIRTIO_PCI_GET_CLASS(qdev);
   1997    VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
   1998    PCIDevice *pci_dev = &proxy->pci_dev;
   1999
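    /*
     * Advertise PCI Express only when the proxy can operate in modern
     * mode and PCIe support was not explicitly disabled; this must
     * happen before the parent realize so that config space is sized
     * accordingly.
     */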
   2000    if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
   2001        virtio_pci_modern(proxy)) {
   2002        pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
   2003    }
   2004
   2005    vpciklass->parent_dc_realize(qdev, errp);
   2006}
   2007
   2008static void virtio_pci_class_init(ObjectClass *klass, void *data)
   2009{
   2010    DeviceClass *dc = DEVICE_CLASS(klass);
   2011    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
   2012    VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
   2013
   2014    device_class_set_props(dc, virtio_pci_properties);
   2015    k->realize = virtio_pci_realize;
   2016    k->exit = virtio_pci_exit;
   2017    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
   2018    k->revision = VIRTIO_PCI_ABI_VERSION;
   2019    k->class_id = PCI_CLASS_OTHERS;
   2020    device_class_set_parent_realize(dc, virtio_pci_dc_realize,
   2021                                    &vpciklass->parent_dc_realize);
   2022    dc->reset = virtio_pci_reset;
   2023}
   2024
   2025static const TypeInfo virtio_pci_info = {
   2026    .name          = TYPE_VIRTIO_PCI,
   2027    .parent        = TYPE_PCI_DEVICE,
   2028    .instance_size = sizeof(VirtIOPCIProxy),
   2029    .class_init    = virtio_pci_class_init,
   2030    .class_size    = sizeof(VirtioPCIClass),
   2031    .abstract      = true,
   2032};
   2033
   2034static Property virtio_pci_generic_properties[] = {
   2035    DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy,
   2036                            ON_OFF_AUTO_AUTO),
   2037    DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false),
   2038    DEFINE_PROP_END_OF_LIST(),
   2039};
   2040
   2041static void virtio_pci_base_class_init(ObjectClass *klass, void *data)
   2042{
   2043    const VirtioPCIDeviceTypeInfo *t = data;
   2044    if (t->class_init) {
   2045        t->class_init(klass, NULL);
   2046    }
   2047}
   2048
   2049static void virtio_pci_generic_class_init(ObjectClass *klass, void *data)
   2050{
   2051    DeviceClass *dc = DEVICE_CLASS(klass);
   2052
   2053    device_class_set_props(dc, virtio_pci_generic_properties);
   2054}
   2055
   2056static void virtio_pci_transitional_instance_init(Object *obj)
   2057{
   2058    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
   2059
   2060    proxy->disable_legacy = ON_OFF_AUTO_OFF;
   2061    proxy->disable_modern = false;
   2062}
   2063
   2064static void virtio_pci_non_transitional_instance_init(Object *obj)
   2065{
   2066    VirtIOPCIProxy *proxy = VIRTIO_PCI(obj);
   2067
   2068    proxy->disable_legacy = ON_OFF_AUTO_ON;
   2069    proxy->disable_modern = false;
   2070}
   2071
   2072void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t)
   2073{
   2074    char *base_name = NULL;
   2075    TypeInfo base_type_info = {
   2076        .name          = t->base_name,
   2077        .parent        = t->parent ? t->parent : TYPE_VIRTIO_PCI,
   2078        .instance_size = t->instance_size,
   2079        .instance_init = t->instance_init,
   2080        .class_size    = t->class_size,
   2081        .abstract      = true,
   2082        .interfaces    = t->interfaces,
   2083    };
   2084    TypeInfo generic_type_info = {
   2085        .name = t->generic_name,
   2086        .parent = base_type_info.name,
   2087        .class_init = virtio_pci_generic_class_init,
   2088        .interfaces = (InterfaceInfo[]) {
   2089            { INTERFACE_PCIE_DEVICE },
   2090            { INTERFACE_CONVENTIONAL_PCI_DEVICE },
   2091            { }
   2092        },
   2093    };
   2094
   2095    if (!base_type_info.name) {
   2096        /* No base type -> register a single generic device type */
   2097        /* use intermediate %s-base-type to add generic device props */
   2098        base_name = g_strdup_printf("%s-base-type", t->generic_name);
   2099        base_type_info.name = base_name;
   2100        base_type_info.class_init = virtio_pci_generic_class_init;
   2101
   2102        generic_type_info.parent = base_name;
   2103        generic_type_info.class_init = virtio_pci_base_class_init;
   2104        generic_type_info.class_data = (void *)t;
   2105
   2106        assert(!t->non_transitional_name);
   2107        assert(!t->transitional_name);
   2108    } else {
   2109        base_type_info.class_init = virtio_pci_base_class_init;
   2110        base_type_info.class_data = (void *)t;
   2111    }
   2112
   2113    type_register(&base_type_info);
   2114    if (generic_type_info.name) {
   2115        type_register(&generic_type_info);
   2116    }
   2117
   2118    if (t->non_transitional_name) {
   2119        const TypeInfo non_transitional_type_info = {
   2120            .name          = t->non_transitional_name,
   2121            .parent        = base_type_info.name,
   2122            .instance_init = virtio_pci_non_transitional_instance_init,
   2123            .interfaces = (InterfaceInfo[]) {
   2124                { INTERFACE_PCIE_DEVICE },
   2125                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
   2126                { }
   2127            },
   2128        };
   2129        type_register(&non_transitional_type_info);
   2130    }
   2131
   2132    if (t->transitional_name) {
   2133        const TypeInfo transitional_type_info = {
   2134            .name          = t->transitional_name,
   2135            .parent        = base_type_info.name,
   2136            .instance_init = virtio_pci_transitional_instance_init,
   2137            .interfaces = (InterfaceInfo[]) {
   2138                /*
   2139                 * Transitional virtio devices work only as Conventional PCI
   2140                 * devices because they require PIO ports.
   2141                 */
   2142                { INTERFACE_CONVENTIONAL_PCI_DEVICE },
   2143                { }
   2144            },
   2145        };
   2146        type_register(&transitional_type_info);
   2147    }
   2148    g_free(base_name);
   2149}
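
/*
 * Typical usage, as a sketch (the virtio-foo names and types below are
 * illustrative placeholders, not a real device):
 *
 *     static const VirtioPCIDeviceTypeInfo virtio_foo_pci_info = {
 *         .base_name             = "virtio-foo-pci-base",
 *         .generic_name          = "virtio-foo-pci",
 *         .transitional_name     = "virtio-foo-pci-transitional",
 *         .non_transitional_name = "virtio-foo-pci-non-transitional",
 *         .instance_size         = sizeof(VirtIOFooPCI),
 *         .instance_init         = virtio_foo_pci_instance_init,
 *         .class_init            = virtio_foo_pci_class_init,
 *     };
 *
 *     virtio_pci_types_register(&virtio_foo_pci_info);
 *
 * This registers the abstract base type plus the generic, transitional
 * and non-transitional front-ends under the names given above.
 */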
   2150
   2151unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues)
   2152{
   2153    /*
   2154     * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted
   2155     * virtqueue buffers can handle their completion. When a different vCPU
   2156     * handles completion it may need to IPI the vCPU that submitted the
   2157     * request and this adds overhead.
   2158     *
   2159     * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in
   2160     * guests with very many vCPUs and a device that is only used by a few
   2161     * vCPUs. Unfortunately optimizing that case requires manual pinning inside
   2162     * the guest, so those users might as well manually set the number of
   2163     * queues. There is no upper limit that can be applied automatically and
   2164     * doing so arbitrarily would result in a sudden performance drop once the
   2165     * threshold number of vCPUs is exceeded.
   2166     */
   2167    unsigned num_queues = current_machine->smp.cpus;
   2168
   2169    /*
   2170     * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the
   2171     * config change interrupt and the fixed virtqueues must be taken into
   2172     * account too.
   2173     */
   2174    num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues);
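    /*
     * E.g. an 8-vCPU guest with fixed_queues == 2: MIN(8, 0x7ff - 2)
     * keeps num_queues at 8, the VIRTIO_QUEUE_MAX clamp below does not
     * bite either, and the device ends up with one queue per vCPU.
     */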
   2175
   2176    /*
   2177     * There is a limit to how many virtqueues a device can have.
   2178     */
   2179    return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues);
   2180}
   2181
   2182/* virtio-pci-bus */
   2183
   2184static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
   2185                               VirtIOPCIProxy *dev)
   2186{
   2187    DeviceState *qdev = DEVICE(dev);
   2188    char virtio_bus_name[] = "virtio-bus";
   2189
   2190    qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, virtio_bus_name);
   2191}
   2192
   2193static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
   2194{
   2195    BusClass *bus_class = BUS_CLASS(klass);
   2196    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
   2197    bus_class->max_dev = 1;
   2198    k->notify = virtio_pci_notify;
   2199    k->save_config = virtio_pci_save_config;
   2200    k->load_config = virtio_pci_load_config;
   2201    k->save_queue = virtio_pci_save_queue;
   2202    k->load_queue = virtio_pci_load_queue;
   2203    k->save_extra_state = virtio_pci_save_extra_state;
   2204    k->load_extra_state = virtio_pci_load_extra_state;
   2205    k->has_extra_state = virtio_pci_has_extra_state;
   2206    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
   2207    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
   2208    k->set_host_notifier_mr = virtio_pci_set_host_notifier_mr;
   2209    k->vmstate_change = virtio_pci_vmstate_change;
   2210    k->pre_plugged = virtio_pci_pre_plugged;
   2211    k->device_plugged = virtio_pci_device_plugged;
   2212    k->device_unplugged = virtio_pci_device_unplugged;
   2213    k->query_nvectors = virtio_pci_query_nvectors;
   2214    k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled;
   2215    k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
   2216    k->get_dma_as = virtio_pci_get_dma_as;
   2217    k->iommu_enabled = virtio_pci_iommu_enabled;
   2218    k->queue_enabled = virtio_pci_queue_enabled;
   2219}
   2220
   2221static const TypeInfo virtio_pci_bus_info = {
   2222    .name          = TYPE_VIRTIO_PCI_BUS,
   2223    .parent        = TYPE_VIRTIO_BUS,
   2224    .instance_size = sizeof(VirtioPCIBusState),
   2225    .class_size    = sizeof(VirtioPCIBusClass),
   2226    .class_init    = virtio_pci_bus_class_init,
   2227};
   2228
   2229static void virtio_pci_register_types(void)
   2230{
   2231    /* Base types: */
   2232    type_register_static(&virtio_pci_bus_info);
   2233    type_register_static(&virtio_pci_info);
   2234}
   2235
   2236type_init(virtio_pci_register_types)
   2237