cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

vmbus.c (78217B)


      1/*
      2 * QEMU Hyper-V VMBus
      3 *
      4 * Copyright (c) 2017-2018 Virtuozzo International GmbH.
      5 *
      6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
      7 * See the COPYING file in the top-level directory.
      8 */
      9
     10#include "qemu/osdep.h"
     11#include "qemu/error-report.h"
     12#include "qemu/main-loop.h"
     13#include "qapi/error.h"
     14#include "migration/vmstate.h"
     15#include "hw/qdev-properties.h"
     16#include "hw/qdev-properties-system.h"
     17#include "hw/hyperv/hyperv.h"
     18#include "hw/hyperv/vmbus.h"
     19#include "hw/hyperv/vmbus-bridge.h"
     20#include "hw/sysbus.h"
     21#include "cpu.h"
     22#include "trace.h"
     23
/*
 * GPADL lifecycle states.
 * NOTE(review): presumably the values kept in VMBusGpadl.state; the code that
 * assigns them is outside this part of the file - confirm.
 */
enum {
    VMGPADL_INIT,
    VMGPADL_ALIVE,
    VMGPADL_TEARINGDOWN,
    VMGPADL_TORNDOWN,
};
     30
/*
 * A GPADL: a list of guest page frame numbers (@gfns) identified by @id.
 * Instances are created by create_gpadl(), linked into VMBus.gpadl_list and
 * reference-counted via vmbus_get_gpadl() / vmbus_put_gpadl().
 */
struct VMBusGpadl {
    /* GPADL id */
    uint32_t id;
    /* associated channel id (rudimentary?) */
    uint32_t child_relid;

    /* number of pages in the GPADL as declared in GPADL_HEADER message */
    uint32_t num_gfns;
    /*
     * Due to limited message size, GPADL may not fit fully in a single
     * GPADL_HEADER message, and is further populated using GPADL_BODY
     * messages.  @seen_gfns is the number of pages seen so far; once it
     * reaches @num_gfns, the GPADL is ready to use.
     */
    uint32_t seen_gfns;
    /* array of GFNs (of size @num_gfns once allocated) */
    uint64_t *gfns;

    /* presumably one of VMGPADL_* - confirm against the state machine code */
    uint8_t state;

    /* entry in VMBus.gpadl_list */
    QTAILQ_ENTRY(VMBusGpadl) link;
    /* owning bus; needed to unlink from its gpadl_list on free */
    VMBus *vmbus;
    /* starts at 1 on creation; the GPADL is freed when it drops to 0 */
    unsigned refcount;
};
     55
/*
 * Wrap sequential read from / write to GPADL.
 * All i/o must be bracketed by gpadl_iter_start_io() / gpadl_iter_end_io().
 */
typedef struct GpadlIter {
    /* GPADL whose pages are accessed */
    VMBusGpadl *gpadl;
    /* address space the GPADL pages are mapped from */
    AddressSpace *as;
    /*
     * direction of the copy: DMA_DIRECTION_FROM_DEVICE copies from the caller
     * buffer into the GPADL, anything else copies from the GPADL
     */
    DMADirection dir;
    /* offset into GPADL where the next i/o will be performed */
    uint32_t off;
    /*
     * Cached mapping of the currently accessed page, up to page boundary.
     * Updated lazily on i/o.
     * Note: MemoryRegionCache can not be used here because pages in the GPADL
     * are non-contiguous and may belong to different memory regions.
     */
    void *map;
    /* offset after last i/o (i.e. not affected by seek) */
    uint32_t last_off;
    /*
     * Indicator that the iterator is active and may have a cached mapping.
     * Allows to enforce bracketing of all i/o (which may create cached
     * mappings) and thus exclude mapping leaks.
     */
    bool active;
} GpadlIter;
     81
/*
 * Ring buffer.  There are two of them, sitting in the same GPADL, for each
 * channel.
 * Each ring buffer consists of a set of pages, with the first page containing
 * the ring buffer header, and the remaining pages being for data packets.
 * This structure holds the part shared by the send and receive variants and
 * is filled in by ringbuf_init_common().
 */
typedef struct VMBusRingBufCommon {
    /* address space used to map the header and the data pages */
    AddressSpace *as;
    /* GPA of the ring buffer header */
    dma_addr_t rb_addr;
    /* start and length of the ring buffer data area within GPADL */
    uint32_t base;
    uint32_t len;

    /* iterator over the GPADL used for all data area i/o */
    GpadlIter iter;
} VMBusRingBufCommon;
     98
/*
 * Ring buffer the device writes packets into (set up with
 * DMA_DIRECTION_FROM_DEVICE in ringbufs_init()).
 */
typedef struct VMBusSendRingBuf {
    VMBusRingBufCommon common;
    /* current write index, to be committed at the end of send */
    uint32_t wr_idx;
    /* write index at the start of send */
    uint32_t last_wr_idx;
    /* space to be requested from the guest */
    uint32_t wanted;
    /* space reserved for planned sends */
    uint32_t reserved;
    /* last seen read index */
    uint32_t last_seen_rd_idx;
} VMBusSendRingBuf;
    112
/*
 * Ring buffer the device reads packets from (set up with
 * DMA_DIRECTION_TO_DEVICE in ringbufs_init()).
 */
typedef struct VMBusRecvRingBuf {
    VMBusRingBufCommon common;
    /* current read index, to be committed at the end of receive */
    uint32_t rd_idx;
    /* read index at the start of receive */
    uint32_t last_rd_idx;
    /* last seen write index */
    uint32_t last_seen_wr_idx;
} VMBusRecvRingBuf;
    122
    123
/*
 * Channel offer states.
 * NOTE(review): presumably the values kept in VMBusChannel.offer_state -
 * confirm against the offer handling code.
 */
enum {
    VMOFFER_INIT,
    VMOFFER_SENDING,
    VMOFFER_SENT,
};
    129
/*
 * Channel open states.
 * NOTE(review): presumably the values kept in VMBusChannel.state - confirm
 * against the channel open handling code.
 */
enum {
    VMCHAN_INIT,
    VMCHAN_OPENING,
    VMCHAN_OPEN,
};
    135
/*
 * A single VMBus channel of a device, including its pair of ring buffers.
 */
struct VMBusChannel {
    /* device the channel belongs to; the channel lives in dev->channels[] */
    VMBusDevice *dev;

    /* channel id */
    uint32_t id;
    /*
     * subchannel index within the device; subchannel #0 is "primary" and
     * always exists
     */
    uint16_t subchan_idx;
    /* NOTE(review): presumably the id the guest passed when opening - confirm */
    uint32_t open_id;
    /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
    uint32_t target_vp;
    /* GPADL id to use for the ring buffers */
    uint32_t ringbuf_gpadl;
    /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
    uint32_t ringbuf_send_offset;

    /* presumably one of VMOFFER_* */
    uint8_t offer_state;
    /* presumably one of VMCHAN_* */
    uint8_t state;
    /* value reported by vmbus_channel_is_open() */
    bool is_open;

    /* main device worker; copied from the device class */
    VMBusChannelNotifyCb notify_cb;
    /*
     * guest->host notifications, either sent directly or dispatched via
     * interrupt page (older VMBus)
     */
    EventNotifier notifier;

    VMBus *vmbus;
    /*
     * SINT route to signal with host->guest notifications; may be shared with
     * the main VMBus SINT route
     */
    HvSintRoute *notify_route;
    /* GPADL backing both ring buffers (see ringbufs_init()) */
    VMBusGpadl *gpadl;

    VMBusSendRingBuf send_ringbuf;
    VMBusRecvRingBuf recv_ringbuf;

    /* list linkage; presumably an entry in VMBus.channel_list */
    QTAILQ_ENTRY(VMBusChannel) link;
};
    179
/*
 * Hyper-V spec mandates that every message port has 16 buffers, which means
 * that the guest can post up to this many messages without blocking.
 * Therefore a queue for incoming messages has to be provided.
 * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
 * doesn't transition to a new state until the message is known to have been
 * successfully delivered to the respective SynIC message slot.
 */
#define HV_MSG_QUEUE_LEN     16

/* Hyper-V devices never use channel #0.  Must be something special. */
#define VMBUS_FIRST_CHANID      1
/*
 * Each channel occupies one bit within a single event page sint slot.
 * This is also the size (in bits) of VMBus.chanid_bitmap.
 */
#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* Leave a few connection numbers for other purposes. */
#define VMBUS_CHAN_CONNECTION_OFFSET     16
    196
/*
 * Since the success or failure of sending a message is reported
 * asynchronously, the VMBus state machine has effectively two entry points:
 * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
 * message delivery status becomes known).  Both are run as oneshot BHs on the
 * main aio context, ensuring serialization.
 *
 * NOTE(review): presumably these are the values of VMBus.state, with
 * VMBUS_STATE_MAX being the number of states rather than a state - confirm.
 */
enum {
    VMBUS_LISTEN,
    VMBUS_HANDSHAKE,
    VMBUS_OFFER,
    VMBUS_CREATE_GPADL,
    VMBUS_TEARDOWN_GPADL,
    VMBUS_OPEN_CHANNEL,
    VMBUS_UNLOAD,
    VMBUS_STATE_MAX
};
    214
/*
 * The VMBus itself: bus-wide protocol state, the incoming message queue and
 * the lists of GPADLs and channels.
 */
struct VMBus {
    BusState parent;

    /* presumably one of VMBUS_* (below VMBUS_STATE_MAX) */
    uint8_t state;
    /* protection against recursive aio_poll (see vmbus_run) */
    bool in_progress;
    /* whether there's a message being delivered to the guest */
    bool msg_in_progress;
    /* NOTE(review): presumably the negotiated protocol version - confirm */
    uint32_t version;
    /* VP_INDEX of the vCPU to send messages and interrupts to */
    uint32_t target_vp;
    HvSintRoute *sint_route;
    /*
     * interrupt page for older protocol versions; newer ones use SynIC event
     * flags directly
     */
    hwaddr int_page_gpa;

    /* NOTE(review): presumably tracks which channel ids are in use */
    DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);

    /* incoming message queue */
    struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
    uint8_t rx_queue_head;
    uint8_t rx_queue_size;
    /* NOTE(review): presumably guards the rx_queue* fields above */
    QemuMutex rx_queue_lock;

    /* GPADLs created by create_gpadl() */
    QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
    QTAILQ_HEAD(, VMBusChannel) channel_list;

    /*
     * guest->host notifications for older VMBus, to be dispatched via
     * interrupt page
     */
    EventNotifier notifier;
};
    250
    251static bool gpadl_full(VMBusGpadl *gpadl)
    252{
    253    return gpadl->seen_gfns == gpadl->num_gfns;
    254}
    255
    256static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
    257                                uint32_t child_relid, uint32_t num_gfns)
    258{
    259    VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
    260
    261    gpadl->id = id;
    262    gpadl->child_relid = child_relid;
    263    gpadl->num_gfns = num_gfns;
    264    gpadl->gfns = g_new(uint64_t, num_gfns);
    265    QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
    266    gpadl->vmbus = vmbus;
    267    gpadl->refcount = 1;
    268    return gpadl;
    269}
    270
    271static void free_gpadl(VMBusGpadl *gpadl)
    272{
    273    QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
    274    g_free(gpadl->gfns);
    275    g_free(gpadl);
    276}
    277
    278static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
    279{
    280    VMBusGpadl *gpadl;
    281    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
    282        if (gpadl->id == gpadl_id) {
    283            return gpadl;
    284        }
    285    }
    286    return NULL;
    287}
    288
    289VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
    290{
    291    VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
    292    if (!gpadl || !gpadl_full(gpadl)) {
    293        return NULL;
    294    }
    295    gpadl->refcount++;
    296    return gpadl;
    297}
    298
    299void vmbus_put_gpadl(VMBusGpadl *gpadl)
    300{
    301    if (!gpadl) {
    302        return;
    303    }
    304    if (--gpadl->refcount) {
    305        return;
    306    }
    307    free_gpadl(gpadl);
    308}
    309
    310uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
    311{
    312    return gpadl->num_gfns * TARGET_PAGE_SIZE;
    313}
    314
    315static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
    316                            AddressSpace *as, DMADirection dir)
    317{
    318    iter->gpadl = gpadl;
    319    iter->as = as;
    320    iter->dir = dir;
    321    iter->active = false;
    322}
    323
    324static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
    325{
    326    uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
    327    uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
    328
    329    /* mapping is only done to do non-zero amount of i/o */
    330    assert(iter->last_off > 0);
    331    assert(map_start_in_page < io_end_in_page);
    332
    333    dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
    334                     iter->dir, io_end_in_page - map_start_in_page);
    335}
    336
    337/*
    338 * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
    339 * The direction of the copy is determined by @iter->dir.
    340 * The caller must ensure the operation overflows neither @buf nor the GPADL
    341 * (there's an assert for the latter).
    342 * Reuse the currently mapped page in the GPADL if possible.
    343 */
    344static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
    345{
    346    ssize_t ret = len;
    347
    348    assert(iter->active);
    349
    350    while (len) {
    351        uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
    352        uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
    353        uint32_t cplen = MIN(pgleft, len);
    354        void *p;
    355
    356        /* try to reuse the cached mapping */
    357        if (iter->map) {
    358            uint32_t map_start_in_page =
    359                (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
    360            uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
    361            uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
    362            if (off_base != mapped_base || off_in_page < map_start_in_page) {
    363                gpadl_iter_cache_unmap(iter);
    364                iter->map = NULL;
    365            }
    366        }
    367
    368        if (!iter->map) {
    369            dma_addr_t maddr;
    370            dma_addr_t mlen = pgleft;
    371            uint32_t idx = iter->off >> TARGET_PAGE_BITS;
    372            assert(idx < iter->gpadl->num_gfns);
    373
    374            maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
    375
    376            iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir);
    377            if (mlen != pgleft) {
    378                dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
    379                iter->map = NULL;
    380                return -EFAULT;
    381            }
    382        }
    383
    384        p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
    385                off_in_page);
    386        if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
    387            memcpy(p, buf, cplen);
    388        } else {
    389            memcpy(buf, p, cplen);
    390        }
    391
    392        buf += cplen;
    393        len -= cplen;
    394        iter->off += cplen;
    395        iter->last_off = iter->off;
    396    }
    397
    398    return ret;
    399}
    400
/*
 * Position the iterator @iter at new offset @new_off.
 * Only the offset is updated here; a cached mapping that is unusable with the
 * new offset is released lazily by the next gpadl_iter_io() or by
 * gpadl_iter_end_io().
 * May only be called between gpadl_iter_start_io() and gpadl_iter_end_io().
 */
static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
{
    assert(iter->active);
    iter->off = new_off;
}
    411
    412/*
    413 * Start a series of i/o on the GPADL.
    414 * After this i/o and seek operations on @iter become legal.
    415 */
    416static inline void gpadl_iter_start_io(GpadlIter *iter)
    417{
    418    assert(!iter->active);
    419    /* mapping is cached lazily on i/o */
    420    iter->map = NULL;
    421    iter->active = true;
    422}
    423
    424/*
    425 * End the eariler started series of i/o on the GPADL and release the cached
    426 * mapping if any.
    427 */
    428static inline void gpadl_iter_end_io(GpadlIter *iter)
    429{
    430    assert(iter->active);
    431
    432    if (iter->map) {
    433        gpadl_iter_cache_unmap(iter);
    434    }
    435
    436    iter->active = false;
    437}
    438
/* forward declarations of static functions defined further down the file */
static void vmbus_resched(VMBus *vmbus);
static void vmbus_msg_cb(void *data, int status);
    441
    442ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
    443                           const struct iovec *iov, size_t iov_cnt)
    444{
    445    GpadlIter iter;
    446    size_t i;
    447    ssize_t ret = 0;
    448
    449    gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
    450                    DMA_DIRECTION_FROM_DEVICE);
    451    gpadl_iter_start_io(&iter);
    452    gpadl_iter_seek(&iter, off);
    453    for (i = 0; i < iov_cnt; i++) {
    454        ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
    455        if (ret < 0) {
    456            goto out;
    457        }
    458    }
    459out:
    460    gpadl_iter_end_io(&iter);
    461    return ret;
    462}
    463
    464int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
    465                  unsigned iov_cnt, size_t len, size_t off)
    466{
    467    int ret_cnt = 0, ret;
    468    unsigned i;
    469    QEMUSGList *sgl = &req->sgl;
    470    ScatterGatherEntry *sg = sgl->sg;
    471
    472    for (i = 0; i < sgl->nsg; i++) {
    473        if (sg[i].len > off) {
    474            break;
    475        }
    476        off -= sg[i].len;
    477    }
    478    for (; len && i < sgl->nsg; i++) {
    479        dma_addr_t mlen = MIN(sg[i].len - off, len);
    480        dma_addr_t addr = sg[i].base + off;
    481        len -= mlen;
    482        off = 0;
    483
    484        for (; mlen; ret_cnt++) {
    485            dma_addr_t l = mlen;
    486            dma_addr_t a = addr;
    487
    488            if (ret_cnt == iov_cnt) {
    489                ret = -ENOBUFS;
    490                goto err;
    491            }
    492
    493            iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir);
    494            if (!l) {
    495                ret = -EFAULT;
    496                goto err;
    497            }
    498            iov[ret_cnt].iov_len = l;
    499            addr += l;
    500            mlen -= l;
    501        }
    502    }
    503
    504    return ret_cnt;
    505err:
    506    vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
    507    return ret;
    508}
    509
    510void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
    511                     unsigned iov_cnt, size_t accessed)
    512{
    513    QEMUSGList *sgl = &req->sgl;
    514    unsigned i;
    515
    516    for (i = 0; i < iov_cnt; i++) {
    517        size_t acsd = MIN(accessed, iov[i].iov_len);
    518        dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
    519        accessed -= acsd;
    520    }
    521}
    522
/*
 * Migration description of a VMBusGpadl.
 * @gfns is transferred as a variable-length array of @num_gfns 64-bit
 * values; the link, vmbus and refcount fields are not part of the stream.
 */
static const VMStateDescription vmstate_gpadl = {
    .name = "vmbus/gpadl",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VMBusGpadl),
        VMSTATE_UINT32(child_relid, VMBusGpadl),
        VMSTATE_UINT32(num_gfns, VMBusGpadl),
        VMSTATE_UINT32(seen_gfns, VMBusGpadl),
        VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
                                    vmstate_info_uint64, uint64_t),
        VMSTATE_UINT8(state, VMBusGpadl),
        VMSTATE_END_OF_LIST()
    }
};
    538
    539/*
    540 * Wrap the index into a ring buffer of @len bytes.
    541 * @idx is assumed not to exceed twice the size of the ringbuffer, so only
    542 * single wraparound is considered.
    543 */
    544static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
    545{
    546    if (idx >= len) {
    547        idx -= len;
    548    }
    549    return idx;
    550}
    551
    552/*
    553 * Circular difference between two indices into a ring buffer of @len bytes.
    554 * @allow_catchup - whether @idx1 may catch up @idx2; e.g. read index may catch
    555 * up write index but not vice versa.
    556 */
    557static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
    558                                    bool allow_catchup)
    559{
    560    return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len);
    561}
    562
    563static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
    564{
    565    vmbus_ring_buffer *rb;
    566    dma_addr_t mlen = sizeof(*rb);
    567
    568    rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
    569                        DMA_DIRECTION_FROM_DEVICE);
    570    if (mlen != sizeof(*rb)) {
    571        dma_memory_unmap(ringbuf->as, rb, mlen,
    572                         DMA_DIRECTION_FROM_DEVICE, 0);
    573        return NULL;
    574    }
    575    return rb;
    576}
    577
    578static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
    579                              vmbus_ring_buffer *rb, bool dirty)
    580{
    581    assert(rb);
    582
    583    dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
    584                     dirty ? sizeof(*rb) : 0);
    585}
    586
    587static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
    588                                AddressSpace *as, DMADirection dir,
    589                                uint32_t begin, uint32_t end)
    590{
    591    ringbuf->as = as;
    592    ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
    593    ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
    594    ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
    595    gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
    596}
    597
    598static int ringbufs_init(VMBusChannel *chan)
    599{
    600    vmbus_ring_buffer *rb;
    601    VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
    602    VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
    603
    604    if (chan->ringbuf_send_offset <= 1 ||
    605        chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
    606        return -EINVAL;
    607    }
    608
    609    ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
    610                        DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
    611    ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
    612                        DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
    613                        chan->gpadl->num_gfns);
    614    send_ringbuf->wanted = 0;
    615    send_ringbuf->reserved = 0;
    616
    617    rb = ringbuf_map_hdr(&recv_ringbuf->common);
    618    if (!rb) {
    619        return -EFAULT;
    620    }
    621    recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
    622    ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
    623
    624    rb = ringbuf_map_hdr(&send_ringbuf->common);
    625    if (!rb) {
    626        return -EFAULT;
    627    }
    628    send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
    629    send_ringbuf->last_seen_rd_idx = rb->read_index;
    630    rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
    631    ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
    632
    633    if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
    634        send_ringbuf->wr_idx >= send_ringbuf->common.len) {
    635        return -EOVERFLOW;
    636    }
    637
    638    return 0;
    639}
    640
    641/*
    642 * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
    643 * around if needed.
    644 * @len is assumed not to exceed the size of the ringbuffer, so only single
    645 * wraparound is considered.
    646 */
    647static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
    648{
    649    ssize_t ret1 = 0, ret2 = 0;
    650    uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
    651
    652    if (len >= remain) {
    653        ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
    654        if (ret1 < 0) {
    655            return ret1;
    656        }
    657        gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
    658        buf += remain;
    659        len -= remain;
    660    }
    661    ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
    662    if (ret2 < 0) {
    663        return ret2;
    664    }
    665    return ret1 + ret2;
    666}
    667
    668/*
    669 * Position the circular iterator within @ringbuf to offset @new_off, wrapping
    670 * around if needed.
    671 * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
    672 * single wraparound is considered.
    673 */
    674static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
    675{
    676    gpadl_iter_seek(&ringbuf->iter,
    677                    ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
    678}
    679
    680static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
    681{
    682    return ringbuf->iter.off - ringbuf->base;
    683}
    684
    685static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
    686{
    687    gpadl_iter_start_io(&ringbuf->iter);
    688}
    689
    690static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
    691{
    692    gpadl_iter_end_io(&ringbuf->iter);
    693}
    694
    695VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
    696{
    697    return chan->dev;
    698}
    699
    700VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
    701{
    702    if (chan_idx >= dev->num_channels) {
    703        return NULL;
    704    }
    705    return &dev->channels[chan_idx];
    706}
    707
    708uint32_t vmbus_channel_idx(VMBusChannel *chan)
    709{
    710    return chan - chan->dev->channels;
    711}
    712
    713void vmbus_channel_notify_host(VMBusChannel *chan)
    714{
    715    event_notifier_set(&chan->notifier);
    716}
    717
    718bool vmbus_channel_is_open(VMBusChannel *chan)
    719{
    720    return chan->is_open;
    721}
    722
    723/*
    724 * Notify the guest side about the data to work on in the channel ring buffer.
    725 * The notification is done by signaling a dedicated per-channel SynIC event
    726 * flag (more recent guests) or setting a bit in the interrupt page and firing
    727 * the VMBus SINT (older guests).
    728 */
    729static int vmbus_channel_notify_guest(VMBusChannel *chan)
    730{
    731    int res = 0;
    732    unsigned long *int_map, mask;
    733    unsigned idx;
    734    hwaddr addr = chan->vmbus->int_page_gpa;
    735    hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
    736
    737    trace_vmbus_channel_notify_guest(chan->id);
    738
    739    if (!addr) {
    740        return hyperv_set_event_flag(chan->notify_route, chan->id);
    741    }
    742
    743    int_map = cpu_physical_memory_map(addr, &len, 1);
    744    if (len != TARGET_PAGE_SIZE / 2) {
    745        res = -ENXIO;
    746        goto unmap;
    747    }
    748
    749    idx = BIT_WORD(chan->id);
    750    mask = BIT_MASK(chan->id);
    751    if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
    752        res = hyperv_sint_route_set_sint(chan->notify_route);
    753        dirty = len;
    754    }
    755
    756unmap:
    757    cpu_physical_memory_unmap(int_map, len, 1, dirty);
    758    return res;
    759}
    760
/*
 * Size of the trailer taken by each packet in the ring buffer in addition to
 * the length recorded in the packet header.
 */
#define VMBUS_PKT_TRAILER      sizeof(uint64_t)
    762
    763static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
    764                                          uint32_t desclen, uint32_t msglen)
    765{
    766    hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
    767        DIV_ROUND_UP(desclen, sizeof(uint64_t));
    768    hdr->len_qwords = hdr->offset_qwords +
    769        DIV_ROUND_UP(msglen, sizeof(uint64_t));
    770    return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
    771}
    772
    773/*
    774 * Simplified ring buffer operation with paired barriers annotations in the
    775 * producer and consumer loops:
    776 *
    777 * producer                           * consumer
    778 * ~~~~~~~~                           * ~~~~~~~~
    779 * write pending_send_sz              * read write_index
    780 * smp_mb                       [A]   * smp_mb                       [C]
    781 * read read_index                    * read packet
    782 * smp_mb                       [B]   * read/write out-of-band data
    783 * read/write out-of-band data        * smp_mb                       [B]
    784 * write packet                       * write read_index
    785 * smp_mb                       [C]   * smp_mb                       [A]
    786 * write write_index                  * read pending_send_sz
    787 * smp_wmb                      [D]   * smp_rmb                      [D]
    788 * write pending_send_sz              * read write_index
    789 * ...                                * ...
    790 */
    791
    792static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
    793{
    794    /* don't trust guest data */
    795    if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
    796        return 0;
    797    }
    798    return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
    799                        ringbuf->common.len, false);
    800}
    801
/*
 * Publish to the guest what has been written into the send ring buffer of
 * @chan since the previous update: store the current write index in the ring
 * buffer header, account for the written bytes in the reservation and in the
 * pending size, refresh the last seen read index, and notify the host and/or
 * the guest side as needed.
 *
 * Returns the number of bytes written since the previous update (0 if none,
 * in which case guest memory is not touched), or -EFAULT if the ring buffer
 * header cannot be mapped.
 */
static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
{
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t written;

    written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
                           ringbuf->common.len, true);
    if (!written) {
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    ringbuf->reserved -= written;

    /* prevent reorder with the data operation and packet write */
    smp_mb();                   /* barrier pair [C] */
    rb->write_index = ringbuf->wr_idx;

    /*
     * If the producer earlier indicated that it wants to be notified when the
     * consumer frees certain amount of space in the ring buffer, that amount
     * is reduced by the size of the completed write.
     */
    if (ringbuf->wanted) {
        /* otherwise reservation would fail */
        /*
         * NOTE(review): with wanted < written the unsigned subtraction below
         * wraps around; the condition looks inverted - confirm.
         */
        assert(ringbuf->wanted < written);
        ringbuf->wanted -= written;
        /* prevent reorder with write_index write */
        smp_wmb();              /* barrier pair [D] */
        rb->pending_send_sz = ringbuf->wanted;
    }

    /* prevent reorder with write_index or pending_send_sz write */
    smp_mb();                   /* barrier pair [A] */
    ringbuf->last_seen_rd_idx = rb->read_index;

    /*
     * The consumer may have missed the reduction of pending_send_sz and skip
     * notification, so re-check the blocking condition, and, if it's no longer
     * true, ensure processing another iteration by simulating consumer's
     * notification.
     */
    if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
        vmbus_channel_notify_host(chan);
    }

    /* skip notification by consumer's request */
    if (rb->interrupt_mask) {
        goto out;
    }

    /*
     * The consumer hasn't caught up with the producer's previous state so it's
     * not blocked.
     * (last_seen_rd_idx comes from the guest but it's safe to use w/o
     * validation here as it only affects notification.)
     */
    if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
                     ringbuf->common.len, true) > written) {
        goto out;
    }

    /* the result of the guest notification is not checked */
    vmbus_channel_notify_guest(chan);
out:
    /* the header is always unmapped as dirty: write_index was updated */
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    ringbuf->last_wr_idx = ringbuf->wr_idx;
    return written;
}
    875
/*
 * Reserve room in the channel's send ring buffer for one more packet made of
 * @desclen bytes of descriptor and @msglen bytes of message.
 *
 * The packet size computed by vmbus_pkt_hdr_set_offsets() is added on top of
 * what is already reserved, so successive reservations accumulate.
 *
 * Returns 0 on success, -EFAULT if the ring buffer header cannot be mapped,
 * or -ENOSPC if there is not enough free space.  In the -ENOSPC case the
 * required amount is remembered in @wanted and written to pending_send_sz in
 * the ring buffer header.
 */
int vmbus_channel_reserve(VMBusChannel *chan,
                          uint32_t desclen, uint32_t msglen)
{
    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    vmbus_ring_buffer *rb = NULL;
    vmbus_packet_hdr hdr;
    /* total reservation if this request succeeds: previous + this packet */
    uint32_t needed = ringbuf->reserved +
        vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);

    /* avoid touching the guest memory if possible */
    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
        goto success;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* fetch read index from guest memory and try again */
    ringbuf->last_seen_rd_idx = rb->read_index;

    if (likely(needed <= ringbuf_send_avail(ringbuf))) {
        goto success;
    }

    /* still no room: publish how much space is being waited for */
    rb->pending_send_sz = needed;

    /*
     * The consumer may have made progress and freed up some space before
     * seeing updated pending_send_sz, so re-read read_index (preventing
     * reorder with the pending_send_sz write) and try again.
     */
    smp_mb();                   /* barrier pair [A] */
    ringbuf->last_seen_rd_idx = rb->read_index;

    if (needed > ringbuf_send_avail(ringbuf)) {
        /* @needed stays non-zero, which yields -ENOSPC below */
        goto out;
    }

success:
    ringbuf->reserved = needed;
    /* zero @needed doubles as the "success" marker for the code below */
    needed = 0;

    /* clear pending_send_sz if it was set */
    if (ringbuf->wanted) {
        if (!rb) {
            rb = ringbuf_map_hdr(&ringbuf->common);
            if (!rb) {
                /* failure to clear pending_send_sz is non-fatal */
                goto out;
            }
        }

        rb->pending_send_sz = 0;
    }

    /* prevent reorder of the following data operation with read_index read */
    smp_mb();                   /* barrier pair [B] */

out:
    if (rb) {
        /*
         * NOTE(review): the last argument is presumably a "header was
         * modified" flag (other callers pass a literal true/false); confirm
         * that (wanted == needed) is the intended polarity here.
         */
        ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
    }
    ringbuf->wanted = needed;
    return needed ? -ENOSPC : 0;
}
    943
    944ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
    945                           void *desc, uint32_t desclen,
    946                           void *msg, uint32_t msglen,
    947                           bool need_comp, uint64_t transaction_id)
    948{
    949    ssize_t ret = 0;
    950    vmbus_packet_hdr hdr;
    951    uint32_t totlen;
    952    VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    953
    954    if (!vmbus_channel_is_open(chan)) {
    955        return -EINVAL;
    956    }
    957
    958    totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
    959    hdr.type = pkt_type;
    960    hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
    961    hdr.transaction_id = transaction_id;
    962
    963    assert(totlen <= ringbuf->reserved);
    964
    965    ringbuf_start_io(&ringbuf->common);
    966    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
    967    ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
    968    if (ret < 0) {
    969        goto out;
    970    }
    971    if (desclen) {
    972        assert(desc);
    973        ret = ringbuf_io(&ringbuf->common, desc, desclen);
    974        if (ret < 0) {
    975            goto out;
    976        }
    977        ringbuf_seek(&ringbuf->common,
    978                     ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
    979    }
    980    ret = ringbuf_io(&ringbuf->common, msg, msglen);
    981    if (ret < 0) {
    982        goto out;
    983    }
    984    ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
    985    ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
    986    ret = 0;
    987out:
    988    ringbuf_end_io(&ringbuf->common);
    989    if (ret) {
    990        return ret;
    991    }
    992    return ringbuf_send_update_idx(chan);
    993}
    994
    995ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
    996                                      void *msg, uint32_t msglen)
    997{
    998    assert(req->need_comp);
    999    return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
   1000                              msg, msglen, false, req->transaction_id);
   1001}
   1002
   1003static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
   1004                               VMBusRingBufCommon *ringbuf, uint32_t len)
   1005{
   1006    int ret;
   1007    vmbus_pkt_gpa_direct hdr;
   1008    hwaddr curaddr = 0;
   1009    hwaddr curlen = 0;
   1010    int num;
   1011
   1012    if (len < sizeof(hdr)) {
   1013        return -EIO;
   1014    }
   1015    ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
   1016    if (ret < 0) {
   1017        return ret;
   1018    }
   1019    len -= sizeof(hdr);
   1020
   1021    num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
   1022    if (num < 0) {
   1023        return -EIO;
   1024    }
   1025    qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
   1026
   1027    for (; hdr.rangecount; hdr.rangecount--) {
   1028        vmbus_gpa_range range;
   1029
   1030        if (len < sizeof(range)) {
   1031            goto eio;
   1032        }
   1033        ret = ringbuf_io(ringbuf, &range, sizeof(range));
   1034        if (ret < 0) {
   1035            goto err;
   1036        }
   1037        len -= sizeof(range);
   1038
   1039        if (range.byte_offset & TARGET_PAGE_MASK) {
   1040            goto eio;
   1041        }
   1042
   1043        for (; range.byte_count; range.byte_offset = 0) {
   1044            uint64_t paddr;
   1045            uint32_t plen = MIN(range.byte_count,
   1046                                TARGET_PAGE_SIZE - range.byte_offset);
   1047
   1048            if (len < sizeof(uint64_t)) {
   1049                goto eio;
   1050            }
   1051            ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
   1052            if (ret < 0) {
   1053                goto err;
   1054            }
   1055            len -= sizeof(uint64_t);
   1056            paddr <<= TARGET_PAGE_BITS;
   1057            paddr |= range.byte_offset;
   1058            range.byte_count -= plen;
   1059
   1060            if (curaddr + curlen == paddr) {
   1061                /* consecutive fragments - join */
   1062                curlen += plen;
   1063            } else {
   1064                if (curlen) {
   1065                    qemu_sglist_add(sgl, curaddr, curlen);
   1066                }
   1067
   1068                curaddr = paddr;
   1069                curlen = plen;
   1070            }
   1071        }
   1072    }
   1073
   1074    if (curlen) {
   1075        qemu_sglist_add(sgl, curaddr, curlen);
   1076    }
   1077
   1078    return 0;
   1079eio:
   1080    ret = -EIO;
   1081err:
   1082    qemu_sglist_destroy(sgl);
   1083    return ret;
   1084}
   1085
   1086static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
   1087                                     uint32_t size, uint16_t pkt_type,
   1088                                     uint32_t msglen, uint64_t transaction_id,
   1089                                     bool need_comp)
   1090{
   1091    VMBusChanReq *req;
   1092    uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
   1093    uint32_t totlen = msgoff + msglen;
   1094
   1095    req = g_malloc0(totlen);
   1096    req->chan = chan;
   1097    req->pkt_type = pkt_type;
   1098    req->msg = (void *)req + msgoff;
   1099    req->msglen = msglen;
   1100    req->transaction_id = transaction_id;
   1101    req->need_comp = need_comp;
   1102    return req;
   1103}
   1104
   1105int vmbus_channel_recv_start(VMBusChannel *chan)
   1106{
   1107    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1108    vmbus_ring_buffer *rb;
   1109
   1110    rb = ringbuf_map_hdr(&ringbuf->common);
   1111    if (!rb) {
   1112        return -EFAULT;
   1113    }
   1114    ringbuf->last_seen_wr_idx = rb->write_index;
   1115    ringbuf_unmap_hdr(&ringbuf->common, rb, false);
   1116
   1117    if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
   1118        return -EOVERFLOW;
   1119    }
   1120
   1121    /* prevent reorder of the following data operation with write_index read */
   1122    smp_mb();                   /* barrier pair [C] */
   1123    return 0;
   1124}
   1125
   1126void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
   1127{
   1128    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1129    vmbus_packet_hdr hdr = {};
   1130    VMBusChanReq *req;
   1131    uint32_t avail;
   1132    uint32_t totlen, pktlen, msglen, msgoff, desclen;
   1133
   1134    assert(size >= sizeof(*req));
   1135
   1136    /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
   1137    avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
   1138                         ringbuf->common.len, true);
   1139    if (avail < sizeof(hdr)) {
   1140        return NULL;
   1141    }
   1142
   1143    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
   1144    if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
   1145        return NULL;
   1146    }
   1147
   1148    pktlen = hdr.len_qwords * sizeof(uint64_t);
   1149    totlen = pktlen + VMBUS_PKT_TRAILER;
   1150    if (totlen > avail) {
   1151        return NULL;
   1152    }
   1153
   1154    msgoff = hdr.offset_qwords * sizeof(uint64_t);
   1155    if (msgoff > pktlen || msgoff < sizeof(hdr)) {
   1156        error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
   1157        return NULL;
   1158    }
   1159
   1160    msglen = pktlen - msgoff;
   1161
   1162    req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
   1163                          hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
   1164
   1165    switch (hdr.type) {
   1166    case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
   1167        desclen = msgoff - sizeof(hdr);
   1168        if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
   1169                                desclen) < 0) {
   1170            error_report("%s: failed to convert GPA ranges to SGL", __func__);
   1171            goto free_req;
   1172        }
   1173        break;
   1174    case VMBUS_PACKET_DATA_INBAND:
   1175    case VMBUS_PACKET_COMP:
   1176        break;
   1177    default:
   1178        error_report("%s: unexpected msg type: %x", __func__, hdr.type);
   1179        goto free_req;
   1180    }
   1181
   1182    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
   1183    if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
   1184        goto free_req;
   1185    }
   1186    ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
   1187
   1188    return req;
   1189free_req:
   1190    vmbus_free_req(req);
   1191    return NULL;
   1192}
   1193
   1194void vmbus_channel_recv_pop(VMBusChannel *chan)
   1195{
   1196    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1197    ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
   1198}
   1199
/*
 * Finish a receive pass: if rd_idx moved since the previous call, publish it
 * in the ring buffer header and notify the guest unless the notification can
 * be skipped.
 *
 * Returns the index delta between the previously published read index and
 * the current one (0 if nothing was consumed), or -EFAULT if the ring buffer
 * header cannot be mapped.
 */
ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
{
    VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
    vmbus_ring_buffer *rb;
    uint32_t read;

    /* amount consumed since the last published read index */
    read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
                        ringbuf->common.len, true);
    if (!read) {
        return 0;
    }

    rb = ringbuf_map_hdr(&ringbuf->common);
    if (!rb) {
        return -EFAULT;
    }

    /* prevent reorder with the data operation and packet read */
    smp_mb();                   /* barrier pair [B] */
    rb->read_index = ringbuf->rd_idx;

    /* prevent reorder of the following pending_send_sz read */
    smp_mb();                   /* barrier pair [A] */

    /* the producer asked not to be notified */
    if (rb->interrupt_mask) {
        goto out;
    }

    if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
        uint32_t wr_idx, wr_avail;
        uint32_t wanted = rb->pending_send_sz;

        /* the producer is not waiting for space */
        if (!wanted) {
            goto out;
        }

        /* prevent reorder with pending_send_sz read */
        smp_rmb();              /* barrier pair [D] */
        wr_idx = rb->write_index;

        /* index delta from the producer's write index to our read index */
        wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
                                true);

        /* the producer wasn't blocked on the consumer state */
        if (wr_avail >= read + wanted) {
            goto out;
        }
        /* there's not enough space for the producer to make progress */
        if (wr_avail < wanted) {
            goto out;
        }
    }

    vmbus_channel_notify_guest(chan);
out:
    ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    /* remember what has been published for the next delta computation */
    ringbuf->last_rd_idx = ringbuf->rd_idx;
    return read;
}
   1259
   1260void vmbus_free_req(void *req)
   1261{
   1262    VMBusChanReq *r = req;
   1263
   1264    if (!req) {
   1265        return;
   1266    }
   1267
   1268    if (r->sgl.dev) {
   1269        qemu_sglist_destroy(&r->sgl);
   1270    }
   1271    g_free(req);
   1272}
   1273
/*
 * VMState description of a single scatter-gather entry: its base and length,
 * each stored as a 64-bit value.
 */
static const VMStateDescription vmstate_sgent = {
    .name = "vmbus/sgentry",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(base, ScatterGatherEntry),
        VMSTATE_UINT64(len, ScatterGatherEntry),
        VMSTATE_END_OF_LIST()
    }
};
   1284
/*
 * Flat snapshot of a VMBusChanReq used as the in-memory image for
 * vmstate_vmbus_chan_req; filled by vmbus_save_req() and consumed by
 * vmbus_load_req().
 */
typedef struct VMBusChanReqSave {
    /* subchannel index of the channel the request belongs to */
    uint16_t chan_idx;
    uint16_t pkt_type;
    /* size of @msg in bytes */
    uint32_t msglen;
    void *msg;
    uint64_t transaction_id;
    bool need_comp;
    /* number of entries in @sgl */
    uint32_t num;
    ScatterGatherEntry *sgl;
} VMBusChanReqSave;
   1295
/*
 * VMState description of a VMBusChanReqSave.  The message is a variable-size
 * buffer of @msglen bytes and the scatter-gather list is a variable-size
 * array of @num entries described by vmstate_sgent.
 */
static const VMStateDescription vmstate_vmbus_chan_req = {
    .name = "vmbus/vmbus_chan_req",
    .version_id = 0,
    .minimum_version_id = 0,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(chan_idx, VMBusChanReqSave),
        VMSTATE_UINT16(pkt_type, VMBusChanReqSave),
        /* msglen precedes msg: it gives the size of the buffer that follows */
        VMSTATE_UINT32(msglen, VMBusChanReqSave),
        VMSTATE_VBUFFER_ALLOC_UINT32(msg, VMBusChanReqSave, 0, NULL, msglen),
        VMSTATE_UINT64(transaction_id, VMBusChanReqSave),
        VMSTATE_BOOL(need_comp, VMBusChanReqSave),
        /* num precedes sgl: it gives the element count of the array */
        VMSTATE_UINT32(num, VMBusChanReqSave),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT32(sgl, VMBusChanReqSave, num,
                                             vmstate_sgent, ScatterGatherEntry),
        VMSTATE_END_OF_LIST()
    }
};
   1313
   1314void vmbus_save_req(QEMUFile *f, VMBusChanReq *req)
   1315{
   1316    VMBusChanReqSave req_save;
   1317
   1318    req_save.chan_idx = req->chan->subchan_idx;
   1319    req_save.pkt_type = req->pkt_type;
   1320    req_save.msglen = req->msglen;
   1321    req_save.msg = req->msg;
   1322    req_save.transaction_id = req->transaction_id;
   1323    req_save.need_comp = req->need_comp;
   1324    req_save.num = req->sgl.nsg;
   1325    req_save.sgl = g_memdup(req->sgl.sg,
   1326                            req_save.num * sizeof(ScatterGatherEntry));
   1327
   1328    vmstate_save_state(f, &vmstate_vmbus_chan_req, &req_save, NULL);
   1329
   1330    g_free(req_save.sgl);
   1331}
   1332
   1333void *vmbus_load_req(QEMUFile *f, VMBusDevice *dev, uint32_t size)
   1334{
   1335    VMBusChanReqSave req_save;
   1336    VMBusChanReq *req = NULL;
   1337    VMBusChannel *chan = NULL;
   1338    uint32_t i;
   1339
   1340    vmstate_load_state(f, &vmstate_vmbus_chan_req, &req_save, 0);
   1341
   1342    if (req_save.chan_idx >= dev->num_channels) {
   1343        error_report("%s: %u(chan_idx) > %u(num_channels)", __func__,
   1344                     req_save.chan_idx, dev->num_channels);
   1345        goto out;
   1346    }
   1347    chan = &dev->channels[req_save.chan_idx];
   1348
   1349    if (vmbus_channel_reserve(chan, 0, req_save.msglen)) {
   1350        goto out;
   1351    }
   1352
   1353    req = vmbus_alloc_req(chan, size, req_save.pkt_type, req_save.msglen,
   1354                          req_save.transaction_id, req_save.need_comp);
   1355    if (req_save.msglen) {
   1356        memcpy(req->msg, req_save.msg, req_save.msglen);
   1357    }
   1358
   1359    for (i = 0; i < req_save.num; i++) {
   1360        qemu_sglist_add(&req->sgl, req_save.sgl[i].base, req_save.sgl[i].len);
   1361    }
   1362
   1363out:
   1364    if (req_save.msglen) {
   1365        g_free(req_save.msg);
   1366    }
   1367    if (req_save.num) {
   1368        g_free(req_save.sgl);
   1369    }
   1370    return req;
   1371}
   1372
   1373static void channel_event_cb(EventNotifier *e)
   1374{
   1375    VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
   1376    if (event_notifier_test_and_clear(e)) {
   1377        /*
   1378         * All receives are supposed to happen within the device worker, so
   1379         * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
   1380         * potentially reuse the cached mapping throughout the worker.
   1381         * Can't do this for sends as they may happen outside the device
   1382         * worker.
   1383         */
   1384        VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1385        ringbuf_start_io(&ringbuf->common);
   1386        chan->notify_cb(chan);
   1387        ringbuf_end_io(&ringbuf->common);
   1388
   1389    }
   1390}
   1391
   1392static int alloc_chan_id(VMBus *vmbus)
   1393{
   1394    int ret;
   1395
   1396    ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
   1397    if (ret == VMBUS_CHANID_COUNT) {
   1398        return -ENOMEM;
   1399    }
   1400    return ret + VMBUS_FIRST_CHANID;
   1401}
   1402
   1403static int register_chan_id(VMBusChannel *chan)
   1404{
   1405    return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
   1406                            chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
   1407}
   1408
   1409static void unregister_chan_id(VMBusChannel *chan)
   1410{
   1411    clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
   1412}
   1413
   1414static uint32_t chan_connection_id(VMBusChannel *chan)
   1415{
   1416    return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
   1417}
   1418
   1419static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
   1420                         VMBusChannel *chan, uint16_t idx, Error **errp)
   1421{
   1422    int res;
   1423
   1424    chan->dev = dev;
   1425    chan->notify_cb = vdc->chan_notify_cb;
   1426    chan->subchan_idx = idx;
   1427    chan->vmbus = vmbus;
   1428
   1429    res = alloc_chan_id(vmbus);
   1430    if (res < 0) {
   1431        error_setg(errp, "no spare channel id");
   1432        return;
   1433    }
   1434    chan->id = res;
   1435    register_chan_id(chan);
   1436
   1437    /*
   1438     * The guest drivers depend on the device subchannels (idx #1+) to be
   1439     * offered after the primary channel (idx #0) of that device.  To ensure
   1440     * that, record the channels on the channel list in the order they appear
   1441     * within the device.
   1442     */
   1443    QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
   1444}
   1445
   1446static void deinit_channel(VMBusChannel *chan)
   1447{
   1448    assert(chan->state == VMCHAN_INIT);
   1449    QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
   1450    unregister_chan_id(chan);
   1451}
   1452
   1453static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
   1454{
   1455    uint16_t i;
   1456    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
   1457    Error *err = NULL;
   1458
   1459    dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
   1460    if (dev->num_channels < 1) {
   1461        error_setg(errp, "invalid #channels: %u", dev->num_channels);
   1462        return;
   1463    }
   1464
   1465    dev->channels = g_new0(VMBusChannel, dev->num_channels);
   1466    for (i = 0; i < dev->num_channels; i++) {
   1467        init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
   1468        if (err) {
   1469            goto err_init;
   1470        }
   1471    }
   1472
   1473    return;
   1474
   1475err_init:
   1476    while (i--) {
   1477        deinit_channel(&dev->channels[i]);
   1478    }
   1479    error_propagate(errp, err);
   1480}
   1481
   1482static void free_channels(VMBusDevice *dev)
   1483{
   1484    uint16_t i;
   1485    for (i = 0; i < dev->num_channels; i++) {
   1486        deinit_channel(&dev->channels[i]);
   1487    }
   1488    g_free(dev->channels);
   1489}
   1490
   1491static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
   1492{
   1493    VMBusChannel *chan;
   1494
   1495    if (vp_index == vmbus->target_vp) {
   1496        hyperv_sint_route_ref(vmbus->sint_route);
   1497        return vmbus->sint_route;
   1498    }
   1499
   1500    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1501        if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
   1502            hyperv_sint_route_ref(chan->notify_route);
   1503            return chan->notify_route;
   1504        }
   1505    }
   1506
   1507    return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
   1508}
   1509
   1510static void open_channel(VMBusChannel *chan)
   1511{
   1512    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
   1513
   1514    chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
   1515    if (!chan->gpadl) {
   1516        return;
   1517    }
   1518
   1519    if (ringbufs_init(chan)) {
   1520        goto put_gpadl;
   1521    }
   1522
   1523    if (event_notifier_init(&chan->notifier, 0)) {
   1524        goto put_gpadl;
   1525    }
   1526
   1527    event_notifier_set_handler(&chan->notifier, channel_event_cb);
   1528
   1529    if (hyperv_set_event_flag_handler(chan_connection_id(chan),
   1530                                      &chan->notifier)) {
   1531        goto cleanup_notifier;
   1532    }
   1533
   1534    chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
   1535    if (!chan->notify_route) {
   1536        goto clear_event_flag_handler;
   1537    }
   1538
   1539    if (vdc->open_channel && vdc->open_channel(chan)) {
   1540        goto unref_sint_route;
   1541    }
   1542
   1543    chan->is_open = true;
   1544    return;
   1545
   1546unref_sint_route:
   1547    hyperv_sint_route_unref(chan->notify_route);
   1548clear_event_flag_handler:
   1549    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
   1550cleanup_notifier:
   1551    event_notifier_set_handler(&chan->notifier, NULL);
   1552    event_notifier_cleanup(&chan->notifier);
   1553put_gpadl:
   1554    vmbus_put_gpadl(chan->gpadl);
   1555}
   1556
   1557static void close_channel(VMBusChannel *chan)
   1558{
   1559    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
   1560
   1561    if (!chan->is_open) {
   1562        return;
   1563    }
   1564
   1565    if (vdc->close_channel) {
   1566        vdc->close_channel(chan);
   1567    }
   1568
   1569    hyperv_sint_route_unref(chan->notify_route);
   1570    hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
   1571    event_notifier_set_handler(&chan->notifier, NULL);
   1572    event_notifier_cleanup(&chan->notifier);
   1573    vmbus_put_gpadl(chan->gpadl);
   1574    chan->is_open = false;
   1575}
   1576
   1577static int channel_post_load(void *opaque, int version_id)
   1578{
   1579    VMBusChannel *chan = opaque;
   1580
   1581    return register_chan_id(chan);
   1582}
   1583
/*
 * VMState description of a VMBusChannel.  After the fields are loaded,
 * channel_post_load() runs as the post_load hook.
 */
static const VMStateDescription vmstate_channel = {
    .name = "vmbus/channel",
    .version_id = 0,
    .minimum_version_id = 0,
    .post_load = channel_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VMBusChannel),
        VMSTATE_UINT16(subchan_idx, VMBusChannel),
        VMSTATE_UINT32(open_id, VMBusChannel),
        VMSTATE_UINT32(target_vp, VMBusChannel),
        VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
        VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
        VMSTATE_UINT8(offer_state, VMBusChannel),
        VMSTATE_UINT8(state, VMBusChannel),
        VMSTATE_END_OF_LIST()
    }
};
   1601
   1602static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
   1603{
   1604    VMBusChannel *chan;
   1605    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1606        if (chan->id == id) {
   1607            return chan;
   1608        }
   1609    }
   1610    return NULL;
   1611}
   1612
   1613static int enqueue_incoming_message(VMBus *vmbus,
   1614                                    const struct hyperv_post_message_input *msg)
   1615{
   1616    int ret = 0;
   1617    uint8_t idx, prev_size;
   1618
   1619    qemu_mutex_lock(&vmbus->rx_queue_lock);
   1620
   1621    if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
   1622        ret = -ENOBUFS;
   1623        goto out;
   1624    }
   1625
   1626    prev_size = vmbus->rx_queue_size;
   1627    idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
   1628    memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
   1629    vmbus->rx_queue_size++;
   1630
   1631    /* only need to resched if the queue was empty before */
   1632    if (!prev_size) {
   1633        vmbus_resched(vmbus);
   1634    }
   1635out:
   1636    qemu_mutex_unlock(&vmbus->rx_queue_lock);
   1637    return ret;
   1638}
   1639
   1640static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
   1641                                   void *data)
   1642{
   1643    VMBus *vmbus = data;
   1644    struct vmbus_message_header *vmbus_msg;
   1645
   1646    if (msg->message_type != HV_MESSAGE_VMBUS) {
   1647        return HV_STATUS_INVALID_HYPERCALL_INPUT;
   1648    }
   1649
   1650    if (msg->payload_size < sizeof(struct vmbus_message_header)) {
   1651        return HV_STATUS_INVALID_HYPERCALL_INPUT;
   1652    }
   1653
   1654    vmbus_msg = (struct vmbus_message_header *)msg->payload;
   1655
   1656    trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
   1657
   1658    if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
   1659        vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
   1660        error_report("vmbus: unknown message type %#x",
   1661                     vmbus_msg->message_type);
   1662        return HV_STATUS_INVALID_HYPERCALL_INPUT;
   1663    }
   1664
   1665    if (enqueue_incoming_message(vmbus, msg)) {
   1666        return HV_STATUS_INSUFFICIENT_BUFFERS;
   1667    }
   1668    return HV_STATUS_SUCCESS;
   1669}
   1670
   1671static bool vmbus_initialized(VMBus *vmbus)
   1672{
   1673    return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
   1674}
   1675
   1676static void vmbus_reset_all(VMBus *vmbus)
   1677{
   1678    qbus_reset_all(BUS(vmbus));
   1679}
   1680
   1681static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
   1682{
   1683    int ret;
   1684    struct hyperv_message msg = {
   1685        .header.message_type = HV_MESSAGE_VMBUS,
   1686    };
   1687
   1688    assert(!vmbus->msg_in_progress);
   1689    assert(msglen <= sizeof(msg.payload));
   1690    assert(msglen >= sizeof(struct vmbus_message_header));
   1691
   1692    vmbus->msg_in_progress = true;
   1693
   1694    trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
   1695                         msglen);
   1696
   1697    memcpy(msg.payload, msgdata, msglen);
   1698    msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
   1699
   1700    ret = hyperv_post_msg(vmbus->sint_route, &msg);
   1701    if (ret == 0 || ret == -EAGAIN) {
   1702        return;
   1703    }
   1704
   1705    error_report("message delivery fatal failure: %d; aborting vmbus", ret);
   1706    vmbus_reset_all(vmbus);
   1707}
   1708
   1709static int vmbus_init(VMBus *vmbus)
   1710{
   1711    if (vmbus->target_vp != (uint32_t)-1) {
   1712        vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
   1713                                                  vmbus_msg_cb, vmbus);
   1714        if (!vmbus->sint_route) {
   1715            error_report("failed to set up SINT route");
   1716            return -ENOMEM;
   1717        }
   1718    }
   1719    return 0;
   1720}
   1721
   1722static void vmbus_deinit(VMBus *vmbus)
   1723{
   1724    VMBusGpadl *gpadl, *tmp_gpadl;
   1725    VMBusChannel *chan;
   1726
   1727    QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
   1728        if (gpadl->state == VMGPADL_TORNDOWN) {
   1729            continue;
   1730        }
   1731        vmbus_put_gpadl(gpadl);
   1732    }
   1733
   1734    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1735        chan->offer_state = VMOFFER_INIT;
   1736    }
   1737
   1738    hyperv_sint_route_unref(vmbus->sint_route);
   1739    vmbus->sint_route = NULL;
   1740    vmbus->int_page_gpa = 0;
   1741    vmbus->target_vp = (uint32_t)-1;
   1742    vmbus->version = 0;
   1743    vmbus->state = VMBUS_LISTEN;
   1744    vmbus->msg_in_progress = false;
   1745}
   1746
   1747static void handle_initiate_contact(VMBus *vmbus,
   1748                                    vmbus_message_initiate_contact *msg,
   1749                                    uint32_t msglen)
   1750{
   1751    if (msglen < sizeof(*msg)) {
   1752        return;
   1753    }
   1754
   1755    trace_vmbus_initiate_contact(msg->version_requested >> 16,
   1756                                 msg->version_requested & 0xffff,
   1757                                 msg->target_vcpu, msg->monitor_page1,
   1758                                 msg->monitor_page2, msg->interrupt_page);
   1759
   1760    /*
   1761     * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
   1762     * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
   1763     * before handing over to OS loader.
   1764     */
   1765    vmbus_reset_all(vmbus);
   1766
   1767    vmbus->target_vp = msg->target_vcpu;
   1768    vmbus->version = msg->version_requested;
   1769    if (vmbus->version < VMBUS_VERSION_WIN8) {
   1770        /* linux passes interrupt page even when it doesn't need it */
   1771        vmbus->int_page_gpa = msg->interrupt_page;
   1772    }
   1773    vmbus->state = VMBUS_HANDSHAKE;
   1774
   1775    if (vmbus_init(vmbus)) {
   1776        error_report("failed to init vmbus; aborting");
   1777        vmbus_deinit(vmbus);
   1778        return;
   1779    }
   1780}
   1781
   1782static void send_handshake(VMBus *vmbus)
   1783{
   1784    struct vmbus_message_version_response msg = {
   1785        .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
   1786        .version_supported = vmbus_initialized(vmbus),
   1787    };
   1788
   1789    post_msg(vmbus, &msg, sizeof(msg));
   1790}
   1791
   1792static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
   1793{
   1794    VMBusChannel *chan;
   1795
   1796    if (!vmbus_initialized(vmbus)) {
   1797        return;
   1798    }
   1799
   1800    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1801        if (chan->offer_state == VMOFFER_INIT) {
   1802            chan->offer_state = VMOFFER_SENDING;
   1803            break;
   1804        }
   1805    }
   1806
   1807    vmbus->state = VMBUS_OFFER;
   1808}
   1809
   1810static void send_offer(VMBus *vmbus)
   1811{
   1812    VMBusChannel *chan;
   1813    struct vmbus_message_header alloffers_msg = {
   1814        .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
   1815    };
   1816
   1817    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1818        if (chan->offer_state == VMOFFER_SENDING) {
   1819            VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
   1820            /* Hyper-V wants LE GUIDs */
   1821            QemuUUID classid = qemu_uuid_bswap(vdc->classid);
   1822            QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
   1823            struct vmbus_message_offer_channel msg = {
   1824                .header.message_type = VMBUS_MSG_OFFERCHANNEL,
   1825                .child_relid = chan->id,
   1826                .connection_id = chan_connection_id(chan),
   1827                .channel_flags = vdc->channel_flags,
   1828                .mmio_size_mb = vdc->mmio_size_mb,
   1829                .sub_channel_index = vmbus_channel_idx(chan),
   1830                .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
   1831            };
   1832
   1833            memcpy(msg.type_uuid, &classid, sizeof(classid));
   1834            memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
   1835
   1836            trace_vmbus_send_offer(chan->id, chan->dev);
   1837
   1838            post_msg(vmbus, &msg, sizeof(msg));
   1839            return;
   1840        }
   1841    }
   1842
   1843    /* no more offers, send terminator message */
   1844    trace_vmbus_terminate_offers();
   1845    post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
   1846}
   1847
   1848static bool complete_offer(VMBus *vmbus)
   1849{
   1850    VMBusChannel *chan;
   1851
   1852    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1853        if (chan->offer_state == VMOFFER_SENDING) {
   1854            chan->offer_state = VMOFFER_SENT;
   1855            goto next_offer;
   1856        }
   1857    }
   1858    /*
   1859     * no transitioning channels found so this is completing the terminator
   1860     * message, and vmbus can move to the next state
   1861     */
   1862    return true;
   1863
   1864next_offer:
   1865    /* try to mark another channel for offering */
   1866    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1867        if (chan->offer_state == VMOFFER_INIT) {
   1868            chan->offer_state = VMOFFER_SENDING;
   1869            break;
   1870        }
   1871    }
   1872    /*
   1873     * if an offer has been sent there are more offers or the terminator yet to
   1874     * send, so no state transition for vmbus
   1875     */
   1876    return false;
   1877}
   1878
   1879
   1880static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
   1881                                uint32_t msglen)
   1882{
   1883    VMBusGpadl *gpadl;
   1884    uint32_t num_gfns, i;
   1885
   1886    /* must include at least one gpa range */
   1887    if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
   1888        !vmbus_initialized(vmbus)) {
   1889        return;
   1890    }
   1891
   1892    num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
   1893               sizeof(msg->range[0].pfn_array[0]);
   1894
   1895    trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
   1896
   1897    /*
   1898     * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
   1899     * ranges each with arbitrary size and alignment.  However in practice only
   1900     * single-range page-aligned GPADLs have been observed so just ignore
   1901     * anything else and simplify things greatly.
   1902     */
   1903    if (msg->rangecount != 1 || msg->range[0].byte_offset ||
   1904        (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
   1905        return;
   1906    }
   1907
   1908    /* ignore requests to create already existing GPADLs */
   1909    if (find_gpadl(vmbus, msg->gpadl_id)) {
   1910        return;
   1911    }
   1912
   1913    gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
   1914
   1915    for (i = 0; i < num_gfns &&
   1916         (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
   1917         i++) {
   1918        gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
   1919    }
   1920
   1921    if (gpadl_full(gpadl)) {
   1922        vmbus->state = VMBUS_CREATE_GPADL;
   1923    }
   1924}
   1925
   1926static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
   1927                              uint32_t msglen)
   1928{
   1929    VMBusGpadl *gpadl;
   1930    uint32_t num_gfns_left, i;
   1931
   1932    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   1933        return;
   1934    }
   1935
   1936    trace_vmbus_gpadl_body(msg->gpadl_id);
   1937
   1938    gpadl = find_gpadl(vmbus, msg->gpadl_id);
   1939    if (!gpadl) {
   1940        return;
   1941    }
   1942
   1943    num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
   1944    assert(num_gfns_left);
   1945
   1946    for (i = 0; i < num_gfns_left &&
   1947         (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
   1948        gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
   1949    }
   1950
   1951    if (gpadl_full(gpadl)) {
   1952        vmbus->state = VMBUS_CREATE_GPADL;
   1953    }
   1954}
   1955
   1956static void send_create_gpadl(VMBus *vmbus)
   1957{
   1958    VMBusGpadl *gpadl;
   1959
   1960    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   1961        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
   1962            struct vmbus_message_gpadl_created msg = {
   1963                .header.message_type = VMBUS_MSG_GPADL_CREATED,
   1964                .gpadl_id = gpadl->id,
   1965                .child_relid = gpadl->child_relid,
   1966            };
   1967
   1968            trace_vmbus_gpadl_created(gpadl->id);
   1969            post_msg(vmbus, &msg, sizeof(msg));
   1970            return;
   1971        }
   1972    }
   1973
   1974    assert(false);
   1975}
   1976
   1977static bool complete_create_gpadl(VMBus *vmbus)
   1978{
   1979    VMBusGpadl *gpadl;
   1980
   1981    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   1982        if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
   1983            gpadl->state = VMGPADL_ALIVE;
   1984
   1985            return true;
   1986        }
   1987    }
   1988
   1989    assert(false);
   1990    return false;
   1991}
   1992
   1993static void handle_gpadl_teardown(VMBus *vmbus,
   1994                                  vmbus_message_gpadl_teardown *msg,
   1995                                  uint32_t msglen)
   1996{
   1997    VMBusGpadl *gpadl;
   1998
   1999    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   2000        return;
   2001    }
   2002
   2003    trace_vmbus_gpadl_teardown(msg->gpadl_id);
   2004
   2005    gpadl = find_gpadl(vmbus, msg->gpadl_id);
   2006    if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
   2007        return;
   2008    }
   2009
   2010    gpadl->state = VMGPADL_TEARINGDOWN;
   2011    vmbus->state = VMBUS_TEARDOWN_GPADL;
   2012}
   2013
   2014static void send_teardown_gpadl(VMBus *vmbus)
   2015{
   2016    VMBusGpadl *gpadl;
   2017
   2018    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   2019        if (gpadl->state == VMGPADL_TEARINGDOWN) {
   2020            struct vmbus_message_gpadl_torndown msg = {
   2021                .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
   2022                .gpadl_id = gpadl->id,
   2023            };
   2024
   2025            trace_vmbus_gpadl_torndown(gpadl->id);
   2026            post_msg(vmbus, &msg, sizeof(msg));
   2027            return;
   2028        }
   2029    }
   2030
   2031    assert(false);
   2032}
   2033
   2034static bool complete_teardown_gpadl(VMBus *vmbus)
   2035{
   2036    VMBusGpadl *gpadl;
   2037
   2038    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   2039        if (gpadl->state == VMGPADL_TEARINGDOWN) {
   2040            gpadl->state = VMGPADL_TORNDOWN;
   2041            vmbus_put_gpadl(gpadl);
   2042            return true;
   2043        }
   2044    }
   2045
   2046    assert(false);
   2047    return false;
   2048}
   2049
   2050static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
   2051                                uint32_t msglen)
   2052{
   2053    VMBusChannel *chan;
   2054
   2055    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   2056        return;
   2057    }
   2058
   2059    trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
   2060                             msg->target_vp);
   2061    chan = find_channel(vmbus, msg->child_relid);
   2062    if (!chan || chan->state != VMCHAN_INIT) {
   2063        return;
   2064    }
   2065
   2066    chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
   2067    chan->ringbuf_send_offset = msg->ring_buffer_offset;
   2068    chan->target_vp = msg->target_vp;
   2069    chan->open_id = msg->open_id;
   2070
   2071    open_channel(chan);
   2072
   2073    chan->state = VMCHAN_OPENING;
   2074    vmbus->state = VMBUS_OPEN_CHANNEL;
   2075}
   2076
   2077static void send_open_channel(VMBus *vmbus)
   2078{
   2079    VMBusChannel *chan;
   2080
   2081    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2082        if (chan->state == VMCHAN_OPENING) {
   2083            struct vmbus_message_open_result msg = {
   2084                .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
   2085                .child_relid = chan->id,
   2086                .open_id = chan->open_id,
   2087                .status = !vmbus_channel_is_open(chan),
   2088            };
   2089
   2090            trace_vmbus_channel_open(chan->id, msg.status);
   2091            post_msg(vmbus, &msg, sizeof(msg));
   2092            return;
   2093        }
   2094    }
   2095
   2096    assert(false);
   2097}
   2098
   2099static bool complete_open_channel(VMBus *vmbus)
   2100{
   2101    VMBusChannel *chan;
   2102
   2103    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2104        if (chan->state == VMCHAN_OPENING) {
   2105            if (vmbus_channel_is_open(chan)) {
   2106                chan->state = VMCHAN_OPEN;
   2107                /*
   2108                 * simulate guest notification of ringbuffer space made
   2109                 * available, for the channel protocols where the host
   2110                 * initiates the communication
   2111                 */
   2112                vmbus_channel_notify_host(chan);
   2113            } else {
   2114                chan->state = VMCHAN_INIT;
   2115            }
   2116            return true;
   2117        }
   2118    }
   2119
   2120    assert(false);
   2121    return false;
   2122}
   2123
   2124static void vdev_reset_on_close(VMBusDevice *vdev)
   2125{
   2126    uint16_t i;
   2127
   2128    for (i = 0; i < vdev->num_channels; i++) {
   2129        if (vmbus_channel_is_open(&vdev->channels[i])) {
   2130            return;
   2131        }
   2132    }
   2133
   2134    /* all channels closed -- reset device */
   2135    qdev_reset_all(DEVICE(vdev));
   2136}
   2137
   2138static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
   2139                                 uint32_t msglen)
   2140{
   2141    VMBusChannel *chan;
   2142
   2143    if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   2144        return;
   2145    }
   2146
   2147    trace_vmbus_close_channel(msg->child_relid);
   2148
   2149    chan = find_channel(vmbus, msg->child_relid);
   2150    if (!chan) {
   2151        return;
   2152    }
   2153
   2154    close_channel(chan);
   2155    chan->state = VMCHAN_INIT;
   2156
   2157    vdev_reset_on_close(chan->dev);
   2158}
   2159
   2160static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
   2161{
   2162    vmbus->state = VMBUS_UNLOAD;
   2163}
   2164
   2165static void send_unload(VMBus *vmbus)
   2166{
   2167    vmbus_message_header msg = {
   2168        .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
   2169    };
   2170
   2171    qemu_mutex_lock(&vmbus->rx_queue_lock);
   2172    vmbus->rx_queue_size = 0;
   2173    qemu_mutex_unlock(&vmbus->rx_queue_lock);
   2174
   2175    post_msg(vmbus, &msg, sizeof(msg));
   2176    return;
   2177}
   2178
   2179static bool complete_unload(VMBus *vmbus)
   2180{
   2181    vmbus_reset_all(vmbus);
   2182    return true;
   2183}
   2184
   2185static void process_message(VMBus *vmbus)
   2186{
   2187    struct hyperv_post_message_input *hv_msg;
   2188    struct vmbus_message_header *msg;
   2189    void *msgdata;
   2190    uint32_t msglen;
   2191
   2192    qemu_mutex_lock(&vmbus->rx_queue_lock);
   2193
   2194    if (!vmbus->rx_queue_size) {
   2195        goto unlock;
   2196    }
   2197
   2198    hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
   2199    msglen =  hv_msg->payload_size;
   2200    if (msglen < sizeof(*msg)) {
   2201        goto out;
   2202    }
   2203    msgdata = hv_msg->payload;
   2204    msg = (struct vmbus_message_header *)msgdata;
   2205
   2206    trace_vmbus_process_incoming_message(msg->message_type);
   2207
   2208    switch (msg->message_type) {
   2209    case VMBUS_MSG_INITIATE_CONTACT:
   2210        handle_initiate_contact(vmbus, msgdata, msglen);
   2211        break;
   2212    case VMBUS_MSG_REQUESTOFFERS:
   2213        handle_request_offers(vmbus, msgdata, msglen);
   2214        break;
   2215    case VMBUS_MSG_GPADL_HEADER:
   2216        handle_gpadl_header(vmbus, msgdata, msglen);
   2217        break;
   2218    case VMBUS_MSG_GPADL_BODY:
   2219        handle_gpadl_body(vmbus, msgdata, msglen);
   2220        break;
   2221    case VMBUS_MSG_GPADL_TEARDOWN:
   2222        handle_gpadl_teardown(vmbus, msgdata, msglen);
   2223        break;
   2224    case VMBUS_MSG_OPENCHANNEL:
   2225        handle_open_channel(vmbus, msgdata, msglen);
   2226        break;
   2227    case VMBUS_MSG_CLOSECHANNEL:
   2228        handle_close_channel(vmbus, msgdata, msglen);
   2229        break;
   2230    case VMBUS_MSG_UNLOAD:
   2231        handle_unload(vmbus, msgdata, msglen);
   2232        break;
   2233    default:
   2234        error_report("unknown message type %#x", msg->message_type);
   2235        break;
   2236    }
   2237
   2238out:
   2239    vmbus->rx_queue_size--;
   2240    vmbus->rx_queue_head++;
   2241    vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
   2242
   2243    vmbus_resched(vmbus);
   2244unlock:
   2245    qemu_mutex_unlock(&vmbus->rx_queue_lock);
   2246}
   2247
   2248static const struct {
   2249    void (*run)(VMBus *vmbus);
   2250    bool (*complete)(VMBus *vmbus);
   2251} state_runner[] = {
   2252    [VMBUS_LISTEN]         = {process_message,     NULL},
   2253    [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
   2254    [VMBUS_OFFER]          = {send_offer,          complete_offer},
   2255    [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
   2256    [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
   2257    [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
   2258    [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
   2259};
   2260
   2261static void vmbus_do_run(VMBus *vmbus)
   2262{
   2263    if (vmbus->msg_in_progress) {
   2264        return;
   2265    }
   2266
   2267    assert(vmbus->state < VMBUS_STATE_MAX);
   2268    assert(state_runner[vmbus->state].run);
   2269    state_runner[vmbus->state].run(vmbus);
   2270}
   2271
   2272static void vmbus_run(void *opaque)
   2273{
   2274    VMBus *vmbus = opaque;
   2275
   2276    /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
   2277    if (vmbus->in_progress) {
   2278        return;
   2279    }
   2280
   2281    vmbus->in_progress = true;
   2282    /*
   2283     * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
   2284     * should go *after* the code that can result in aio_poll; otherwise
   2285     * reschedules can be missed.  No idea how to enforce that.
   2286     */
   2287    vmbus_do_run(vmbus);
   2288    vmbus->in_progress = false;
   2289}
   2290
   2291static void vmbus_msg_cb(void *data, int status)
   2292{
   2293    VMBus *vmbus = data;
   2294    bool (*complete)(VMBus *vmbus);
   2295
   2296    assert(vmbus->msg_in_progress);
   2297
   2298    trace_vmbus_msg_cb(status);
   2299
   2300    if (status == -EAGAIN) {
   2301        goto out;
   2302    }
   2303    if (status) {
   2304        error_report("message delivery fatal failure: %d; aborting vmbus",
   2305                     status);
   2306        vmbus_reset_all(vmbus);
   2307        return;
   2308    }
   2309
   2310    assert(vmbus->state < VMBUS_STATE_MAX);
   2311    complete = state_runner[vmbus->state].complete;
   2312    if (!complete || complete(vmbus)) {
   2313        vmbus->state = VMBUS_LISTEN;
   2314    }
   2315out:
   2316    vmbus->msg_in_progress = false;
   2317    vmbus_resched(vmbus);
   2318}
   2319
   2320static void vmbus_resched(VMBus *vmbus)
   2321{
   2322    aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
   2323}
   2324
   2325static void vmbus_signal_event(EventNotifier *e)
   2326{
   2327    VMBusChannel *chan;
   2328    VMBus *vmbus = container_of(e, VMBus, notifier);
   2329    unsigned long *int_map;
   2330    hwaddr addr, len;
   2331    bool is_dirty = false;
   2332
   2333    if (!event_notifier_test_and_clear(e)) {
   2334        return;
   2335    }
   2336
   2337    trace_vmbus_signal_event();
   2338
   2339    if (!vmbus->int_page_gpa) {
   2340        return;
   2341    }
   2342
   2343    addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
   2344    len = TARGET_PAGE_SIZE / 2;
   2345    int_map = cpu_physical_memory_map(addr, &len, 1);
   2346    if (len != TARGET_PAGE_SIZE / 2) {
   2347        goto unmap;
   2348    }
   2349
   2350    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2351        if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
   2352            if (!vmbus_channel_is_open(chan)) {
   2353                continue;
   2354            }
   2355            vmbus_channel_notify_host(chan);
   2356            is_dirty = true;
   2357        }
   2358    }
   2359
   2360unmap:
   2361    cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
   2362}
   2363
   2364static void vmbus_dev_realize(DeviceState *dev, Error **errp)
   2365{
   2366    VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2367    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2368    VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
   2369    BusChild *child;
   2370    Error *err = NULL;
   2371    char idstr[UUID_FMT_LEN + 1];
   2372
   2373    assert(!qemu_uuid_is_null(&vdev->instanceid));
   2374
   2375    if (!qemu_uuid_is_null(&vdc->instanceid)) {
   2376        /* Class wants to only have a single instance with a fixed UUID */
   2377        if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
   2378            error_setg(&err, "instance id can't be changed");
   2379            goto error_out;
   2380        }
   2381    }
   2382
   2383    /* Check for instance id collision for this class id */
   2384    QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
   2385        VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
   2386
   2387        if (child_dev == vdev) {
   2388            continue;
   2389        }
   2390
   2391        if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
   2392            qemu_uuid_unparse(&vdev->instanceid, idstr);
   2393            error_setg(&err, "duplicate vmbus device instance id %s", idstr);
   2394            goto error_out;
   2395        }
   2396    }
   2397
   2398    vdev->dma_as = &address_space_memory;
   2399
   2400    create_channels(vmbus, vdev, &err);
   2401    if (err) {
   2402        goto error_out;
   2403    }
   2404
   2405    if (vdc->vmdev_realize) {
   2406        vdc->vmdev_realize(vdev, &err);
   2407        if (err) {
   2408            goto err_vdc_realize;
   2409        }
   2410    }
   2411    return;
   2412
   2413err_vdc_realize:
   2414    free_channels(vdev);
   2415error_out:
   2416    error_propagate(errp, err);
   2417}
   2418
   2419static void vmbus_dev_reset(DeviceState *dev)
   2420{
   2421    uint16_t i;
   2422    VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2423    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2424
   2425    if (vdev->channels) {
   2426        for (i = 0; i < vdev->num_channels; i++) {
   2427            VMBusChannel *chan = &vdev->channels[i];
   2428            close_channel(chan);
   2429            chan->state = VMCHAN_INIT;
   2430        }
   2431    }
   2432
   2433    if (vdc->vmdev_reset) {
   2434        vdc->vmdev_reset(vdev);
   2435    }
   2436}
   2437
   2438static void vmbus_dev_unrealize(DeviceState *dev)
   2439{
   2440    VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2441    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2442
   2443    if (vdc->vmdev_unrealize) {
   2444        vdc->vmdev_unrealize(vdev);
   2445    }
   2446    free_channels(vdev);
   2447}
   2448
   2449static Property vmbus_dev_props[] = {
   2450    DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
   2451    DEFINE_PROP_END_OF_LIST()
   2452};
   2453
   2454
   2455static void vmbus_dev_class_init(ObjectClass *klass, void *data)
   2456{
   2457    DeviceClass *kdev = DEVICE_CLASS(klass);
   2458    device_class_set_props(kdev, vmbus_dev_props);
   2459    kdev->bus_type = TYPE_VMBUS;
   2460    kdev->realize = vmbus_dev_realize;
   2461    kdev->unrealize = vmbus_dev_unrealize;
   2462    kdev->reset = vmbus_dev_reset;
   2463}
   2464
   2465static void vmbus_dev_instance_init(Object *obj)
   2466{
   2467    VMBusDevice *vdev = VMBUS_DEVICE(obj);
   2468    VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2469
   2470    if (!qemu_uuid_is_null(&vdc->instanceid)) {
   2471        /* Class wants to only have a single instance with a fixed UUID */
   2472        vdev->instanceid = vdc->instanceid;
   2473    }
   2474}
   2475
   2476const VMStateDescription vmstate_vmbus_dev = {
   2477    .name = TYPE_VMBUS_DEVICE,
   2478    .version_id = 0,
   2479    .minimum_version_id = 0,
   2480    .fields = (VMStateField[]) {
   2481        VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
   2482        VMSTATE_UINT16(num_channels, VMBusDevice),
   2483        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
   2484                                             num_channels, vmstate_channel,
   2485                                             VMBusChannel),
   2486        VMSTATE_END_OF_LIST()
   2487    }
   2488};
   2489
   2490/* vmbus generic device base */
   2491static const TypeInfo vmbus_dev_type_info = {
   2492    .name = TYPE_VMBUS_DEVICE,
   2493    .parent = TYPE_DEVICE,
   2494    .abstract = true,
   2495    .instance_size = sizeof(VMBusDevice),
   2496    .class_size = sizeof(VMBusDeviceClass),
   2497    .class_init = vmbus_dev_class_init,
   2498    .instance_init = vmbus_dev_instance_init,
   2499};
   2500
   2501static void vmbus_realize(BusState *bus, Error **errp)
   2502{
   2503    int ret = 0;
   2504    Error *local_err = NULL;
   2505    VMBus *vmbus = VMBUS(bus);
   2506
   2507    qemu_mutex_init(&vmbus->rx_queue_lock);
   2508
   2509    QTAILQ_INIT(&vmbus->gpadl_list);
   2510    QTAILQ_INIT(&vmbus->channel_list);
   2511
   2512    ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
   2513                                 vmbus_recv_message, vmbus);
   2514    if (ret != 0) {
   2515        error_setg(&local_err, "hyperv set message handler failed: %d", ret);
   2516        goto error_out;
   2517    }
   2518
   2519    ret = event_notifier_init(&vmbus->notifier, 0);
   2520    if (ret != 0) {
   2521        error_setg(&local_err, "event notifier failed to init with %d", ret);
   2522        goto remove_msg_handler;
   2523    }
   2524
   2525    event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
   2526    ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
   2527                                        &vmbus->notifier);
   2528    if (ret != 0) {
   2529        error_setg(&local_err, "hyperv set event handler failed with %d", ret);
   2530        goto clear_event_notifier;
   2531    }
   2532
   2533    return;
   2534
   2535clear_event_notifier:
   2536    event_notifier_cleanup(&vmbus->notifier);
   2537remove_msg_handler:
   2538    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
   2539error_out:
   2540    qemu_mutex_destroy(&vmbus->rx_queue_lock);
   2541    error_propagate(errp, local_err);
   2542}
   2543
   2544static void vmbus_unrealize(BusState *bus)
   2545{
   2546    VMBus *vmbus = VMBUS(bus);
   2547
   2548    hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
   2549    hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
   2550    event_notifier_cleanup(&vmbus->notifier);
   2551
   2552    qemu_mutex_destroy(&vmbus->rx_queue_lock);
   2553}
   2554
   2555static void vmbus_reset(BusState *bus)
   2556{
   2557    vmbus_deinit(VMBUS(bus));
   2558}
   2559
   2560static char *vmbus_get_dev_path(DeviceState *dev)
   2561{
   2562    BusState *bus = qdev_get_parent_bus(dev);
   2563    return qdev_get_dev_path(bus->parent);
   2564}
   2565
   2566static char *vmbus_get_fw_dev_path(DeviceState *dev)
   2567{
   2568    VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2569    char uuid[UUID_FMT_LEN + 1];
   2570
   2571    qemu_uuid_unparse(&vdev->instanceid, uuid);
   2572    return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
   2573}
   2574
   2575static void vmbus_class_init(ObjectClass *klass, void *data)
   2576{
   2577    BusClass *k = BUS_CLASS(klass);
   2578
   2579    k->get_dev_path = vmbus_get_dev_path;
   2580    k->get_fw_dev_path = vmbus_get_fw_dev_path;
   2581    k->realize = vmbus_realize;
   2582    k->unrealize = vmbus_unrealize;
   2583    k->reset = vmbus_reset;
   2584}
   2585
   2586static int vmbus_pre_load(void *opaque)
   2587{
   2588    VMBusChannel *chan;
   2589    VMBus *vmbus = VMBUS(opaque);
   2590
   2591    /*
   2592     * channel IDs allocated by the source will come in the migration stream
   2593     * for each channel, so clean up the ones allocated at realize
   2594     */
   2595    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2596        unregister_chan_id(chan);
   2597    }
   2598
   2599    return 0;
   2600}
   2601static int vmbus_post_load(void *opaque, int version_id)
   2602{
   2603    int ret;
   2604    VMBus *vmbus = VMBUS(opaque);
   2605    VMBusGpadl *gpadl;
   2606    VMBusChannel *chan;
   2607
   2608    ret = vmbus_init(vmbus);
   2609    if (ret) {
   2610        return ret;
   2611    }
   2612
   2613    QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   2614        gpadl->vmbus = vmbus;
   2615        gpadl->refcount = 1;
   2616    }
   2617
   2618    /*
   2619     * reopening channels depends on initialized vmbus so it's done here
   2620     * instead of channel_post_load()
   2621     */
   2622    QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2623
   2624        if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
   2625            open_channel(chan);
   2626        }
   2627
   2628        if (chan->state != VMCHAN_OPEN) {
   2629            continue;
   2630        }
   2631
   2632        if (!vmbus_channel_is_open(chan)) {
   2633            /* reopen failed, abort loading */
   2634            return -1;
   2635        }
   2636
   2637        /* resume processing on the guest side if it missed the notification */
   2638        hyperv_sint_route_set_sint(chan->notify_route);
   2639        /* ditto on the host side */
   2640        vmbus_channel_notify_host(chan);
   2641    }
   2642
   2643    vmbus_resched(vmbus);
   2644    return 0;
   2645}
   2646
   2647static const VMStateDescription vmstate_post_message_input = {
   2648    .name = "vmbus/hyperv_post_message_input",
   2649    .version_id = 0,
   2650    .minimum_version_id = 0,
   2651    .fields = (VMStateField[]) {
   2652        /*
   2653         * skip connection_id and message_type as they are validated before
   2654         * queueing and ignored on dequeueing
   2655         */
   2656        VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
   2657        VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
   2658                            HV_MESSAGE_PAYLOAD_SIZE),
   2659        VMSTATE_END_OF_LIST()
   2660    }
   2661};
   2662
   2663static bool vmbus_rx_queue_needed(void *opaque)
   2664{
   2665    VMBus *vmbus = VMBUS(opaque);
   2666    return vmbus->rx_queue_size;
   2667}
   2668
   2669static const VMStateDescription vmstate_rx_queue = {
   2670    .name = "vmbus/rx_queue",
   2671    .version_id = 0,
   2672    .minimum_version_id = 0,
   2673    .needed = vmbus_rx_queue_needed,
   2674    .fields = (VMStateField[]) {
   2675        VMSTATE_UINT8(rx_queue_head, VMBus),
   2676        VMSTATE_UINT8(rx_queue_size, VMBus),
   2677        VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
   2678                             HV_MSG_QUEUE_LEN, 0,
   2679                             vmstate_post_message_input,
   2680                             struct hyperv_post_message_input),
   2681        VMSTATE_END_OF_LIST()
   2682    }
   2683};
   2684
   2685static const VMStateDescription vmstate_vmbus = {
   2686    .name = TYPE_VMBUS,
   2687    .version_id = 0,
   2688    .minimum_version_id = 0,
   2689    .pre_load = vmbus_pre_load,
   2690    .post_load = vmbus_post_load,
   2691    .fields = (VMStateField[]) {
   2692        VMSTATE_UINT8(state, VMBus),
   2693        VMSTATE_UINT32(version, VMBus),
   2694        VMSTATE_UINT32(target_vp, VMBus),
   2695        VMSTATE_UINT64(int_page_gpa, VMBus),
   2696        VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
   2697                         vmstate_gpadl, VMBusGpadl, link),
   2698        VMSTATE_END_OF_LIST()
   2699    },
   2700    .subsections = (const VMStateDescription * []) {
   2701        &vmstate_rx_queue,
   2702        NULL
   2703    }
   2704};
   2705
   2706static const TypeInfo vmbus_type_info = {
   2707    .name = TYPE_VMBUS,
   2708    .parent = TYPE_BUS,
   2709    .instance_size = sizeof(VMBus),
   2710    .class_init = vmbus_class_init,
   2711};
   2712
   2713static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
   2714{
   2715    VMBusBridge *bridge = VMBUS_BRIDGE(dev);
   2716
   2717    /*
   2718     * here there's at least one vmbus bridge that is being realized, so
   2719     * vmbus_bridge_find can only return NULL if it's not unique
   2720     */
   2721    if (!vmbus_bridge_find()) {
   2722        error_setg(errp, "there can be at most one %s in the system",
   2723                   TYPE_VMBUS_BRIDGE);
   2724        return;
   2725    }
   2726
   2727    if (!hyperv_is_synic_enabled()) {
   2728        error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
   2729        return;
   2730    }
   2731
   2732    bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
   2733}
   2734
   2735static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
   2736{
   2737    /* there can be only one VMBus */
   2738    return g_strdup("0");
   2739}
   2740
   2741static const VMStateDescription vmstate_vmbus_bridge = {
   2742    .name = TYPE_VMBUS_BRIDGE,
   2743    .version_id = 0,
   2744    .minimum_version_id = 0,
   2745    .fields = (VMStateField[]) {
   2746        VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
   2747        VMSTATE_END_OF_LIST()
   2748    },
   2749};
   2750
   2751static Property vmbus_bridge_props[] = {
   2752    DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
   2753    DEFINE_PROP_END_OF_LIST()
   2754};
   2755
   2756static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
   2757{
   2758    DeviceClass *k = DEVICE_CLASS(klass);
   2759    SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
   2760
   2761    k->realize = vmbus_bridge_realize;
   2762    k->fw_name = "vmbus";
   2763    sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
   2764    set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
   2765    k->vmsd = &vmstate_vmbus_bridge;
   2766    device_class_set_props(k, vmbus_bridge_props);
   2767    /* override SysBusDevice's default */
   2768    k->user_creatable = true;
   2769}
   2770
   2771static const TypeInfo vmbus_bridge_type_info = {
   2772    .name = TYPE_VMBUS_BRIDGE,
   2773    .parent = TYPE_SYS_BUS_DEVICE,
   2774    .instance_size = sizeof(VMBusBridge),
   2775    .class_init = vmbus_bridge_class_init,
   2776};
   2777
   2778static void vmbus_register_types(void)
   2779{
   2780    type_register_static(&vmbus_bridge_type_info);
   2781    type_register_static(&vmbus_dev_type_info);
   2782    type_register_static(&vmbus_type_info);
   2783}
   2784
   2785type_init(vmbus_register_types)