cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

xen-hvm.c (49565B)


      1/*
      2 * Copyright (C) 2010       Citrix Ltd.
      3 *
      4 * This work is licensed under the terms of the GNU GPL, version 2.  See
      5 * the COPYING file in the top-level directory.
      6 *
      7 * Contributions after 2012-01-13 are licensed under the terms of the
      8 * GNU GPL, version 2 or (at your option) any later version.
      9 */
     10
     11#include "qemu/osdep.h"
     12#include "qemu/units.h"
     13
     14#include "cpu.h"
     15#include "hw/pci/pci.h"
     16#include "hw/pci/pci_host.h"
     17#include "hw/i386/pc.h"
     18#include "hw/southbridge/piix.h"
     19#include "hw/irq.h"
     20#include "hw/hw.h"
     21#include "hw/i386/apic-msidef.h"
     22#include "hw/xen/xen_common.h"
     23#include "hw/xen/xen-legacy-backend.h"
     24#include "hw/xen/xen-bus.h"
     25#include "hw/xen/xen-x86.h"
     26#include "qapi/error.h"
     27#include "qapi/qapi-commands-migration.h"
     28#include "qemu/error-report.h"
     29#include "qemu/main-loop.h"
     30#include "qemu/range.h"
     31#include "sysemu/runstate.h"
     32#include "sysemu/sysemu.h"
     33#include "sysemu/xen.h"
     34#include "sysemu/xen-mapcache.h"
     35#include "trace.h"
     36
     37#include <xen/hvm/ioreq.h>
     38#include <xen/hvm/e820.h>
     39
     40//#define DEBUG_XEN_HVM
     41
     42#ifdef DEBUG_XEN_HVM
     43#define DPRINTF(fmt, ...) \
     44    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
     45#else
     46#define DPRINTF(fmt, ...) \
     47    do { } while (0)
     48#endif
     49
     50static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
     51static MemoryRegion *framebuffer;
     52static bool xen_in_migration;
     53
     54/* Compatibility with older versions */
     55
     56/* This allows QEMU to build on a system that has Xen 4.5 or earlier
     57 * installed.  This is here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
     58 * needs to be included before this block and hw/xen/xen_common.h needs to
     59 * be included before xen/hvm/ioreq.h
     60 */
     61#ifndef IOREQ_TYPE_VMWARE_PORT
     62#define IOREQ_TYPE_VMWARE_PORT  3
     63struct vmware_regs {
     64    uint32_t esi;
     65    uint32_t edi;
     66    uint32_t ebx;
     67    uint32_t ecx;
     68    uint32_t edx;
     69};
     70typedef struct vmware_regs vmware_regs_t;
     71
     72struct shared_vmport_iopage {
     73    struct vmware_regs vcpu_vmport_regs[1];
     74};
     75typedef struct shared_vmport_iopage shared_vmport_iopage_t;
     76#endif
     77
     78static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
     79{
     80    return shared_page->vcpu_ioreq[i].vp_eport;
     81}
     82static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
     83{
     84    return &shared_page->vcpu_ioreq[vcpu];
     85}
     86
     87#define BUFFER_IO_MAX_DELAY  100
     88
     89typedef struct XenPhysmap {
     90    hwaddr start_addr;
     91    ram_addr_t size;
     92    const char *name;
     93    hwaddr phys_offset;
     94
     95    QLIST_ENTRY(XenPhysmap) list;
     96} XenPhysmap;
     97
     98static QLIST_HEAD(, XenPhysmap) xen_physmap;
     99
    100typedef struct XenPciDevice {
    101    PCIDevice *pci_dev;
    102    uint32_t sbdf;
    103    QLIST_ENTRY(XenPciDevice) entry;
    104} XenPciDevice;
    105
    106typedef struct XenIOState {
    107    ioservid_t ioservid;
    108    shared_iopage_t *shared_page;
    109    shared_vmport_iopage_t *shared_vmport_page;
    110    buffered_iopage_t *buffered_io_page;
    111    xenforeignmemory_resource_handle *fres;
    112    QEMUTimer *buffered_io_timer;
    113    CPUState **cpu_by_vcpu_id;
    114    /* the evtchn ports (one per vcpu) for polling notifications */
    115    evtchn_port_t *ioreq_local_port;
    116    /* evtchn remote and local ports for buffered io */
    117    evtchn_port_t bufioreq_remote_port;
    118    evtchn_port_t bufioreq_local_port;
    119    /* the evtchn fd for polling */
    120    xenevtchn_handle *xce_handle;
    121    /* which vcpu we are serving */
    122    int send_vcpu;
    123
    124    struct xs_handle *xenstore;
    125    MemoryListener memory_listener;
    126    MemoryListener io_listener;
    127    QLIST_HEAD(, XenPciDevice) dev_list;
    128    DeviceListener device_listener;
    129    hwaddr free_phys_offset;
    130    const XenPhysmap *log_for_dirtybit;
    131    /* Buffer used by xen_sync_dirty_bitmap */
    132    unsigned long *dirty_bitmap;
    133
    134    Notifier exit;
    135    Notifier suspend;
    136    Notifier wakeup;
    137} XenIOState;
    138
    139/* Xen specific functions for piix pci */
    140
    141int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
    142{
    143    return irq_num + (PCI_SLOT(pci_dev->devfn) << 2);
    144}
    145
    146void xen_piix3_set_irq(void *opaque, int irq_num, int level)
    147{
    148    xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
    149                           irq_num & 3, level);
    150}
    151
    152void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
    153{
    154    int i;
    155
    156    /* Scan for updates to PCI link routes (0x60-0x63). */
    157    for (i = 0; i < len; i++) {
    158        uint8_t v = (val >> (8 * i)) & 0xff;
    159        if (v & 0x80) {
    160            v = 0;
    161        }
    162        v &= 0xf;
    163        if (((address + i) >= PIIX_PIRQCA) && ((address + i) <= PIIX_PIRQCD)) {
    164            xen_set_pci_link_route(xen_domid, address + i - PIIX_PIRQCA, v);
    165        }
    166    }
    167}
    168
    169int xen_is_pirq_msi(uint32_t msi_data)
    170{
    171    /* If vector is 0, the msi is remapped into a pirq, passed as
    172     * dest_id.
    173     */
    174    return ((msi_data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT) == 0;
    175}
    176
    177void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
    178{
    179    xen_inject_msi(xen_domid, addr, data);
    180}
    181
    182static void xen_suspend_notifier(Notifier *notifier, void *data)
    183{
    184    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
    185}
    186
    187/* Xen Interrupt Controller */
    188
    189static void xen_set_irq(void *opaque, int irq, int level)
    190{
    191    xen_set_isa_irq_level(xen_domid, irq, level);
    192}
    193
    194qemu_irq *xen_interrupt_controller_init(void)
    195{
    196    return qemu_allocate_irqs(xen_set_irq, NULL, 16);
    197}
    198
    199/* Memory Ops */
    200
    201static void xen_ram_init(PCMachineState *pcms,
    202                         ram_addr_t ram_size, MemoryRegion **ram_memory_p)
    203{
    204    X86MachineState *x86ms = X86_MACHINE(pcms);
    205    MemoryRegion *sysmem = get_system_memory();
    206    ram_addr_t block_len;
    207    uint64_t user_lowmem =
    208        object_property_get_uint(qdev_get_machine(),
    209                                 PC_MACHINE_MAX_RAM_BELOW_4G,
    210                                 &error_abort);
    211
    212    /* Handle the machine opt max-ram-below-4g.  It is basically doing
    213     * min(xen limit, user limit).
    214     */
    215    if (!user_lowmem) {
    216        user_lowmem = HVM_BELOW_4G_RAM_END; /* default */
    217    }
    218    if (HVM_BELOW_4G_RAM_END <= user_lowmem) {
    219        user_lowmem = HVM_BELOW_4G_RAM_END;
    220    }
    221
    222    if (ram_size >= user_lowmem) {
    223        x86ms->above_4g_mem_size = ram_size - user_lowmem;
    224        x86ms->below_4g_mem_size = user_lowmem;
    225    } else {
    226        x86ms->above_4g_mem_size = 0;
    227        x86ms->below_4g_mem_size = ram_size;
    228    }
    229    if (!x86ms->above_4g_mem_size) {
    230        block_len = ram_size;
    231    } else {
    232        /*
    233         * Xen does not allocate the memory contiguously; it keeps a
    234         * hole of the size computed above or passed in.
    235         */
    236        block_len = (4 * GiB) + x86ms->above_4g_mem_size;
    237    }
    238    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
    239                           &error_fatal);
    240    *ram_memory_p = &ram_memory;
    241
    242    memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
    243                             &ram_memory, 0, 0xa0000);
    244    memory_region_add_subregion(sysmem, 0, &ram_640k);
    245    /* Skip the VGA IO memory space; it will be registered later by the
    246     * emulated VGA device.
    247     *
    248     * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
    249     * the Options ROM, so it is registered here as RAM.
    250     */
    251    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo",
    252                             &ram_memory, 0xc0000,
    253                             x86ms->below_4g_mem_size - 0xc0000);
    254    memory_region_add_subregion(sysmem, 0xc0000, &ram_lo);
    255    if (x86ms->above_4g_mem_size > 0) {
    256        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi",
    257                                 &ram_memory, 0x100000000ULL,
    258                                 x86ms->above_4g_mem_size);
    259        memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi);
    260    }
    261}
    262
    263void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
    264                   Error **errp)
    265{
    266    unsigned long nr_pfn;
    267    xen_pfn_t *pfn_list;
    268    int i;
    269
    270    if (runstate_check(RUN_STATE_INMIGRATE)) {
    271        /* RAM already populated in Xen */
    272        fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
    273                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
    274                __func__, size, ram_addr);
    275        return;
    276    }
    277
    278    if (mr == &ram_memory) {
    279        return;
    280    }
    281
    282    trace_xen_ram_alloc(ram_addr, size);
    283
    284    nr_pfn = size >> TARGET_PAGE_BITS;
    285    pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn);
    286
    287    for (i = 0; i < nr_pfn; i++) {
    288        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
    289    }
    290
    291    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
    292        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
    293                   ram_addr);
    294    }
    295
    296    g_free(pfn_list);
    297}
    298
    299static XenPhysmap *get_physmapping(hwaddr start_addr, ram_addr_t size)
    300{
    301    XenPhysmap *physmap = NULL;
    302
    303    start_addr &= TARGET_PAGE_MASK;
    304
    305    QLIST_FOREACH(physmap, &xen_physmap, list) {
    306        if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) {
    307            return physmap;
    308        }
    309    }
    310    return NULL;
    311}
    312
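       /* Map a RAM block offset back to the guest address it is physmapped at,
        * falling back to the offset itself when there is no physmap entry. */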
    313static hwaddr xen_phys_offset_to_gaddr(hwaddr phys_offset, ram_addr_t size)
    314{
    315    hwaddr addr = phys_offset & TARGET_PAGE_MASK;
    316    XenPhysmap *physmap = NULL;
    317
    318    QLIST_FOREACH(physmap, &xen_physmap, list) {
    319        if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) {
    320            return physmap->start_addr + (phys_offset - physmap->phys_offset);
    321        }
    322    }
    323
    324    return phys_offset;
    325}
    326
    327#ifdef XEN_COMPAT_PHYSMAP
    328static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
    329{
    330    char path[80], value[17];
    331
    332    snprintf(path, sizeof(path),
    333            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/start_addr",
    334            xen_domid, (uint64_t)physmap->phys_offset);
    335    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->start_addr);
    336    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
    337        return -1;
    338    }
    339    snprintf(path, sizeof(path),
    340            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/size",
    341            xen_domid, (uint64_t)physmap->phys_offset);
    342    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)physmap->size);
    343    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
    344        return -1;
    345    }
    346    if (physmap->name) {
    347        snprintf(path, sizeof(path),
    348                "/local/domain/0/device-model/%d/physmap/%"PRIx64"/name",
    349                xen_domid, (uint64_t)physmap->phys_offset);
    350        if (!xs_write(state->xenstore, 0, path,
    351                      physmap->name, strlen(physmap->name))) {
    352            return -1;
    353        }
    354    }
    355    return 0;
    356}
    357#else
    358static int xen_save_physmap(XenIOState *state, XenPhysmap *physmap)
    359{
    360    return 0;
    361}
    362#endif
    363
    364static int xen_add_to_physmap(XenIOState *state,
    365                              hwaddr start_addr,
    366                              ram_addr_t size,
    367                              MemoryRegion *mr,
    368                              hwaddr offset_within_region)
    369{
    370    unsigned long nr_pages;
    371    int rc = 0;
    372    XenPhysmap *physmap = NULL;
    373    hwaddr pfn, start_gpfn;
    374    hwaddr phys_offset = memory_region_get_ram_addr(mr);
    375    const char *mr_name;
    376
    377    if (get_physmapping(start_addr, size)) {
    378        return 0;
    379    }
    380    if (size <= 0) {
    381        return -1;
    382    }
    383
    384    /* Xen can only handle a single dirty log region for now and we want
    385     * the linear framebuffer to be that region.
    386     * Avoid tracking any region that is not videoram and avoid tracking
    387     * the legacy vga region. */
    388    if (mr == framebuffer && start_addr > 0xbffff) {
    389        goto go_physmap;
    390    }
    391    return -1;
    392
    393go_physmap:
    394    DPRINTF("mapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
    395            start_addr, start_addr + size);
    396
    397    mr_name = memory_region_name(mr);
    398
    399    physmap = g_malloc(sizeof(XenPhysmap));
    400
    401    physmap->start_addr = start_addr;
    402    physmap->size = size;
    403    physmap->name = mr_name;
    404    physmap->phys_offset = phys_offset;
    405
    406    QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
    407
    408    if (runstate_check(RUN_STATE_INMIGRATE)) {
    409        /* Now that we have a physmap entry we can replace a dummy mapping with
    410         * a real one of guest foreign memory. */
    411        uint8_t *p = xen_replace_cache_entry(phys_offset, start_addr, size);
    412        assert(p && p == memory_region_get_ram_ptr(mr));
    413
    414        return 0;
    415    }
    416
    417    pfn = phys_offset >> TARGET_PAGE_BITS;
    418    start_gpfn = start_addr >> TARGET_PAGE_BITS;
    419    nr_pages = size >> TARGET_PAGE_BITS;
    420    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, nr_pages, pfn,
    421                                        start_gpfn);
    422    if (rc) {
    423        int saved_errno = errno;
    424
    425        error_report("relocate_memory %lu pages from GFN %"HWADDR_PRIx
    426                     " to GFN %"HWADDR_PRIx" failed: %s",
    427                     nr_pages, pfn, start_gpfn, strerror(saved_errno));
    428        errno = saved_errno;
    429        return -1;
    430    }
    431
    432    rc = xendevicemodel_pin_memory_cacheattr(xen_dmod, xen_domid,
    433                                   start_addr >> TARGET_PAGE_BITS,
    434                                   (start_addr + size - 1) >> TARGET_PAGE_BITS,
    435                                   XEN_DOMCTL_MEM_CACHEATTR_WB);
    436    if (rc) {
    437        error_report("pin_memory_cacheattr failed: %s", strerror(errno));
    438    }
    439    return xen_save_physmap(state, physmap);
    440}
    441
    442static int xen_remove_from_physmap(XenIOState *state,
    443                                   hwaddr start_addr,
    444                                   ram_addr_t size)
    445{
    446    int rc = 0;
    447    XenPhysmap *physmap = NULL;
    448    hwaddr phys_offset = 0;
    449
    450    physmap = get_physmapping(start_addr, size);
    451    if (physmap == NULL) {
    452        return -1;
    453    }
    454
    455    phys_offset = physmap->phys_offset;
    456    size = physmap->size;
    457
    458    DPRINTF("unmapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx", at "
    459            "%"HWADDR_PRIx"\n", start_addr, start_addr + size, phys_offset);
    460
    461    size >>= TARGET_PAGE_BITS;
    462    start_addr >>= TARGET_PAGE_BITS;
    463    phys_offset >>= TARGET_PAGE_BITS;
    464    rc = xendevicemodel_relocate_memory(xen_dmod, xen_domid, size, start_addr,
    465                                        phys_offset);
    466    if (rc) {
    467        int saved_errno = errno;
    468
    469        error_report("relocate_memory "RAM_ADDR_FMT" pages"
    470                     " from GFN %"HWADDR_PRIx
    471                     " to GFN %"HWADDR_PRIx" failed: %s",
    472                     size, start_addr, phys_offset, strerror(saved_errno));
    473        errno = saved_errno;
    474        return -1;
    475    }
    476
    477    QLIST_REMOVE(physmap, list);
    478    if (state->log_for_dirtybit == physmap) {
    479        state->log_for_dirtybit = NULL;
    480        g_free(state->dirty_bitmap);
    481        state->dirty_bitmap = NULL;
    482    }
    483    g_free(physmap);
    484
    485    return 0;
    486}
    487
    488static void xen_set_memory(struct MemoryListener *listener,
    489                           MemoryRegionSection *section,
    490                           bool add)
    491{
    492    XenIOState *state = container_of(listener, XenIOState, memory_listener);
    493    hwaddr start_addr = section->offset_within_address_space;
    494    ram_addr_t size = int128_get64(section->size);
    495    bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA);
    496    hvmmem_type_t mem_type;
    497
    498    if (section->mr == &ram_memory) {
    499        return;
    500    } else {
    501        if (add) {
    502            xen_map_memory_section(xen_domid, state->ioservid,
    503                                   section);
    504        } else {
    505            xen_unmap_memory_section(xen_domid, state->ioservid,
    506                                     section);
    507        }
    508    }
    509
    510    if (!memory_region_is_ram(section->mr)) {
    511        return;
    512    }
    513
    514    if (log_dirty != add) {
    515        return;
    516    }
    517
    518    trace_xen_client_set_memory(start_addr, size, log_dirty);
    519
    520    start_addr &= TARGET_PAGE_MASK;
    521    size = TARGET_PAGE_ALIGN(size);
    522
    523    if (add) {
    524        if (!memory_region_is_rom(section->mr)) {
    525            xen_add_to_physmap(state, start_addr, size,
    526                               section->mr, section->offset_within_region);
    527        } else {
    528            mem_type = HVMMEM_ram_ro;
    529            if (xen_set_mem_type(xen_domid, mem_type,
    530                                 start_addr >> TARGET_PAGE_BITS,
    531                                 size >> TARGET_PAGE_BITS)) {
    532                DPRINTF("xen_set_mem_type error, addr: "TARGET_FMT_plx"\n",
    533                        start_addr);
    534            }
    535        }
    536    } else {
    537        if (xen_remove_from_physmap(state, start_addr, size) < 0) {
    538            DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr);
    539        }
    540    }
    541}
    542
    543static void xen_region_add(MemoryListener *listener,
    544                           MemoryRegionSection *section)
    545{
    546    memory_region_ref(section->mr);
    547    xen_set_memory(listener, section, true);
    548}
    549
    550static void xen_region_del(MemoryListener *listener,
    551                           MemoryRegionSection *section)
    552{
    553    xen_set_memory(listener, section, false);
    554    memory_region_unref(section->mr);
    555}
    556
    557static void xen_io_add(MemoryListener *listener,
    558                       MemoryRegionSection *section)
    559{
    560    XenIOState *state = container_of(listener, XenIOState, io_listener);
    561    MemoryRegion *mr = section->mr;
    562
    563    if (mr->ops == &unassigned_io_ops) {
    564        return;
    565    }
    566
    567    memory_region_ref(mr);
    568
    569    xen_map_io_section(xen_domid, state->ioservid, section);
    570}
    571
    572static void xen_io_del(MemoryListener *listener,
    573                       MemoryRegionSection *section)
    574{
    575    XenIOState *state = container_of(listener, XenIOState, io_listener);
    576    MemoryRegion *mr = section->mr;
    577
    578    if (mr->ops == &unassigned_io_ops) {
    579        return;
    580    }
    581
    582    xen_unmap_io_section(xen_domid, state->ioservid, section);
    583
    584    memory_region_unref(mr);
    585}
    586
    587static void xen_device_realize(DeviceListener *listener,
    588                               DeviceState *dev)
    589{
    590    XenIOState *state = container_of(listener, XenIOState, device_listener);
    591
    592    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
    593        PCIDevice *pci_dev = PCI_DEVICE(dev);
    594        XenPciDevice *xendev = g_new(XenPciDevice, 1);
    595
    596        xendev->pci_dev = pci_dev;
    597        xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
    598                                     pci_dev->devfn);
    599        QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);
    600
    601        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
    602    }
    603}
    604
    605static void xen_device_unrealize(DeviceListener *listener,
    606                                 DeviceState *dev)
    607{
    608    XenIOState *state = container_of(listener, XenIOState, device_listener);
    609
    610    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
    611        PCIDevice *pci_dev = PCI_DEVICE(dev);
    612        XenPciDevice *xendev, *next;
    613
    614        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);
    615
    616        QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
    617            if (xendev->pci_dev == pci_dev) {
    618                QLIST_REMOVE(xendev, entry);
    619                g_free(xendev);
    620                break;
    621            }
    622        }
    623    }
    624}
    625
    626static void xen_sync_dirty_bitmap(XenIOState *state,
    627                                  hwaddr start_addr,
    628                                  ram_addr_t size)
    629{
    630    hwaddr npages = size >> TARGET_PAGE_BITS;
    631    const int width = sizeof(unsigned long) * 8;
    632    size_t bitmap_size = DIV_ROUND_UP(npages, width);
    633    int rc, i, j;
    634    const XenPhysmap *physmap = NULL;
    635
    636    physmap = get_physmapping(start_addr, size);
    637    if (physmap == NULL) {
    638        /* not handled */
    639        return;
    640    }
    641
    642    if (state->log_for_dirtybit == NULL) {
    643        state->log_for_dirtybit = physmap;
    644        state->dirty_bitmap = g_new(unsigned long, bitmap_size);
    645    } else if (state->log_for_dirtybit != physmap) {
    646        /* Only one range for dirty bitmap can be tracked. */
    647        return;
    648    }
    649
    650    rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
    651                              npages, state->dirty_bitmap);
    652    if (rc < 0) {
    653#ifndef ENODATA
    654#define ENODATA  ENOENT
    655#endif
    656        if (errno == ENODATA) {
    657            memory_region_set_dirty(framebuffer, 0, size);
    658            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
    659                    ", 0x" TARGET_FMT_plx "): %s\n",
    660                    start_addr, start_addr + size, strerror(errno));
    661        }
    662        return;
    663    }
    664
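           /* Walk the bitmap and mark each set page dirty in the framebuffer. */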
    665    for (i = 0; i < bitmap_size; i++) {
    666        unsigned long map = state->dirty_bitmap[i];
    667        while (map != 0) {
    668            j = ctzl(map);
    669            map &= ~(1ul << j);
    670            memory_region_set_dirty(framebuffer,
    671                                    (i * width + j) * TARGET_PAGE_SIZE,
    672                                    TARGET_PAGE_SIZE);
    673        };
    674    }
    675}
    676
    677static void xen_log_start(MemoryListener *listener,
    678                          MemoryRegionSection *section,
    679                          int old, int new)
    680{
    681    XenIOState *state = container_of(listener, XenIOState, memory_listener);
    682
    683    if (new & ~old & (1 << DIRTY_MEMORY_VGA)) {
    684        xen_sync_dirty_bitmap(state, section->offset_within_address_space,
    685                              int128_get64(section->size));
    686    }
    687}
    688
    689static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
    690                         int old, int new)
    691{
    692    XenIOState *state = container_of(listener, XenIOState, memory_listener);
    693
    694    if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
    695        state->log_for_dirtybit = NULL;
    696        g_free(state->dirty_bitmap);
    697        state->dirty_bitmap = NULL;
    698        /* Disable dirty bit tracking */
    699        xen_track_dirty_vram(xen_domid, 0, 0, NULL);
    700    }
    701}
    702
    703static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
    704{
    705    XenIOState *state = container_of(listener, XenIOState, memory_listener);
    706
    707    xen_sync_dirty_bitmap(state, section->offset_within_address_space,
    708                          int128_get64(section->size));
    709}
    710
    711static void xen_log_global_start(MemoryListener *listener)
    712{
    713    if (xen_enabled()) {
    714        xen_in_migration = true;
    715    }
    716}
    717
    718static void xen_log_global_stop(MemoryListener *listener)
    719{
    720    xen_in_migration = false;
    721}
    722
    723static MemoryListener xen_memory_listener = {
    724    .name = "xen-memory",
    725    .region_add = xen_region_add,
    726    .region_del = xen_region_del,
    727    .log_start = xen_log_start,
    728    .log_stop = xen_log_stop,
    729    .log_sync = xen_log_sync,
    730    .log_global_start = xen_log_global_start,
    731    .log_global_stop = xen_log_global_stop,
    732    .priority = 10,
    733};
    734
    735static MemoryListener xen_io_listener = {
    736    .name = "xen-io",
    737    .region_add = xen_io_add,
    738    .region_del = xen_io_del,
    739    .priority = 10,
    740};
    741
    742static DeviceListener xen_device_listener = {
    743    .realize = xen_device_realize,
    744    .unrealize = xen_device_unrealize,
    745};
    746
    747/* get the ioreq packet from shared memory */
    748static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
    749{
    750    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);
    751
    752    if (req->state != STATE_IOREQ_READY) {
    753        DPRINTF("I/O request not ready: "
    754                "%x, ptr: %x, port: %"PRIx64", "
    755                "data: %"PRIx64", count: %u, size: %u\n",
    756                req->state, req->data_is_ptr, req->addr,
    757                req->data, req->count, req->size);
    758        return NULL;
    759    }
    760
    761    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */
    762
    763    req->state = STATE_IOREQ_INPROCESS;
    764    return req;
    765}
    766
    767/* Poll the event channel for a pending port notification and return the
    768 * corresponding vcpu's ioreq from shared memory. Returns NULL if nothing
    769 * is pending or only the buffered-io port fired. */
    770static ioreq_t *cpu_get_ioreq(XenIOState *state)
    771{
    772    MachineState *ms = MACHINE(qdev_get_machine());
    773    unsigned int max_cpus = ms->smp.max_cpus;
    774    int i;
    775    evtchn_port_t port;
    776
    777    port = xenevtchn_pending(state->xce_handle);
    778    if (port == state->bufioreq_local_port) {
    779        timer_mod(state->buffered_io_timer,
    780                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    781        return NULL;
    782    }
    783
    784    if (port != -1) {
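               /* Find which vcpu's ioreq event channel the pending port belongs to. */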
    785        for (i = 0; i < max_cpus; i++) {
    786            if (state->ioreq_local_port[i] == port) {
    787                break;
    788            }
    789        }
    790
    791        if (i == max_cpus) {
    792            hw_error("Fatal error while trying to get io event!\n");
    793        }
    794
    795        /* unmask the wanted port again */
    796        xenevtchn_unmask(state->xce_handle, port);
    797
    798        /* get the io packet from shared memory */
    799        state->send_vcpu = i;
    800        return cpu_get_ioreq_from_shared_memory(state, i);
    801    }
    802
    803    /* read error or read nothing */
    804    return NULL;
    805}
    806
    807static uint32_t do_inp(uint32_t addr, unsigned long size)
    808{
    809    switch (size) {
    810        case 1:
    811            return cpu_inb(addr);
    812        case 2:
    813            return cpu_inw(addr);
    814        case 4:
    815            return cpu_inl(addr);
    816        default:
    817            hw_error("inp: bad size: %04x %lx", addr, size);
    818    }
    819}
    820
    821static void do_outp(uint32_t addr,
    822        unsigned long size, uint32_t val)
    823{
    824    switch (size) {
    825        case 1:
    826            return cpu_outb(addr, val);
    827        case 2:
    828            return cpu_outw(addr, val);
    829        case 4:
    830            return cpu_outl(addr, val);
    831        default:
    832            hw_error("outp: bad size: %04x %lx", addr, size);
    833    }
    834}
    835
    836/*
    837 * Helper functions which read/write an object from/to physical guest
    838 * memory, as part of the implementation of an ioreq.
    839 *
    840 * Equivalent to
    841 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
    842 *                          val, req->size, 0/1)
    843 * except without the integer overflow problems.
    844 */
    845static void rw_phys_req_item(hwaddr addr,
    846                             ioreq_t *req, uint32_t i, void *val, int rw)
    847{
    848    /* Do everything unsigned so overflow just results in a truncated result
    849     * and accesses to undesired parts of guest memory, which is up
    850     * to the guest */
    851    hwaddr offset = (hwaddr)req->size * i;
    852    if (req->df) {
    853        addr -= offset;
    854    } else {
    855        addr += offset;
    856    }
    857    cpu_physical_memory_rw(addr, val, req->size, rw);
    858}
    859
    860static inline void read_phys_req_item(hwaddr addr,
    861                                      ioreq_t *req, uint32_t i, void *val)
    862{
    863    rw_phys_req_item(addr, req, i, val, 0);
    864}
    865static inline void write_phys_req_item(hwaddr addr,
    866                                       ioreq_t *req, uint32_t i, void *val)
    867{
    868    rw_phys_req_item(addr, req, i, val, 1);
    869}
    870
    871
    872static void cpu_ioreq_pio(ioreq_t *req)
    873{
    874    uint32_t i;
    875
    876    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
    877                         req->data, req->count, req->size);
    878
    879    if (req->size > sizeof(uint32_t)) {
    880        hw_error("PIO: bad size (%u)", req->size);
    881    }
    882
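           /* When data_is_ptr is set, req->data is a guest physical address of the
            * data buffer rather than an immediate value. */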
    883    if (req->dir == IOREQ_READ) {
    884        if (!req->data_is_ptr) {
    885            req->data = do_inp(req->addr, req->size);
    886            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
    887                                         req->size);
    888        } else {
    889            uint32_t tmp;
    890
    891            for (i = 0; i < req->count; i++) {
    892                tmp = do_inp(req->addr, req->size);
    893                write_phys_req_item(req->data, req, i, &tmp);
    894            }
    895        }
    896    } else if (req->dir == IOREQ_WRITE) {
    897        if (!req->data_is_ptr) {
    898            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
    899                                          req->size);
    900            do_outp(req->addr, req->size, req->data);
    901        } else {
    902            for (i = 0; i < req->count; i++) {
    903                uint32_t tmp = 0;
    904
    905                read_phys_req_item(req->data, req, i, &tmp);
    906                do_outp(req->addr, req->size, tmp);
    907            }
    908        }
    909    }
    910}
    911
    912static void cpu_ioreq_move(ioreq_t *req)
    913{
    914    uint32_t i;
    915
    916    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
    917                         req->data, req->count, req->size);
    918
    919    if (req->size > sizeof(req->data)) {
    920        hw_error("MMIO: bad size (%u)", req->size);
    921    }
    922
    923    if (!req->data_is_ptr) {
    924        if (req->dir == IOREQ_READ) {
    925            for (i = 0; i < req->count; i++) {
    926                read_phys_req_item(req->addr, req, i, &req->data);
    927            }
    928        } else if (req->dir == IOREQ_WRITE) {
    929            for (i = 0; i < req->count; i++) {
    930                write_phys_req_item(req->addr, req, i, &req->data);
    931            }
    932        }
    933    } else {
    934        uint64_t tmp;
    935
    936        if (req->dir == IOREQ_READ) {
    937            for (i = 0; i < req->count; i++) {
    938                read_phys_req_item(req->addr, req, i, &tmp);
    939                write_phys_req_item(req->data, req, i, &tmp);
    940            }
    941        } else if (req->dir == IOREQ_WRITE) {
    942            for (i = 0; i < req->count; i++) {
    943                read_phys_req_item(req->data, req, i, &tmp);
    944                write_phys_req_item(req->addr, req, i, &tmp);
    945            }
    946        }
    947    }
    948}
    949
    950static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
    951{
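           /* For PCI config ioreqs, the upper 32 bits of addr carry the SBDF and
            * the lower bits the config space register offset. */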
    952    uint32_t sbdf = req->addr >> 32;
    953    uint32_t reg = req->addr;
    954    XenPciDevice *xendev;
    955
    956    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
    957        req->size != sizeof(uint32_t)) {
    958        hw_error("PCI config access: bad size (%u)", req->size);
    959    }
    960
    961    if (req->count != 1) {
    962        hw_error("PCI config access: bad count (%u)", req->count);
    963    }
    964
    965    QLIST_FOREACH(xendev, &state->dev_list, entry) {
    966        if (xendev->sbdf != sbdf) {
    967            continue;
    968        }
    969
    970        if (!req->data_is_ptr) {
    971            if (req->dir == IOREQ_READ) {
    972                req->data = pci_host_config_read_common(
    973                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
    974                    req->size);
    975                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
    976                                            req->size, req->data);
    977            } else if (req->dir == IOREQ_WRITE) {
    978                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
    979                                             req->size, req->data);
    980                pci_host_config_write_common(
    981                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
    982                    req->data, req->size);
    983            }
    984        } else {
    985            uint32_t tmp;
    986
    987            if (req->dir == IOREQ_READ) {
    988                tmp = pci_host_config_read_common(
    989                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
    990                    req->size);
    991                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
    992                                            req->size, tmp);
    993                write_phys_req_item(req->data, req, 0, &tmp);
    994            } else if (req->dir == IOREQ_WRITE) {
    995                read_phys_req_item(req->data, req, 0, &tmp);
    996                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
    997                                             req->size, tmp);
    998                pci_host_config_write_common(
    999                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
   1000                    tmp, req->size);
   1001            }
   1002        }
   1003    }
   1004}
   1005
   1006static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req)
   1007{
   1008    X86CPU *cpu;
   1009    CPUX86State *env;
   1010
   1011    cpu = X86_CPU(current_cpu);
   1012    env = &cpu->env;
   1013    env->regs[R_EAX] = req->data;
   1014    env->regs[R_EBX] = vmport_regs->ebx;
   1015    env->regs[R_ECX] = vmport_regs->ecx;
   1016    env->regs[R_EDX] = vmport_regs->edx;
   1017    env->regs[R_ESI] = vmport_regs->esi;
   1018    env->regs[R_EDI] = vmport_regs->edi;
   1019}
   1020
   1021static void regs_from_cpu(vmware_regs_t *vmport_regs)
   1022{
   1023    X86CPU *cpu = X86_CPU(current_cpu);
   1024    CPUX86State *env = &cpu->env;
   1025
   1026    vmport_regs->ebx = env->regs[R_EBX];
   1027    vmport_regs->ecx = env->regs[R_ECX];
   1028    vmport_regs->edx = env->regs[R_EDX];
   1029    vmport_regs->esi = env->regs[R_ESI];
   1030    vmport_regs->edi = env->regs[R_EDI];
   1031}
   1032
   1033static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req)
   1034{
   1035    vmware_regs_t *vmport_regs;
   1036
   1037    assert(state->shared_vmport_page);
   1038    vmport_regs =
   1039        &state->shared_vmport_page->vcpu_vmport_regs[state->send_vcpu];
   1040    QEMU_BUILD_BUG_ON(sizeof(*req) < sizeof(*vmport_regs));
   1041
   1042    current_cpu = state->cpu_by_vcpu_id[state->send_vcpu];
   1043    regs_to_cpu(vmport_regs, req);
   1044    cpu_ioreq_pio(req);
   1045    regs_from_cpu(vmport_regs);
   1046    current_cpu = NULL;
   1047}
   1048
   1049static void handle_ioreq(XenIOState *state, ioreq_t *req)
   1050{
   1051    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
   1052                       req->addr, req->data, req->count, req->size);
   1053
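            /* Mask off bits of the write value above the access size. */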
   1054    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
   1055            (req->size < sizeof (target_ulong))) {
   1056        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
   1057    }
   1058
   1059    if (req->dir == IOREQ_WRITE)
   1060        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
   1061                                 req->addr, req->data, req->count, req->size);
   1062
   1063    switch (req->type) {
   1064        case IOREQ_TYPE_PIO:
   1065            cpu_ioreq_pio(req);
   1066            break;
   1067        case IOREQ_TYPE_COPY:
   1068            cpu_ioreq_move(req);
   1069            break;
   1070        case IOREQ_TYPE_VMWARE_PORT:
   1071            handle_vmport_ioreq(state, req);
   1072            break;
   1073        case IOREQ_TYPE_TIMEOFFSET:
   1074            break;
   1075        case IOREQ_TYPE_INVALIDATE:
   1076            xen_invalidate_map_cache();
   1077            break;
   1078        case IOREQ_TYPE_PCI_CONFIG:
   1079            cpu_ioreq_config(state, req);
   1080            break;
   1081        default:
   1082            hw_error("Invalid ioreq type 0x%x\n", req->type);
   1083    }
   1084    if (req->dir == IOREQ_READ) {
   1085        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
   1086                                req->addr, req->data, req->count, req->size);
   1087    }
   1088}
   1089
   1090static int handle_buffered_iopage(XenIOState *state)
   1091{
   1092    buffered_iopage_t *buf_page = state->buffered_io_page;
   1093    buf_ioreq_t *buf_req = NULL;
   1094    ioreq_t req;
   1095    int qw;
   1096
   1097    if (!buf_page) {
   1098        return 0;
   1099    }
   1100
   1101    memset(&req, 0x00, sizeof(req));
   1102    req.state = STATE_IOREQ_READY;
   1103    req.count = 1;
   1104    req.dir = IOREQ_WRITE;
   1105
   1106    for (;;) {
   1107        uint32_t rdptr = buf_page->read_pointer, wrptr;
   1108
   1109        xen_rmb();
   1110        wrptr = buf_page->write_pointer;
   1111        xen_rmb();
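                /* If read_pointer moved since it was sampled, the rdptr/wrptr pair
                 * is inconsistent; retry with fresh values. */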
   1112        if (rdptr != buf_page->read_pointer) {
   1113            continue;
   1114        }
   1115        if (rdptr == wrptr) {
   1116            break;
   1117        }
   1118        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
   1119        req.size = 1U << buf_req->size;
   1120        req.addr = buf_req->addr;
   1121        req.data = buf_req->data;
   1122        req.type = buf_req->type;
   1123        xen_rmb();
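                /* A 64-bit request spans two consecutive slots; the second slot
                 * supplies the upper 32 bits of data. */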
   1124        qw = (req.size == 8);
   1125        if (qw) {
   1126            if (rdptr + 1 == wrptr) {
   1127                hw_error("Incomplete quad word buffered ioreq");
   1128            }
   1129            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
   1130                                           IOREQ_BUFFER_SLOT_NUM];
   1131            req.data |= ((uint64_t)buf_req->data) << 32;
   1132            xen_rmb();
   1133        }
   1134
   1135        handle_ioreq(state, &req);
   1136
   1137        /* Only req.data may get updated by handle_ioreq(), albeit even that
   1138         * should not happen as such data would never make it to the guest (we
   1139         * can only usefully see writes here after all).
   1140         */
   1141        assert(req.state == STATE_IOREQ_READY);
   1142        assert(req.count == 1);
   1143        assert(req.dir == IOREQ_WRITE);
   1144        assert(!req.data_is_ptr);
   1145
   1146        qatomic_add(&buf_page->read_pointer, qw + 1);
   1147    }
   1148
   1149    return req.count;
   1150}
   1151
   1152static void handle_buffered_io(void *opaque)
   1153{
   1154    XenIOState *state = opaque;
   1155
   1156    if (handle_buffered_iopage(state)) {
   1157        timer_mod(state->buffered_io_timer,
   1158                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
   1159    } else {
   1160        timer_del(state->buffered_io_timer);
   1161        xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
   1162    }
   1163}
   1164
   1165static void cpu_handle_ioreq(void *opaque)
   1166{
   1167    XenIOState *state = opaque;
   1168    ioreq_t *req = cpu_get_ioreq(state);
   1169
   1170    handle_buffered_iopage(state);
   1171    if (req) {
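                /* Work on a local copy of the request; only data is written back
                 * to the shared page below. */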
   1172        ioreq_t copy = *req;
   1173
   1174        xen_rmb();
   1175        handle_ioreq(state, &copy);
   1176        req->data = copy.data;
   1177
   1178        if (req->state != STATE_IOREQ_INPROCESS) {
   1179            fprintf(stderr, "Badness in I/O request ... not in service?!: "
   1180                    "%x, ptr: %x, port: %"PRIx64", "
   1181                    "data: %"PRIx64", count: %u, size: %u, type: %u\n",
   1182                    req->state, req->data_is_ptr, req->addr,
   1183                    req->data, req->count, req->size, req->type);
   1184            destroy_hvm_domain(false);
   1185            return;
   1186        }
   1187
   1188        xen_wmb(); /* Update ioreq contents /then/ update state. */
   1189
   1190        /*
   1191         * We do this before we send the response so that the tools
   1192         * have the opportunity to pick up on the reset before the
   1193         * guest resumes and does a hlt with interrupts disabled which
   1194         * causes Xen to powerdown the domain.
   1195         */
   1196        if (runstate_is_running()) {
   1197            ShutdownCause request;
   1198
   1199            if (qemu_shutdown_requested_get()) {
   1200                destroy_hvm_domain(false);
   1201            }
   1202            request = qemu_reset_requested_get();
   1203            if (request) {
   1204                qemu_system_reset(request);
   1205                destroy_hvm_domain(true);
   1206            }
   1207        }
   1208
   1209        req->state = STATE_IORESP_READY;
   1210        xenevtchn_notify(state->xce_handle,
   1211                         state->ioreq_local_port[state->send_vcpu]);
   1212    }
   1213}
   1214
   1215static void xen_main_loop_prepare(XenIOState *state)
   1216{
   1217    int evtchn_fd = -1;
   1218
   1219    if (state->xce_handle != NULL) {
   1220        evtchn_fd = xenevtchn_fd(state->xce_handle);
   1221    }
   1222
   1223    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
   1224                                                 state);
   1225
   1226    if (evtchn_fd != -1) {
   1227        CPUState *cpu_state;
   1228
   1229        DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
   1230        CPU_FOREACH(cpu_state) {
   1231            DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
   1232                    __func__, cpu_state->cpu_index, cpu_state);
   1233            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
   1234        }
   1235        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
   1236    }
   1237}
   1238
   1239
   1240static void xen_hvm_change_state_handler(void *opaque, bool running,
   1241                                         RunState rstate)
   1242{
   1243    XenIOState *state = opaque;
   1244
   1245    if (running) {
   1246        xen_main_loop_prepare(state);
   1247    }
   1248
   1249    xen_set_ioreq_server_state(xen_domid,
   1250                               state->ioservid,
   1251                               (rstate == RUN_STATE_RUNNING));
   1252}
   1253
   1254static void xen_exit_notifier(Notifier *n, void *data)
   1255{
   1256    XenIOState *state = container_of(n, XenIOState, exit);
   1257
   1258    xen_destroy_ioreq_server(xen_domid, state->ioservid);
   1259    if (state->fres != NULL) {
   1260        xenforeignmemory_unmap_resource(xen_fmem, state->fres);
   1261    }
   1262
   1263    xenevtchn_close(state->xce_handle);
   1264    xs_daemon_close(state->xenstore);
   1265}
   1266
   1267#ifdef XEN_COMPAT_PHYSMAP
   1268static void xen_read_physmap(XenIOState *state)
   1269{
   1270    XenPhysmap *physmap = NULL;
   1271    unsigned int len, num, i;
   1272    char path[80], *value = NULL;
   1273    char **entries = NULL;
   1274
   1275    snprintf(path, sizeof(path),
   1276            "/local/domain/0/device-model/%d/physmap", xen_domid);
   1277    entries = xs_directory(state->xenstore, 0, path, &num);
   1278    if (entries == NULL)
   1279        return;
   1280
   1281    for (i = 0; i < num; i++) {
   1282        physmap = g_malloc(sizeof (XenPhysmap));
   1283        physmap->phys_offset = strtoull(entries[i], NULL, 16);
   1284        snprintf(path, sizeof(path),
   1285                "/local/domain/0/device-model/%d/physmap/%s/start_addr",
   1286                xen_domid, entries[i]);
   1287        value = xs_read(state->xenstore, 0, path, &len);
   1288        if (value == NULL) {
   1289            g_free(physmap);
   1290            continue;
   1291        }
   1292        physmap->start_addr = strtoull(value, NULL, 16);
   1293        free(value);
   1294
   1295        snprintf(path, sizeof(path),
   1296                "/local/domain/0/device-model/%d/physmap/%s/size",
   1297                xen_domid, entries[i]);
   1298        value = xs_read(state->xenstore, 0, path, &len);
   1299        if (value == NULL) {
   1300            g_free(physmap);
   1301            continue;
   1302        }
   1303        physmap->size = strtoull(value, NULL, 16);
   1304        free(value);
   1305
   1306        snprintf(path, sizeof(path),
   1307                "/local/domain/0/device-model/%d/physmap/%s/name",
   1308                xen_domid, entries[i]);
   1309        physmap->name = xs_read(state->xenstore, 0, path, &len);
   1310
   1311        QLIST_INSERT_HEAD(&xen_physmap, physmap, list);
   1312    }
   1313    free(entries);
   1314}
   1315#else
   1316static void xen_read_physmap(XenIOState *state)
   1317{
   1318}
   1319#endif
   1320
   1321static void xen_wakeup_notifier(Notifier *notifier, void *data)
   1322{
   1323    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
   1324}
   1325
   1326static int xen_map_ioreq_server(XenIOState *state)
   1327{
   1328    void *addr = NULL;
   1329    xen_pfn_t ioreq_pfn;
   1330    xen_pfn_t bufioreq_pfn;
   1331    evtchn_port_t bufioreq_evtchn;
   1332    int rc;
   1333
   1334    /*
   1335     * Attempt to map using the resource API and fall back to normal
   1336     * foreign mapping if this is not supported.
   1337     */
   1338    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
   1339    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);
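            /* Frame 0 of the mapped resource is the buffered ioreq page and
             * frame 1 the synchronous ioreq page, as asserted above. */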
   1340    state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
   1341                                         XENMEM_resource_ioreq_server,
   1342                                         state->ioservid, 0, 2,
   1343                                         &addr,
   1344                                         PROT_READ | PROT_WRITE, 0);
   1345    if (state->fres != NULL) {
   1346        trace_xen_map_resource_ioreq(state->ioservid, addr);
   1347        state->buffered_io_page = addr;
   1348        state->shared_page = addr + TARGET_PAGE_SIZE;
   1349    } else if (errno != EOPNOTSUPP) {
   1350        error_report("failed to map ioreq server resources: error %d handle=%p",
   1351                     errno, xen_xc);
   1352        return -1;
   1353    }
   1354
   1355    rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
   1356                                   (state->shared_page == NULL) ?
   1357                                   &ioreq_pfn : NULL,
   1358                                   (state->buffered_io_page == NULL) ?
   1359                                   &bufioreq_pfn : NULL,
   1360                                   &bufioreq_evtchn);
   1361    if (rc < 0) {
   1362        error_report("failed to get ioreq server info: error %d handle=%p",
   1363                     errno, xen_xc);
   1364        return rc;
   1365    }
   1366
   1367    if (state->shared_page == NULL) {
   1368        DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
   1369
   1370        state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
   1371                                                  PROT_READ | PROT_WRITE,
   1372                                                  1, &ioreq_pfn, NULL);
   1373        if (state->shared_page == NULL) {
   1374            error_report("map shared IO page returned error %d handle=%p",
   1375                         errno, xen_xc);
   1376        }
   1377    }
   1378
   1379    if (state->buffered_io_page == NULL) {
   1380        DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
   1381
   1382        state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
   1383                                                       PROT_READ | PROT_WRITE,
   1384                                                       1, &bufioreq_pfn,
   1385                                                       NULL);
   1386        if (state->buffered_io_page == NULL) {
   1387            error_report("map buffered IO page returned error %d", errno);
   1388            return -1;
   1389        }
   1390    }
   1391
   1392    if (state->shared_page == NULL || state->buffered_io_page == NULL) {
   1393        return -1;
   1394    }
   1395
   1396    DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);
   1397
   1398    state->bufioreq_remote_port = bufioreq_evtchn;
   1399
   1400    return 0;
   1401}
   1402
   1403void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
   1404{
   1405    MachineState *ms = MACHINE(pcms);
   1406    unsigned int max_cpus = ms->smp.max_cpus;
   1407    int i, rc;
   1408    xen_pfn_t ioreq_pfn;
   1409    XenIOState *state;
   1410
   1411    state = g_malloc0(sizeof (XenIOState));
   1412
   1413    state->xce_handle = xenevtchn_open(NULL, 0);
   1414    if (state->xce_handle == NULL) {
   1415        perror("xen: event channel open");
   1416        goto err;
   1417    }
   1418
   1419    state->xenstore = xs_daemon_open();
   1420    if (state->xenstore == NULL) {
   1421        perror("xen: xenstore open");
   1422        goto err;
   1423    }
   1424
   1425    xen_create_ioreq_server(xen_domid, &state->ioservid);
   1426
   1427    state->exit.notify = xen_exit_notifier;
   1428    qemu_add_exit_notifier(&state->exit);
   1429
   1430    state->suspend.notify = xen_suspend_notifier;
   1431    qemu_register_suspend_notifier(&state->suspend);
   1432
   1433    state->wakeup.notify = xen_wakeup_notifier;
   1434    qemu_register_wakeup_notifier(&state->wakeup);
   1435
   1436    /*
   1437     * Register wake-up support in QMP query-current-machine API
   1438     */
   1439    qemu_register_wakeup_support();
   1440
   1441    rc = xen_map_ioreq_server(state);
   1442    if (rc < 0) {
   1443        goto err;
   1444    }
   1445
   1446    rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn);
   1447    if (!rc) {
   1448        DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn);
   1449        state->shared_vmport_page =
   1450            xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE,
   1451                                 1, &ioreq_pfn, NULL);
   1452        if (state->shared_vmport_page == NULL) {
   1453            error_report("map shared vmport IO page returned error %d handle=%p",
   1454                         errno, xen_xc);
   1455            goto err;
   1456        }
   1457    } else if (rc != -ENOSYS) {
   1458        error_report("get vmport regs pfn returned error %d, rc=%d",
   1459                     errno, rc);
   1460        goto err;
   1461    }
   1462
   1463    /* Note: cpus is empty at this point in init */
   1464    state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *));
   1465
   1466    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
   1467    if (rc < 0) {
   1468        error_report("failed to enable ioreq server info: error %d handle=%p",
   1469                     errno, xen_xc);
   1470        goto err;
   1471    }
   1472
   1473    state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));
   1474
    1475    /* FIXME: what if we overflow the page here? */
   1476    for (i = 0; i < max_cpus; i++) {
   1477        rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
   1478                                        xen_vcpu_eport(state->shared_page, i));
   1479        if (rc == -1) {
   1480            error_report("shared evtchn %d bind error %d", i, errno);
   1481            goto err;
   1482        }
   1483        state->ioreq_local_port[i] = rc;
   1484    }
   1485
   1486    rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
   1487                                    state->bufioreq_remote_port);
   1488    if (rc == -1) {
   1489        error_report("buffered evtchn bind error %d", errno);
   1490        goto err;
   1491    }
   1492    state->bufioreq_local_port = rc;
   1493
   1494    /* Init RAM management */
   1495#ifdef XEN_COMPAT_PHYSMAP
   1496    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
   1497#else
   1498    xen_map_cache_init(NULL, state);
   1499#endif
   1500    xen_ram_init(pcms, ms->ram_size, ram_memory);
   1501
   1502    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
   1503
   1504    state->memory_listener = xen_memory_listener;
   1505    memory_listener_register(&state->memory_listener, &address_space_memory);
   1506    state->log_for_dirtybit = NULL;
   1507
   1508    state->io_listener = xen_io_listener;
   1509    memory_listener_register(&state->io_listener, &address_space_io);
   1510
   1511    state->device_listener = xen_device_listener;
   1512    QLIST_INIT(&state->dev_list);
   1513    device_listener_register(&state->device_listener);
   1514
   1515    xen_bus_init();
   1516
   1517    /* Initialize backend core & drivers */
   1518    if (xen_be_init() != 0) {
   1519        error_report("xen backend core setup failed");
   1520        goto err;
   1521    }
   1522    xen_be_register_common();
   1523
   1524    QLIST_INIT(&xen_physmap);
   1525    xen_read_physmap(state);
   1526
   1527    /* Disable ACPI build because Xen handles it */
   1528    pcms->acpi_build_enabled = false;
   1529
   1530    return;
   1531
   1532err:
   1533    error_report("xen hardware virtual machine initialisation failed");
   1534    exit(1);
   1535}
   1536
   1537void destroy_hvm_domain(bool reboot)
   1538{
   1539    xc_interface *xc_handle;
   1540    int sts;
   1541    int rc;
   1542
   1543    unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;
   1544
   1545    if (xen_dmod) {
   1546        rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
   1547        if (!rc) {
   1548            return;
   1549        }
   1550        if (errno != ENOTTY /* old Xen */) {
   1551            perror("xendevicemodel_shutdown failed");
   1552        }
   1553        /* well, try the old thing then */
   1554    }
   1555
   1556    xc_handle = xc_interface_open(0, 0, 0);
   1557    if (xc_handle == NULL) {
   1558        fprintf(stderr, "Cannot acquire xenctrl handle\n");
   1559    } else {
   1560        sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
   1561        if (sts != 0) {
   1562            fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
   1563                    "sts %d, %s\n", reboot ? "reboot" : "poweroff",
   1564                    sts, strerror(errno));
   1565        } else {
   1566            fprintf(stderr, "Issued domain %d %s\n", xen_domid,
   1567                    reboot ? "reboot" : "poweroff");
   1568        }
   1569        xc_interface_close(xc_handle);
   1570    }
   1571}
   1572
   1573void xen_register_framebuffer(MemoryRegion *mr)
   1574{
   1575    framebuffer = mr;
   1576}
   1577
   1578void xen_shutdown_fatal_error(const char *fmt, ...)
   1579{
   1580    va_list ap;
   1581
   1582    va_start(ap, fmt);
   1583    vfprintf(stderr, fmt, ap);
   1584    va_end(ap);
   1585    fprintf(stderr, "Will destroy the domain.\n");
   1586    /* destroy the domain */
   1587    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
   1588}
   1589
   1590void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
   1591{
   1592    if (unlikely(xen_in_migration)) {
   1593        int rc;
   1594        ram_addr_t start_pfn, nb_pages;
   1595
   1596        start = xen_phys_offset_to_gaddr(start, length);
   1597
   1598        if (length == 0) {
   1599            length = TARGET_PAGE_SIZE;
   1600        }
   1601        start_pfn = start >> TARGET_PAGE_BITS;
   1602        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
   1603            - start_pfn;
   1604        rc = xen_modified_memory(xen_domid, start_pfn, nb_pages);
   1605        if (rc) {
   1606            fprintf(stderr,
   1607                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
   1608                    __func__, start, nb_pages, errno, strerror(errno));
   1609        }
   1610    }
   1611}
   1612
   1613void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
   1614{
   1615    if (enable) {
   1616        memory_global_dirty_log_start();
   1617    } else {
   1618        memory_global_dirty_log_stop();
   1619    }
   1620}