cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

kvmvapic.c (24719B)


      1/*
      2 * TPR optimization for 32-bit Windows guests (XP and Server 2003)
      3 *
      4 * Copyright (C) 2007-2008 Qumranet Technologies
      5 * Copyright (C) 2012      Jan Kiszka, Siemens AG
      6 *
      7 * This work is licensed under the terms of the GNU GPL version 2, or
      8 * (at your option) any later version. See the COPYING file in the
      9 * top-level directory.
     10 */
     11
     12#include "qemu/osdep.h"
     13#include "qemu/module.h"
     14#include "sysemu/sysemu.h"
     15#include "sysemu/cpus.h"
     16#include "sysemu/hw_accel.h"
     17#include "sysemu/kvm.h"
     18#include "sysemu/runstate.h"
     19#include "hw/i386/apic_internal.h"
     20#include "hw/sysbus.h"
     21#include "hw/boards.h"
     22#include "migration/vmstate.h"
     23#include "qom/object.h"
     24
/* I/O port at which the device registers its region (see vapic_realize) */
#define VAPIC_IO_PORT           0x7e

/*
 * Per-CPU offset into the VAPIC area is (cpu number << VAPIC_CPU_SHIFT);
 * the same shift is written to the guest ROM state as vcpu_shift.
 */
#define VAPIC_CPU_SHIFT         7

/* The ROM size byte read from the ROM header counts blocks of this size */
#define ROM_BLOCK_SIZE          512
/* Mask that rounds an address down to a ROM_BLOCK_SIZE boundary */
#define ROM_BLOCK_MASK          (~(ROM_BLOCK_SIZE - 1))
     31
/*
 * Activation state of the device, kept in VAPICROMState.state.
 */
typedef enum VAPICMode {
    /* set on reset and when vapic_prepare() fails */
    VAPIC_INACTIVE = 0,
    /* set by vapic_enable() / do_vapic_enable() */
    VAPIC_ACTIVE   = 1,
    /* set when the guest reported its ROM state via the 16-bit port write */
    VAPIC_STANDBY  = 2,
} VAPICMode;
     37
/*
 * Call targets inside the option ROM that do_patch_instruction() redirects
 * patched TPR-accessing guest instructions to.
 */
typedef struct VAPICHandlers {
    /* target for patched writes that first push the value (0x89, 0xc7) */
    uint32_t set_tpr;
    /* target for the patched "mov eax to abs" (0xa3) */
    uint32_t set_tpr_eax;
    /* targets for patched reads, indexed by the ModRM reg field */
    uint32_t get_tpr[8];
    /* target for the patched "push r/m32" (0xff /6) */
    uint32_t get_tpr_stack;
} QEMU_PACKED VAPICHandlers;
     44
/*
 * State block located inside the option ROM in guest memory. It is copied
 * to and from guest physical memory as raw bytes (see read_guest_rom_state
 * and write_guest_rom_state); most users convert the 32-bit fields with
 * le32_to_cpu / cpu_to_le32.
 */
typedef struct GuestROMState {
    /* compared against "kvm aPiC" before the block is trusted */
    char signature[8];
    /* guest-virtual address the ROM contents are currently fixed up for */
    uint32_t vaddr;
    /* bounds of the fixup table walked by update_rom_mapping() */
    uint32_t fixup_start;
    uint32_t fixup_end;
    /* used together with vaddr to derive VAPICROMState.vapic_paddr */
    uint32_t vapic_vaddr;
    /* number of bytes zeroed at vapic_paddr on VM resume with several CPUs */
    uint32_t vapic_size;
    /* filled in by the host with VAPIC_CPU_SHIFT */
    uint32_t vcpu_shift;
    /* filled in by the host with the detected TPR virtual address */
    uint32_t real_tpr_addr;
    /* handler set used when the machine has exactly one CPU */
    VAPICHandlers up;
    /* handler set used otherwise */
    VAPICHandlers mp;
} QEMU_PACKED GuestROMState;
     57
/*
 * Device instance state of the "kvmvapic" sysbus device.
 */
struct VAPICROMState {
    SysBusDevice busdev;
    /* I/O region registered at VAPIC_IO_PORT */
    MemoryRegion io;
    /* alias created by vapic_map_rom_writable() on top of the option ROM */
    MemoryRegion rom;
    /* holds a VAPICMode value */
    uint32_t state;
    /* guest-physical address of the GuestROMState block; 0 after reset */
    uint32_t rom_state_paddr;
    /* guest-virtual address of that block, set by update_rom_mapping() */
    uint32_t rom_state_vaddr;
    /* guest-physical base of the VAPIC area (CPU 0's slot) */
    uint32_t vapic_paddr;
    /* guest-virtual address of the TPR (page offset 0x80) */
    uint32_t real_tpr_addr;
    /* host-side cached copy of the guest's ROM state block */
    GuestROMState rom_state;
    /* ROM size in bytes as derived from the ROM header size byte */
    size_t rom_size;
    /* true while the "rom" alias is mapped into the address space */
    bool rom_mapped_writable;
    /* VM state change handler registered by vapic_post_load(), or NULL */
    VMChangeStateEntry *vmsentry;
};
     72
/* QOM type name under which the device is registered (see vapic_type) */
#define TYPE_VAPIC "kvmvapic"
/* supplies the VAPIC() cast used throughout this file */
OBJECT_DECLARE_SIMPLE_TYPE(VAPICROMState, VAPIC)
     75
/* TPRInstruction.flags: second byte must be a ModRM with absolute disp32 */
#define TPR_INSTR_ABS_MODRM             0x1
/* TPRInstruction.flags: ModRM reg field must equal TPRInstruction.modrm_reg */
#define TPR_INSTR_MATCH_MODRM_REG       0x2

/*
 * Description of one guest instruction form that is recognized as a TPR
 * access (see opcode_matches and evaluate_tpr_instruction).
 */
typedef struct TPRInstruction {
    /* first instruction byte */
    uint8_t opcode;
    /* required ModRM reg value when TPR_INSTR_MATCH_MODRM_REG is set */
    uint8_t modrm_reg;
    /* combination of TPR_INSTR_* flags */
    unsigned int flags;
    /* whether the instruction reads or writes the TPR */
    TPRAccess access;
    /* total instruction length in bytes */
    size_t length;
    /* offset of the 32-bit absolute address within the instruction */
    off_t addr_offset;
} TPRInstruction;
     87
/*
 * Instruction forms recognized as TPR accesses.
 *
 * Must be sorted by length, shortest first: evaluate_tpr_instruction()
 * walks the table in order and takes the first entry that matches.
 */
static const TPRInstruction tpr_instr[] = {
    { /* mov abs to eax */
        .opcode = 0xa1,
        .access = TPR_ACCESS_READ,
        .length = 5,
        .addr_offset = 1,
    },
    { /* mov eax to abs */
        .opcode = 0xa3,
        .access = TPR_ACCESS_WRITE,
        .length = 5,
        .addr_offset = 1,
    },
    { /* mov r32 to r/m32 */
        .opcode = 0x89,
        .flags = TPR_INSTR_ABS_MODRM,
        .access = TPR_ACCESS_WRITE,
        .length = 6,
        .addr_offset = 2,
    },
    { /* mov r/m32 to r32 */
        .opcode = 0x8b,
        .flags = TPR_INSTR_ABS_MODRM,
        .access = TPR_ACCESS_READ,
        .length = 6,
        .addr_offset = 2,
    },
    { /* push r/m32 */
        .opcode = 0xff,
        .modrm_reg = 6,
        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
        .access = TPR_ACCESS_READ,
        .length = 6,
        .addr_offset = 2,
    },
    { /* mov imm32, r/m32 (c7/0) */
        .opcode = 0xc7,
        .modrm_reg = 0,
        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
        .access = TPR_ACCESS_WRITE,
        .length = 10,
        .addr_offset = 2,
    },
};
    133
    134static void read_guest_rom_state(VAPICROMState *s)
    135{
    136    cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state,
    137                             sizeof(GuestROMState));
    138}
    139
    140static void write_guest_rom_state(VAPICROMState *s)
    141{
    142    cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state,
    143                              sizeof(GuestROMState));
    144}
    145
    146static void update_guest_rom_state(VAPICROMState *s)
    147{
    148    read_guest_rom_state(s);
    149
    150    s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr);
    151    s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT);
    152
    153    write_guest_rom_state(s);
    154}
    155
    156static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env)
    157{
    158    CPUState *cs = env_cpu(env);
    159    hwaddr paddr;
    160    target_ulong addr;
    161
    162    if (s->state == VAPIC_ACTIVE) {
    163        return 0;
    164    }
    165    /*
    166     * If there is no prior TPR access instruction we could analyze (which is
    167     * the case after resume from hibernation), we need to scan the possible
    168     * virtual address space for the APIC mapping.
    169     */
    170    for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) {
    171        paddr = cpu_get_phys_page_debug(cs, addr);
    172        if (paddr != APIC_DEFAULT_ADDRESS) {
    173            continue;
    174        }
    175        s->real_tpr_addr = addr + 0x80;
    176        update_guest_rom_state(s);
    177        return 0;
    178    }
    179    return -1;
    180}
    181
    182static uint8_t modrm_reg(uint8_t modrm)
    183{
    184    return (modrm >> 3) & 7;
    185}
    186
    187static bool is_abs_modrm(uint8_t modrm)
    188{
    189    return (modrm & 0xc7) == 0x05;
    190}
    191
    192static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr)
    193{
    194    return opcode[0] == instr->opcode &&
    195        (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) &&
    196        (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) ||
    197         modrm_reg(opcode[1]) == instr->modrm_reg);
    198}
    199
    200static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu,
    201                                    target_ulong *pip, TPRAccess access)
    202{
    203    CPUState *cs = CPU(cpu);
    204    const TPRInstruction *instr;
    205    target_ulong ip = *pip;
    206    uint8_t opcode[2];
    207    uint32_t real_tpr_addr;
    208    int i;
    209
    210    if ((ip & 0xf0000000ULL) != 0x80000000ULL &&
    211        (ip & 0xf0000000ULL) != 0xe0000000ULL) {
    212        return -1;
    213    }
    214
    215    /*
    216     * Early Windows 2003 SMP initialization contains a
    217     *
    218     *   mov imm32, r/m32
    219     *
    220     * instruction that is patched by TPR optimization. The problem is that
    221     * RSP, used by the patched instruction, is zero, so the guest gets a
    222     * double fault and dies.
    223     */
    224    if (cpu->env.regs[R_ESP] == 0) {
    225        return -1;
    226    }
    227
    228    if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
    229        /*
    230         * KVM without kernel-based TPR access reporting will pass an IP that
    231         * points after the accessing instruction. So we need to look backward
    232         * to find the reason.
    233         */
    234        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
    235            instr = &tpr_instr[i];
    236            if (instr->access != access) {
    237                continue;
    238            }
    239            if (cpu_memory_rw_debug(cs, ip - instr->length, opcode,
    240                                    sizeof(opcode), 0) < 0) {
    241                return -1;
    242            }
    243            if (opcode_matches(opcode, instr)) {
    244                ip -= instr->length;
    245                goto instruction_ok;
    246            }
    247        }
    248        return -1;
    249    } else {
    250        if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) {
    251            return -1;
    252        }
    253        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
    254            instr = &tpr_instr[i];
    255            if (opcode_matches(opcode, instr)) {
    256                goto instruction_ok;
    257            }
    258        }
    259        return -1;
    260    }
    261
    262instruction_ok:
    263    /*
    264     * Grab the virtual TPR address from the instruction
    265     * and update the cached values.
    266     */
    267    if (cpu_memory_rw_debug(cs, ip + instr->addr_offset,
    268                            (void *)&real_tpr_addr,
    269                            sizeof(real_tpr_addr), 0) < 0) {
    270        return -1;
    271    }
    272    real_tpr_addr = le32_to_cpu(real_tpr_addr);
    273    if ((real_tpr_addr & 0xfff) != 0x80) {
    274        return -1;
    275    }
    276    s->real_tpr_addr = real_tpr_addr;
    277    update_guest_rom_state(s);
    278
    279    *pip = ip;
    280    return 0;
    281}
    282
    283static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip)
    284{
    285    CPUState *cs = env_cpu(env);
    286    hwaddr paddr;
    287    uint32_t rom_state_vaddr;
    288    uint32_t pos, patch, offset;
    289
    290    /* nothing to do if already activated */
    291    if (s->state == VAPIC_ACTIVE) {
    292        return 0;
    293    }
    294
    295    /* bail out if ROM init code was not executed (missing ROM?) */
    296    if (s->state == VAPIC_INACTIVE) {
    297        return -1;
    298    }
    299
    300    /* find out virtual address of the ROM */
    301    rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000);
    302    paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr);
    303    if (paddr == -1) {
    304        return -1;
    305    }
    306    paddr += rom_state_vaddr & ~TARGET_PAGE_MASK;
    307    if (paddr != s->rom_state_paddr) {
    308        return -1;
    309    }
    310    read_guest_rom_state(s);
    311    if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) {
    312        return -1;
    313    }
    314    s->rom_state_vaddr = rom_state_vaddr;
    315
    316    /* fixup addresses in ROM if needed */
    317    if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) {
    318        return 0;
    319    }
    320    for (pos = le32_to_cpu(s->rom_state.fixup_start);
    321         pos < le32_to_cpu(s->rom_state.fixup_end);
    322         pos += 4) {
    323        cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr,
    324                                 &offset, sizeof(offset));
    325        offset = le32_to_cpu(offset);
    326        cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch));
    327        patch = le32_to_cpu(patch);
    328        patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr);
    329        patch = cpu_to_le32(patch);
    330        cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch));
    331    }
    332    read_guest_rom_state(s);
    333    s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) -
    334        le32_to_cpu(s->rom_state.vaddr);
    335
    336    return 0;
    337}
    338
    339/*
    340 * Tries to read the unique processor number from the Kernel Processor Control
    341 * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR
    342 * cannot be accessed or is considered invalid. This also ensures that we are
    343 * not patching the wrong guest.
    344 */
    345static int get_kpcr_number(X86CPU *cpu)
    346{
    347    CPUX86State *env = &cpu->env;
    348    struct kpcr {
    349        uint8_t  fill1[0x1c];
    350        uint32_t self;
    351        uint8_t  fill2[0x31];
    352        uint8_t  number;
    353    } QEMU_PACKED kpcr;
    354
    355    if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base,
    356                            (void *)&kpcr, sizeof(kpcr), 0) < 0 ||
    357        kpcr.self != env->segs[R_FS].base) {
    358        return -1;
    359    }
    360    return kpcr.number;
    361}
    362
    363static int vapic_enable(VAPICROMState *s, X86CPU *cpu)
    364{
    365    int cpu_number = get_kpcr_number(cpu);
    366    hwaddr vapic_paddr;
    367    static const uint8_t enabled = 1;
    368
    369    if (cpu_number < 0) {
    370        return -1;
    371    }
    372    vapic_paddr = s->vapic_paddr +
    373        (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT);
    374    cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled),
    375                              &enabled, sizeof(enabled));
    376    apic_enable_vapic(cpu->apic_state, vapic_paddr);
    377
    378    s->state = VAPIC_ACTIVE;
    379
    380    return 0;
    381}
    382
    383static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte)
    384{
    385    cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1);
    386}
    387
    388static void patch_call(X86CPU *cpu, target_ulong ip, uint32_t target)
    389{
    390    uint32_t offset;
    391
    392    offset = cpu_to_le32(target - ip - 5);
    393    patch_byte(cpu, ip, 0xe8); /* call near */
    394    cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1);
    395}
    396
/*
 * Work item passed from patch_instruction() to do_patch_instruction().
 * It is heap-allocated by the former and freed by the latter.
 */
typedef struct PatchInfo {
    /* handler set (up or mp) whose entries become the call targets */
    VAPICHandlers *handler;
    /* guest-virtual address of the instruction to patch */
    target_ulong ip;
} PatchInfo;
    401
    402static void do_patch_instruction(CPUState *cs, run_on_cpu_data data)
    403{
    404    X86CPU *x86_cpu = X86_CPU(cs);
    405    PatchInfo *info = (PatchInfo *) data.host_ptr;
    406    VAPICHandlers *handlers = info->handler;
    407    target_ulong ip = info->ip;
    408    uint8_t opcode[2];
    409    uint32_t imm32 = 0;
    410
    411    cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0);
    412
    413    switch (opcode[0]) {
    414    case 0x89: /* mov r32 to r/m32 */
    415        patch_byte(x86_cpu, ip, 0x50 + modrm_reg(opcode[1]));  /* push reg */
    416        patch_call(x86_cpu, ip + 1, handlers->set_tpr);
    417        break;
    418    case 0x8b: /* mov r/m32 to r32 */
    419        patch_byte(x86_cpu, ip, 0x90);
    420        patch_call(x86_cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]);
    421        break;
    422    case 0xa1: /* mov abs to eax */
    423        patch_call(x86_cpu, ip, handlers->get_tpr[0]);
    424        break;
    425    case 0xa3: /* mov eax to abs */
    426        patch_call(x86_cpu, ip, handlers->set_tpr_eax);
    427        break;
    428    case 0xc7: /* mov imm32, r/m32 (c7/0) */
    429        patch_byte(x86_cpu, ip, 0x68);  /* push imm32 */
    430        cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0);
    431        cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1);
    432        patch_call(x86_cpu, ip + 5, handlers->set_tpr);
    433        break;
    434    case 0xff: /* push r/m32 */
    435        patch_byte(x86_cpu, ip, 0x50); /* push eax */
    436        patch_call(x86_cpu, ip + 1, handlers->get_tpr_stack);
    437        break;
    438    default:
    439        abort();
    440    }
    441
    442    g_free(info);
    443}
    444
    445static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
    446{
    447    MachineState *ms = MACHINE(qdev_get_machine());
    448    CPUState *cs = CPU(cpu);
    449    VAPICHandlers *handlers;
    450    PatchInfo *info;
    451
    452    if (ms->smp.cpus == 1) {
    453        handlers = &s->rom_state.up;
    454    } else {
    455        handlers = &s->rom_state.mp;
    456    }
    457
    458    info  = g_new(PatchInfo, 1);
    459    info->handler = handlers;
    460    info->ip = ip;
    461
    462    async_safe_run_on_cpu(cs, do_patch_instruction, RUN_ON_CPU_HOST_PTR(info));
    463}
    464
    465void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip,
    466                             TPRAccess access)
    467{
    468    VAPICROMState *s = VAPIC(dev);
    469    X86CPU *cpu = X86_CPU(cs);
    470    CPUX86State *env = &cpu->env;
    471
    472    cpu_synchronize_state(cs);
    473
    474    if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) {
    475        if (s->state == VAPIC_ACTIVE) {
    476            vapic_enable(s, cpu);
    477        }
    478        return;
    479    }
    480    if (update_rom_mapping(s, env, ip) < 0) {
    481        return;
    482    }
    483    if (vapic_enable(s, cpu) < 0) {
    484        return;
    485    }
    486    patch_instruction(s, cpu, ip);
    487}
    488
/*
 * Argument block passed through run_on_cpu() to
 * vapic_do_enable_tpr_reporting().
 */
typedef struct VAPICEnableTPRReporting {
    /* APIC device of the CPU the work item runs on */
    DeviceState *apic;
    /* requested TPR access reporting setting */
    bool enable;
} VAPICEnableTPRReporting;
    493
    494static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data)
    495{
    496    VAPICEnableTPRReporting *info = data.host_ptr;
    497    apic_enable_tpr_access_reporting(info->apic, info->enable);
    498}
    499
    500static void vapic_enable_tpr_reporting(bool enable)
    501{
    502    VAPICEnableTPRReporting info = {
    503        .enable = enable,
    504    };
    505    CPUState *cs;
    506    X86CPU *cpu;
    507
    508    CPU_FOREACH(cs) {
    509        cpu = X86_CPU(cs);
    510        info.apic = cpu->apic_state;
    511        run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info));
    512    }
    513}
    514
    515static void vapic_reset(DeviceState *dev)
    516{
    517    VAPICROMState *s = VAPIC(dev);
    518
    519    s->state = VAPIC_INACTIVE;
    520    s->rom_state_paddr = 0;
    521    vapic_enable_tpr_reporting(false);
    522}
    523
    524/*
    525 * Set the IRQ polling hypercalls to the supported variant:
    526 *  - vmcall if using KVM in-kernel irqchip
    527 *  - 32-bit VAPIC port write otherwise
    528 */
    529static int patch_hypercalls(VAPICROMState *s)
    530{
    531    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
    532    static const uint8_t vmcall_pattern[] = { /* vmcall */
    533        0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1
    534    };
    535    static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */
    536        0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e
    537    };
    538    uint8_t alternates[2];
    539    const uint8_t *pattern;
    540    const uint8_t *patch;
    541    off_t pos;
    542    uint8_t *rom;
    543
    544    rom = g_malloc(s->rom_size);
    545    cpu_physical_memory_read(rom_paddr, rom, s->rom_size);
    546
    547    for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) {
    548        if (kvm_irqchip_in_kernel()) {
    549            pattern = outl_pattern;
    550            alternates[0] = outl_pattern[7];
    551            alternates[1] = outl_pattern[7];
    552            patch = &vmcall_pattern[5];
    553        } else {
    554            pattern = vmcall_pattern;
    555            alternates[0] = vmcall_pattern[7];
    556            alternates[1] = 0xd9; /* AMD's VMMCALL */
    557            patch = &outl_pattern[5];
    558        }
    559        if (memcmp(rom + pos, pattern, 7) == 0 &&
    560            (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) {
    561            cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3);
    562            /*
    563             * Don't flush the tb here. Under ordinary conditions, the patched
    564             * calls are miles away from the current IP. Under malicious
    565             * conditions, the guest could trick us to crash.
    566             */
    567        }
    568    }
    569
    570    g_free(rom);
    571    return 0;
    572}
    573
    574/*
    575 * For TCG mode or the time KVM honors read-only memory regions, we need to
    576 * enable write access to the option ROM so that variables can be updated by
    577 * the guest.
    578 */
    579static int vapic_map_rom_writable(VAPICROMState *s)
    580{
    581    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
    582    MemoryRegionSection section;
    583    MemoryRegion *as;
    584    size_t rom_size;
    585    uint8_t *ram;
    586
    587    as = sysbus_address_space(&s->busdev);
    588
    589    if (s->rom_mapped_writable) {
    590        memory_region_del_subregion(as, &s->rom);
    591        object_unparent(OBJECT(&s->rom));
    592    }
    593
    594    /* grab RAM memory region (region @rom_paddr may still be pc.rom) */
    595    section = memory_region_find(as, 0, 1);
    596
    597    /* read ROM size from RAM region */
    598    if (rom_paddr + 2 >= memory_region_size(section.mr)) {
    599        return -1;
    600    }
    601    ram = memory_region_get_ram_ptr(section.mr);
    602    rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE;
    603    if (rom_size == 0) {
    604        return -1;
    605    }
    606    s->rom_size = rom_size;
    607
    608    /* We need to round to avoid creating subpages
    609     * from which we cannot run code. */
    610    rom_size += rom_paddr & ~TARGET_PAGE_MASK;
    611    rom_paddr &= TARGET_PAGE_MASK;
    612    rom_size = TARGET_PAGE_ALIGN(rom_size);
    613
    614    memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr,
    615                             rom_paddr, rom_size);
    616    memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000);
    617    s->rom_mapped_writable = true;
    618    memory_region_unref(section.mr);
    619
    620    return 0;
    621}
    622
    623static int vapic_prepare(VAPICROMState *s)
    624{
    625    if (vapic_map_rom_writable(s) < 0) {
    626        return -1;
    627    }
    628
    629    if (patch_hypercalls(s) < 0) {
    630        return -1;
    631    }
    632
    633    vapic_enable_tpr_reporting(true);
    634
    635    return 0;
    636}
    637
    638static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
    639                        unsigned int size)
    640{
    641    VAPICROMState *s = opaque;
    642    X86CPU *cpu;
    643    CPUX86State *env;
    644    hwaddr rom_paddr;
    645
    646    if (!current_cpu) {
    647        return;
    648    }
    649
    650    cpu_synchronize_state(current_cpu);
    651    cpu = X86_CPU(current_cpu);
    652    env = &cpu->env;
    653
    654    /*
    655     * The VAPIC supports two PIO-based hypercalls, both via port 0x7E.
    656     *  o 16-bit write access:
    657     *    Reports the option ROM initialization to the hypervisor. Written
    658     *    value is the offset of the state structure in the ROM.
    659     *  o 8-bit write access:
    660     *    Reactivates the VAPIC after a guest hibernation, i.e. after the
    661     *    option ROM content has been re-initialized by a guest power cycle.
    662     *  o 32-bit write access:
    663     *    Poll for pending IRQs, considering the current VAPIC state.
    664     */
    665    switch (size) {
    666    case 2:
    667        if (s->state == VAPIC_INACTIVE) {
    668            rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK;
    669            s->rom_state_paddr = rom_paddr + data;
    670
    671            s->state = VAPIC_STANDBY;
    672        }
    673        if (vapic_prepare(s) < 0) {
    674            s->state = VAPIC_INACTIVE;
    675            s->rom_state_paddr = 0;
    676            break;
    677        }
    678        break;
    679    case 1:
    680        if (kvm_enabled()) {
    681            /*
    682             * Disable triggering instruction in ROM by writing a NOP.
    683             *
    684             * We cannot do this in TCG mode as the reported IP is not
    685             * accurate.
    686             */
    687            pause_all_vcpus();
    688            patch_byte(cpu, env->eip - 2, 0x66);
    689            patch_byte(cpu, env->eip - 1, 0x90);
    690            resume_all_vcpus();
    691        }
    692
    693        if (s->state == VAPIC_ACTIVE) {
    694            break;
    695        }
    696        if (update_rom_mapping(s, env, env->eip) < 0) {
    697            break;
    698        }
    699        if (find_real_tpr_addr(s, env) < 0) {
    700            break;
    701        }
    702        vapic_enable(s, cpu);
    703        break;
    704    default:
    705    case 4:
    706        if (!kvm_irqchip_in_kernel()) {
    707            apic_poll_irq(cpu->apic_state);
    708        }
    709        break;
    710    }
    711}
    712
    713static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
    714{
    715    return 0xffffffff;
    716}
    717
/* Access callbacks for the I/O region created in vapic_realize(). */
static const MemoryRegionOps vapic_ops = {
    .write = vapic_write,
    .read = vapic_read,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
    723
    724static void vapic_realize(DeviceState *dev, Error **errp)
    725{
    726    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
    727    VAPICROMState *s = VAPIC(dev);
    728
    729    memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2);
    730    sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io);
    731    sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2);
    732
    733    option_rom[nb_option_roms].name = "kvmvapic.bin";
    734    option_rom[nb_option_roms].bootindex = -1;
    735    nb_option_roms++;
    736}
    737
    738static void do_vapic_enable(CPUState *cs, run_on_cpu_data data)
    739{
    740    VAPICROMState *s = data.host_ptr;
    741    X86CPU *cpu = X86_CPU(cs);
    742
    743    static const uint8_t enabled = 1;
    744    cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
    745                              &enabled, sizeof(enabled));
    746    apic_enable_vapic(cpu->apic_state, s->vapic_paddr);
    747    s->state = VAPIC_ACTIVE;
    748}
    749
    750static void kvmvapic_vm_state_change(void *opaque, bool running,
    751                                     RunState state)
    752{
    753    MachineState *ms = MACHINE(qdev_get_machine());
    754    VAPICROMState *s = opaque;
    755    uint8_t *zero;
    756
    757    if (!running) {
    758        return;
    759    }
    760
    761    if (s->state == VAPIC_ACTIVE) {
    762        if (ms->smp.cpus == 1) {
    763            run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s));
    764        } else {
    765            zero = g_malloc0(s->rom_state.vapic_size);
    766            cpu_physical_memory_write(s->vapic_paddr, zero,
    767                                      s->rom_state.vapic_size);
    768            g_free(zero);
    769        }
    770    }
    771
    772    qemu_del_vm_change_state_handler(s->vmsentry);
    773    s->vmsentry = NULL;
    774}
    775
    776static int vapic_post_load(void *opaque, int version_id)
    777{
    778    VAPICROMState *s = opaque;
    779
    780    /*
    781     * The old implementation of qemu-kvm did not provide the state
    782     * VAPIC_STANDBY. Reconstruct it.
    783     */
    784    if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) {
    785        s->state = VAPIC_STANDBY;
    786    }
    787
    788    if (s->state != VAPIC_INACTIVE) {
    789        if (vapic_prepare(s) < 0) {
    790            return -1;
    791        }
    792    }
    793
    794    if (!s->vmsentry) {
    795        s->vmsentry =
    796            qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s);
    797    }
    798    return 0;
    799}
    800
/* Migration description of one VAPICHandlers set, all fields in order. */
static const VMStateDescription vmstate_handlers = {
    .name = "kvmvapic-handlers",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(set_tpr, VAPICHandlers),
        VMSTATE_UINT32(set_tpr_eax, VAPICHandlers),
        VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8),
        VMSTATE_UINT32(get_tpr_stack, VAPICHandlers),
        VMSTATE_END_OF_LIST()
    }
};
    813
/*
 * Migration description of the cached GuestROMState. The 8-byte signature
 * is not transferred; its place in the stream is an unused filler.
 */
static const VMStateDescription vmstate_guest_rom = {
    .name = "kvmvapic-guest-rom",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UNUSED(8),     /* signature */
        VMSTATE_UINT32(vaddr, GuestROMState),
        VMSTATE_UINT32(fixup_start, GuestROMState),
        VMSTATE_UINT32(fixup_end, GuestROMState),
        VMSTATE_UINT32(vapic_vaddr, GuestROMState),
        VMSTATE_UINT32(vapic_size, GuestROMState),
        VMSTATE_UINT32(vcpu_shift, GuestROMState),
        VMSTATE_UINT32(real_tpr_addr, GuestROMState),
        VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
        VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
        VMSTATE_END_OF_LIST()
    }
};
    832
/*
 * Top-level migration description of the device. rom_size,
 * rom_mapped_writable and vmsentry are not part of the stream;
 * vapic_post_load() runs after the fields below have been loaded.
 */
static const VMStateDescription vmstate_vapic = {
    .name = "kvm-tpr-opt",      /* compatible with qemu-kvm VAPIC */
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = vapic_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom,
                       GuestROMState),
        VMSTATE_UINT32(state, VAPICROMState),
        VMSTATE_UINT32(real_tpr_addr, VAPICROMState),
        VMSTATE_UINT32(rom_state_vaddr, VAPICROMState),
        VMSTATE_UINT32(vapic_paddr, VAPICROMState),
        VMSTATE_UINT32(rom_state_paddr, VAPICROMState),
        VMSTATE_END_OF_LIST()
    }
};
    849
    850static void vapic_class_init(ObjectClass *klass, void *data)
    851{
    852    DeviceClass *dc = DEVICE_CLASS(klass);
    853
    854    dc->reset   = vapic_reset;
    855    dc->vmsd    = &vmstate_vapic;
    856    dc->realize = vapic_realize;
    857}
    858
/* QOM type description: a sysbus device backed by VAPICROMState. */
static const TypeInfo vapic_type = {
    .name          = TYPE_VAPIC,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(VAPICROMState),
    .class_init    = vapic_class_init,
};
    865
/* Register the kvmvapic type with QOM. */
static void vapic_register(void)
{
    type_register_static(&vapic_type);
}

/* hook vapic_register() into QEMU's type initialization */
type_init(vapic_register);