cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

whpx-all.c (58504B)


      1/*
      2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
      3 *
      4 * Copyright Microsoft Corp. 2017
      5 *
      6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
      7 * See the COPYING file in the top-level directory.
      8 *
      9 */
     10
     11#include "qemu/osdep.h"
     12#include "cpu.h"
     13#include "exec/address-spaces.h"
     14#include "exec/ioport.h"
     15#include "qemu-common.h"
     16#include "qemu/accel.h"
     17#include "sysemu/whpx.h"
     18#include "sysemu/cpus.h"
     19#include "sysemu/runstate.h"
     20#include "qemu/main-loop.h"
     21#include "hw/boards.h"
     22#include "hw/i386/ioapic.h"
     23#include "hw/i386/apic_internal.h"
     24#include "qemu/error-report.h"
     25#include "qapi/error.h"
     26#include "qapi/qapi-types-common.h"
     27#include "qapi/qapi-visit-common.h"
     28#include "migration/blocker.h"
     29#include <winerror.h>
     30
     31#include "whpx-internal.h"
     32#include "whpx-accel-ops.h"
     33
     34#include <WinHvPlatform.h>
     35#include <WinHvEmulation.h>
     36
     37#define HYPERV_APIC_BUS_FREQUENCY      (200000000ULL)
     38
     39static const WHV_REGISTER_NAME whpx_register_names[] = {
     40
     41    /* X64 General purpose registers */
     42    WHvX64RegisterRax,
     43    WHvX64RegisterRcx,
     44    WHvX64RegisterRdx,
     45    WHvX64RegisterRbx,
     46    WHvX64RegisterRsp,
     47    WHvX64RegisterRbp,
     48    WHvX64RegisterRsi,
     49    WHvX64RegisterRdi,
     50    WHvX64RegisterR8,
     51    WHvX64RegisterR9,
     52    WHvX64RegisterR10,
     53    WHvX64RegisterR11,
     54    WHvX64RegisterR12,
     55    WHvX64RegisterR13,
     56    WHvX64RegisterR14,
     57    WHvX64RegisterR15,
     58    WHvX64RegisterRip,
     59    WHvX64RegisterRflags,
     60
     61    /* X64 Segment registers */
     62    WHvX64RegisterEs,
     63    WHvX64RegisterCs,
     64    WHvX64RegisterSs,
     65    WHvX64RegisterDs,
     66    WHvX64RegisterFs,
     67    WHvX64RegisterGs,
     68    WHvX64RegisterLdtr,
     69    WHvX64RegisterTr,
     70
     71    /* X64 Table registers */
     72    WHvX64RegisterIdtr,
     73    WHvX64RegisterGdtr,
     74
     75    /* X64 Control Registers */
     76    WHvX64RegisterCr0,
     77    WHvX64RegisterCr2,
     78    WHvX64RegisterCr3,
     79    WHvX64RegisterCr4,
     80    WHvX64RegisterCr8,
     81
     82    /* X64 Debug Registers */
     83    /*
     84     * WHvX64RegisterDr0,
     85     * WHvX64RegisterDr1,
     86     * WHvX64RegisterDr2,
     87     * WHvX64RegisterDr3,
     88     * WHvX64RegisterDr6,
     89     * WHvX64RegisterDr7,
     90     */
     91
     92    /* X64 Floating Point and Vector Registers */
     93    WHvX64RegisterXmm0,
     94    WHvX64RegisterXmm1,
     95    WHvX64RegisterXmm2,
     96    WHvX64RegisterXmm3,
     97    WHvX64RegisterXmm4,
     98    WHvX64RegisterXmm5,
     99    WHvX64RegisterXmm6,
    100    WHvX64RegisterXmm7,
    101    WHvX64RegisterXmm8,
    102    WHvX64RegisterXmm9,
    103    WHvX64RegisterXmm10,
    104    WHvX64RegisterXmm11,
    105    WHvX64RegisterXmm12,
    106    WHvX64RegisterXmm13,
    107    WHvX64RegisterXmm14,
    108    WHvX64RegisterXmm15,
    109    WHvX64RegisterFpMmx0,
    110    WHvX64RegisterFpMmx1,
    111    WHvX64RegisterFpMmx2,
    112    WHvX64RegisterFpMmx3,
    113    WHvX64RegisterFpMmx4,
    114    WHvX64RegisterFpMmx5,
    115    WHvX64RegisterFpMmx6,
    116    WHvX64RegisterFpMmx7,
    117    WHvX64RegisterFpControlStatus,
    118    WHvX64RegisterXmmControlStatus,
    119
    120    /* X64 MSRs */
    121    WHvX64RegisterEfer,
    122#ifdef TARGET_X86_64
    123    WHvX64RegisterKernelGsBase,
    124#endif
    125    WHvX64RegisterApicBase,
    126    /* WHvX64RegisterPat, */
    127    WHvX64RegisterSysenterCs,
    128    WHvX64RegisterSysenterEip,
    129    WHvX64RegisterSysenterEsp,
    130    WHvX64RegisterStar,
    131#ifdef TARGET_X86_64
    132    WHvX64RegisterLstar,
    133    WHvX64RegisterCstar,
    134    WHvX64RegisterSfmask,
    135#endif
    136
    137    /* Interrupt / Event Registers */
    138    /*
    139     * WHvRegisterPendingInterruption,
    140     * WHvRegisterInterruptState,
    141     * WHvRegisterPendingEvent0,
    142     * WHvRegisterPendingEvent1
    143     * WHvX64RegisterDeliverabilityNotifications,
    144     */
    145};
    146
    147struct whpx_register_set {
    148    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
    149};
    150
    151struct whpx_vcpu {
    152    WHV_EMULATOR_HANDLE emulator;
    153    bool window_registered;
    154    bool interruptable;
    155    bool ready_for_pic_interrupt;
    156    uint64_t tpr;
    157    uint64_t apic_base;
    158    bool interruption_pending;
    159
    160    /* Must be the last field as it may have a tail */
    161    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
    162};
    163
    164static bool whpx_allowed;
    165static bool whp_dispatch_initialized;
    166static HMODULE hWinHvPlatform, hWinHvEmulation;
    167static uint32_t max_vcpu_index;
    168struct whpx_state whpx_global;
    169struct WHPDispatch whp_dispatch;
    170
    171
    172/*
    173 * VP support
    174 */
    175
    176static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
    177{
    178    return (struct whpx_vcpu *)cpu->hax_vcpu;
    179}
    180
    181static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
    182                                             int r86)
    183{
    184    WHV_X64_SEGMENT_REGISTER hs;
    185    unsigned flags = qs->flags;
    186
    187    hs.Base = qs->base;
    188    hs.Limit = qs->limit;
    189    hs.Selector = qs->selector;
    190
    191    if (v86) {
    192        hs.Attributes = 0;
    193        hs.SegmentType = 3;
    194        hs.Present = 1;
    195        hs.DescriptorPrivilegeLevel = 3;
    196        hs.NonSystemSegment = 1;
    197
    198    } else {
    199        hs.Attributes = (flags >> DESC_TYPE_SHIFT);
    200
    201        if (r86) {
    202            /* hs.Base &= 0xfffff; */
    203        }
    204    }
    205
    206    return hs;
    207}
    208
    209static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
    210{
    211    SegmentCache qs;
    212
    213    qs.base = hs->Base;
    214    qs.limit = hs->Limit;
    215    qs.selector = hs->Selector;
    216
    217    qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
    218
    219    return qs;
    220}
    221
    222static int whpx_set_tsc(CPUState *cpu)
    223{
    224    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    225    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
    226    WHV_REGISTER_VALUE tsc_val;
    227    HRESULT hr;
    228    struct whpx_state *whpx = &whpx_global;
    229
    230    /*
    231     * Suspend the partition prior to setting the TSC to reduce the variance
    232     * in TSC across vCPUs. When the first vCPU runs post suspend, the
    233     * partition is automatically resumed.
    234     */
    235    if (whp_dispatch.WHvSuspendPartitionTime) {
    236
    237        /*
    238         * Unable to suspend partition while setting TSC is not a fatal
    239         * error. It just increases the likelihood of TSC variance between
    240         * vCPUs and some guest OS are able to handle that just fine.
    241         */
    242        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
    243        if (FAILED(hr)) {
    244            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
    245        }
    246    }
    247
    248    tsc_val.Reg64 = env->tsc;
    249    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
    250        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
    251    if (FAILED(hr)) {
    252        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
    253        return -1;
    254    }
    255
    256    return 0;
    257}
    258
    259static void whpx_set_registers(CPUState *cpu, int level)
    260{
    261    struct whpx_state *whpx = &whpx_global;
    262    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    263    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    264    X86CPU *x86_cpu = X86_CPU(cpu);
    265    struct whpx_register_set vcxt;
    266    HRESULT hr;
    267    int idx;
    268    int idx_next;
    269    int i;
    270    int v86, r86;
    271
    272    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
    273
    274    /*
    275     * Following MSRs have side effects on the guest or are too heavy for
    276     * runtime. Limit them to full state update.
    277     */
    278    if (level >= WHPX_SET_RESET_STATE) {
    279        whpx_set_tsc(cpu);
    280    }
    281
    282    memset(&vcxt, 0, sizeof(struct whpx_register_set));
    283
    284    v86 = (env->eflags & VM_MASK);
    285    r86 = !(env->cr[0] & CR0_PE_MASK);
    286
    287    vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    288    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
    289
    290    idx = 0;
    291
    292    /* Indexes for first 16 registers match between HV and QEMU definitions */
    293    idx_next = 16;
    294    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
    295        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    296    }
    297    idx = idx_next;
    298
    299    /* Same goes for RIP and RFLAGS */
    300    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    301    vcxt.values[idx++].Reg64 = env->eip;
    302
    303    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    304    vcxt.values[idx++].Reg64 = env->eflags;
    305
    306    /* Translate 6+4 segment registers. HV and QEMU order matches  */
    307    assert(idx == WHvX64RegisterEs);
    308    for (i = 0; i < 6; i += 1, idx += 1) {
    309        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    310    }
    311
    312    assert(idx == WHvX64RegisterLdtr);
    313    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);
    314
    315    assert(idx == WHvX64RegisterTr);
    316    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);
    317
    318    assert(idx == WHvX64RegisterIdtr);
    319    vcxt.values[idx].Table.Base = env->idt.base;
    320    vcxt.values[idx].Table.Limit = env->idt.limit;
    321    idx += 1;
    322
    323    assert(idx == WHvX64RegisterGdtr);
    324    vcxt.values[idx].Table.Base = env->gdt.base;
    325    vcxt.values[idx].Table.Limit = env->gdt.limit;
    326    idx += 1;
    327
    328    /* CR0, 2, 3, 4, 8 */
    329    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    330    vcxt.values[idx++].Reg64 = env->cr[0];
    331    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    332    vcxt.values[idx++].Reg64 = env->cr[2];
    333    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    334    vcxt.values[idx++].Reg64 = env->cr[3];
    335    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    336    vcxt.values[idx++].Reg64 = env->cr[4];
    337    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    338    vcxt.values[idx++].Reg64 = vcpu->tpr;
    339
    340    /* 8 Debug Registers - Skipped */
    341
    342    /* 16 XMM registers */
    343    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    344    idx_next = idx + 16;
    345    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
    346        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
    347        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    348    }
    349    idx = idx_next;
    350
    351    /* 8 FP registers */
    352    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    353    for (i = 0; i < 8; i += 1, idx += 1) {
    354        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
    355        /* vcxt.values[idx].Fp.AsUINT128.High64 =
    356               env->fpregs[i].mmx.MMX_Q(1);
    357        */
    358    }
    359
    360    /* FP control status register */
    361    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    362    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    363    vcxt.values[idx].FpControlStatus.FpStatus =
    364        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    365    vcxt.values[idx].FpControlStatus.FpTag = 0;
    366    for (i = 0; i < 8; ++i) {
    367        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    368    }
    369    vcxt.values[idx].FpControlStatus.Reserved = 0;
    370    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    371    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    372    idx += 1;
    373
    374    /* XMM control status register */
    375    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    376    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    377    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    378    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    379    idx += 1;
    380
    381    /* MSRs */
    382    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    383    vcxt.values[idx++].Reg64 = env->efer;
    384#ifdef TARGET_X86_64
    385    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    386    vcxt.values[idx++].Reg64 = env->kernelgsbase;
    387#endif
    388
    389    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    390    vcxt.values[idx++].Reg64 = vcpu->apic_base;
    391
    392    /* WHvX64RegisterPat - Skipped */
    393
    394    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    395    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    396    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    397    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    398    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    399    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    400    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    401    vcxt.values[idx++].Reg64 = env->star;
    402#ifdef TARGET_X86_64
    403    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    404    vcxt.values[idx++].Reg64 = env->lstar;
    405    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    406    vcxt.values[idx++].Reg64 = env->cstar;
    407    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    408    vcxt.values[idx++].Reg64 = env->fmask;
    409#endif
    410
    411    /* Interrupt / Event Registers - Skipped */
    412
    413    assert(idx == RTL_NUMBER_OF(whpx_register_names));
    414
    415    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
    416        whpx->partition, cpu->cpu_index,
    417        whpx_register_names,
    418        RTL_NUMBER_OF(whpx_register_names),
    419        &vcxt.values[0]);
    420
    421    if (FAILED(hr)) {
    422        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
    423                     hr);
    424    }
    425
    426    return;
    427}
    428
    429static int whpx_get_tsc(CPUState *cpu)
    430{
    431    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    432    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
    433    WHV_REGISTER_VALUE tsc_val;
    434    HRESULT hr;
    435    struct whpx_state *whpx = &whpx_global;
    436
    437    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
    438        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
    439    if (FAILED(hr)) {
    440        error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
    441        return -1;
    442    }
    443
    444    env->tsc = tsc_val.Reg64;
    445    return 0;
    446}
    447
    448static void whpx_get_registers(CPUState *cpu)
    449{
    450    struct whpx_state *whpx = &whpx_global;
    451    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    452    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    453    X86CPU *x86_cpu = X86_CPU(cpu);
    454    struct whpx_register_set vcxt;
    455    uint64_t tpr, apic_base;
    456    HRESULT hr;
    457    int idx;
    458    int idx_next;
    459    int i;
    460
    461    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
    462
    463    if (!env->tsc_valid) {
    464        whpx_get_tsc(cpu);
    465        env->tsc_valid = !runstate_is_running();
    466    }
    467
    468    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
    469        whpx->partition, cpu->cpu_index,
    470        whpx_register_names,
    471        RTL_NUMBER_OF(whpx_register_names),
    472        &vcxt.values[0]);
    473    if (FAILED(hr)) {
    474        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
    475                     hr);
    476    }
    477
    478    idx = 0;
    479
    480    /* Indexes for first 16 registers match between HV and QEMU definitions */
    481    idx_next = 16;
    482    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
    483        env->regs[idx] = vcxt.values[idx].Reg64;
    484    }
    485    idx = idx_next;
    486
    487    /* Same goes for RIP and RFLAGS */
    488    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    489    env->eip = vcxt.values[idx++].Reg64;
    490    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    491    env->eflags = vcxt.values[idx++].Reg64;
    492
    493    /* Translate 6+4 segment registers. HV and QEMU order matches  */
    494    assert(idx == WHvX64RegisterEs);
    495    for (i = 0; i < 6; i += 1, idx += 1) {
    496        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    497    }
    498
    499    assert(idx == WHvX64RegisterLdtr);
    500    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    501    assert(idx == WHvX64RegisterTr);
    502    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    503    assert(idx == WHvX64RegisterIdtr);
    504    env->idt.base = vcxt.values[idx].Table.Base;
    505    env->idt.limit = vcxt.values[idx].Table.Limit;
    506    idx += 1;
    507    assert(idx == WHvX64RegisterGdtr);
    508    env->gdt.base = vcxt.values[idx].Table.Base;
    509    env->gdt.limit = vcxt.values[idx].Table.Limit;
    510    idx += 1;
    511
    512    /* CR0, 2, 3, 4, 8 */
    513    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    514    env->cr[0] = vcxt.values[idx++].Reg64;
    515    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    516    env->cr[2] = vcxt.values[idx++].Reg64;
    517    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    518    env->cr[3] = vcxt.values[idx++].Reg64;
    519    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    520    env->cr[4] = vcxt.values[idx++].Reg64;
    521    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    522    tpr = vcxt.values[idx++].Reg64;
    523    if (tpr != vcpu->tpr) {
    524        vcpu->tpr = tpr;
    525        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    526    }
    527
    528    /* 8 Debug Registers - Skipped */
    529
    530    /* 16 XMM registers */
    531    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    532    idx_next = idx + 16;
    533    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
    534        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
    535        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    536    }
    537    idx = idx_next;
    538
    539    /* 8 FP registers */
    540    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    541    for (i = 0; i < 8; i += 1, idx += 1) {
    542        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
    543        /* env->fpregs[i].mmx.MMX_Q(1) =
    544               vcxt.values[idx].Fp.AsUINT128.High64;
    545        */
    546    }
    547
    548    /* FP control status register */
    549    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    550    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    551    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    552    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    553    for (i = 0; i < 8; ++i) {
    554        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    555    }
    556    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    557    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    558    idx += 1;
    559
    560    /* XMM control status register */
    561    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    562    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    563    idx += 1;
    564
    565    /* MSRs */
    566    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    567    env->efer = vcxt.values[idx++].Reg64;
    568#ifdef TARGET_X86_64
    569    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    570    env->kernelgsbase = vcxt.values[idx++].Reg64;
    571#endif
    572
    573    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    574    apic_base = vcxt.values[idx++].Reg64;
    575    if (apic_base != vcpu->apic_base) {
    576        vcpu->apic_base = apic_base;
    577        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    578    }
    579
    580    /* WHvX64RegisterPat - Skipped */
    581
    582    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    583    env->sysenter_cs = vcxt.values[idx++].Reg64;
    584    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    585    env->sysenter_eip = vcxt.values[idx++].Reg64;
    586    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    587    env->sysenter_esp = vcxt.values[idx++].Reg64;
    588    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    589    env->star = vcxt.values[idx++].Reg64;
    590#ifdef TARGET_X86_64
    591    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    592    env->lstar = vcxt.values[idx++].Reg64;
    593    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    594    env->cstar = vcxt.values[idx++].Reg64;
    595    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    596    env->fmask = vcxt.values[idx++].Reg64;
    597#endif
    598
    599    /* Interrupt / Event Registers - Skipped */
    600
    601    assert(idx == RTL_NUMBER_OF(whpx_register_names));
    602
    603    if (whpx_apic_in_platform()) {
    604        whpx_apic_get(x86_cpu->apic_state);
    605    }
    606
    607    return;
    608}
    609
    610static HRESULT CALLBACK whpx_emu_ioport_callback(
    611    void *ctx,
    612    WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
    613{
    614    MemTxAttrs attrs = { 0 };
    615    address_space_rw(&address_space_io, IoAccess->Port, attrs,
    616                     &IoAccess->Data, IoAccess->AccessSize,
    617                     IoAccess->Direction);
    618    return S_OK;
    619}
    620
    621static HRESULT CALLBACK whpx_emu_mmio_callback(
    622    void *ctx,
    623    WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
    624{
    625    cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
    626                           ma->Direction);
    627    return S_OK;
    628}
    629
    630static HRESULT CALLBACK whpx_emu_getreg_callback(
    631    void *ctx,
    632    const WHV_REGISTER_NAME *RegisterNames,
    633    UINT32 RegisterCount,
    634    WHV_REGISTER_VALUE *RegisterValues)
    635{
    636    HRESULT hr;
    637    struct whpx_state *whpx = &whpx_global;
    638    CPUState *cpu = (CPUState *)ctx;
    639
    640    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
    641        whpx->partition, cpu->cpu_index,
    642        RegisterNames, RegisterCount,
    643        RegisterValues);
    644    if (FAILED(hr)) {
    645        error_report("WHPX: Failed to get virtual processor registers,"
    646                     " hr=%08lx", hr);
    647    }
    648
    649    return hr;
    650}
    651
    652static HRESULT CALLBACK whpx_emu_setreg_callback(
    653    void *ctx,
    654    const WHV_REGISTER_NAME *RegisterNames,
    655    UINT32 RegisterCount,
    656    const WHV_REGISTER_VALUE *RegisterValues)
    657{
    658    HRESULT hr;
    659    struct whpx_state *whpx = &whpx_global;
    660    CPUState *cpu = (CPUState *)ctx;
    661
    662    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
    663        whpx->partition, cpu->cpu_index,
    664        RegisterNames, RegisterCount,
    665        RegisterValues);
    666    if (FAILED(hr)) {
    667        error_report("WHPX: Failed to set virtual processor registers,"
    668                     " hr=%08lx", hr);
    669    }
    670
    671    /*
    672     * The emulator just successfully wrote the register state. We clear the
    673     * dirty state so we avoid the double write on resume of the VP.
    674     */
    675    cpu->vcpu_dirty = false;
    676
    677    return hr;
    678}
    679
    680static HRESULT CALLBACK whpx_emu_translate_callback(
    681    void *ctx,
    682    WHV_GUEST_VIRTUAL_ADDRESS Gva,
    683    WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
    684    WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
    685    WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
    686{
    687    HRESULT hr;
    688    struct whpx_state *whpx = &whpx_global;
    689    CPUState *cpu = (CPUState *)ctx;
    690    WHV_TRANSLATE_GVA_RESULT res;
    691
    692    hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
    693                                      Gva, TranslateFlags, &res, Gpa);
    694    if (FAILED(hr)) {
    695        error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
    696    } else {
    697        *TranslationResult = res.ResultCode;
    698    }
    699
    700    return hr;
    701}
    702
    703static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    704    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    705    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    706    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    707    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    708    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    709    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
    710};
    711
    712static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
    713{
    714    HRESULT hr;
    715    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    716    WHV_EMULATOR_STATUS emu_status;
    717
    718    hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
    719        vcpu->emulator, cpu,
    720        &vcpu->exit_ctx.VpContext, ctx,
    721        &emu_status);
    722    if (FAILED(hr)) {
    723        error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
    724        return -1;
    725    }
    726
    727    if (!emu_status.EmulationSuccessful) {
    728        error_report("WHPX: Failed to emulate MMIO access with"
    729                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
    730        return -1;
    731    }
    732
    733    return 0;
    734}
    735
    736static int whpx_handle_portio(CPUState *cpu,
    737                              WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
    738{
    739    HRESULT hr;
    740    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    741    WHV_EMULATOR_STATUS emu_status;
    742
    743    hr = whp_dispatch.WHvEmulatorTryIoEmulation(
    744        vcpu->emulator, cpu,
    745        &vcpu->exit_ctx.VpContext, ctx,
    746        &emu_status);
    747    if (FAILED(hr)) {
    748        error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
    749        return -1;
    750    }
    751
    752    if (!emu_status.EmulationSuccessful) {
    753        error_report("WHPX: Failed to emulate PortIO access with"
    754                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
    755        return -1;
    756    }
    757
    758    return 0;
    759}
    760
    761static int whpx_handle_halt(CPUState *cpu)
    762{
    763    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    764    int ret = 0;
    765
    766    qemu_mutex_lock_iothread();
    767    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
    768          (env->eflags & IF_MASK)) &&
    769        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
    770        cpu->exception_index = EXCP_HLT;
    771        cpu->halted = true;
    772        ret = 1;
    773    }
    774    qemu_mutex_unlock_iothread();
    775
    776    return ret;
    777}
    778
    779static void whpx_vcpu_pre_run(CPUState *cpu)
    780{
    781    HRESULT hr;
    782    struct whpx_state *whpx = &whpx_global;
    783    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    784    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    785    X86CPU *x86_cpu = X86_CPU(cpu);
    786    int irq;
    787    uint8_t tpr;
    788    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    789    UINT32 reg_count = 0;
    790    WHV_REGISTER_VALUE reg_values[3];
    791    WHV_REGISTER_NAME reg_names[3];
    792
    793    memset(&new_int, 0, sizeof(new_int));
    794    memset(reg_values, 0, sizeof(reg_values));
    795
    796    qemu_mutex_lock_iothread();
    797
    798    /* Inject NMI */
    799    if (!vcpu->interruption_pending &&
    800        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
    801        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
    802            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
    803            vcpu->interruptable = false;
    804            new_int.InterruptionType = WHvX64PendingNmi;
    805            new_int.InterruptionPending = 1;
    806            new_int.InterruptionVector = 2;
    807        }
    808        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
    809            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
    810        }
    811    }
    812
    813    /*
    814     * Force the VCPU out of its inner loop to process any INIT requests or
    815     * commit pending TPR access.
    816     */
    817    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
    818        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
    819            !(env->hflags & HF_SMM_MASK)) {
    820            cpu->exit_request = 1;
    821        }
    822        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
    823            cpu->exit_request = 1;
    824        }
    825    }
    826
    827    /* Get pending hard interruption or replay one that was overwritten */
    828    if (!whpx_apic_in_platform()) {
    829        if (!vcpu->interruption_pending &&
    830            vcpu->interruptable && (env->eflags & IF_MASK)) {
    831            assert(!new_int.InterruptionPending);
    832            if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
    833                cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
    834                irq = cpu_get_pic_interrupt(env);
    835                if (irq >= 0) {
    836                    new_int.InterruptionType = WHvX64PendingInterrupt;
    837                    new_int.InterruptionPending = 1;
    838                    new_int.InterruptionVector = irq;
    839                }
    840            }
    841        }
    842
    843        /* Setup interrupt state if new one was prepared */
    844        if (new_int.InterruptionPending) {
    845            reg_values[reg_count].PendingInterruption = new_int;
    846            reg_names[reg_count] = WHvRegisterPendingInterruption;
    847            reg_count += 1;
    848        }
    849    } else if (vcpu->ready_for_pic_interrupt &&
    850               (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
    851        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
    852        irq = cpu_get_pic_interrupt(env);
    853        if (irq >= 0) {
    854            reg_names[reg_count] = WHvRegisterPendingEvent;
    855            reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
    856            {
    857                .EventPending = 1,
    858                .EventType = WHvX64PendingEventExtInt,
    859                .Vector = irq,
    860            };
    861            reg_count += 1;
    862        }
    863     }
    864
    865    /* Sync the TPR to the CR8 if was modified during the intercept */
    866    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    867    if (tpr != vcpu->tpr) {
    868        vcpu->tpr = tpr;
    869        reg_values[reg_count].Reg64 = tpr;
    870        cpu->exit_request = 1;
    871        reg_names[reg_count] = WHvX64RegisterCr8;
    872        reg_count += 1;
    873    }
    874
    875    /* Update the state of the interrupt delivery notification */
    876    if (!vcpu->window_registered &&
    877        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
    878        reg_values[reg_count].DeliverabilityNotifications =
    879            (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
    880                .InterruptNotification = 1
    881            };
    882        vcpu->window_registered = 1;
    883        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
    884        reg_count += 1;
    885    }
    886
    887    qemu_mutex_unlock_iothread();
    888    vcpu->ready_for_pic_interrupt = false;
    889
    890    if (reg_count) {
    891        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
    892            whpx->partition, cpu->cpu_index,
    893            reg_names, reg_count, reg_values);
    894        if (FAILED(hr)) {
    895            error_report("WHPX: Failed to set interrupt state registers,"
    896                         " hr=%08lx", hr);
    897        }
    898    }
    899
    900    return;
    901}
    902
    903static void whpx_vcpu_post_run(CPUState *cpu)
    904{
    905    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    906    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    907    X86CPU *x86_cpu = X86_CPU(cpu);
    908
    909    env->eflags = vcpu->exit_ctx.VpContext.Rflags;
    910
    911    uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
    912    if (vcpu->tpr != tpr) {
    913        vcpu->tpr = tpr;
    914        qemu_mutex_lock_iothread();
    915        cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
    916        qemu_mutex_unlock_iothread();
    917    }
    918
    919    vcpu->interruption_pending =
    920        vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;
    921
    922    vcpu->interruptable =
    923        !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
    924
    925    return;
    926}
    927
    928static void whpx_vcpu_process_async_events(CPUState *cpu)
    929{
    930    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    931    X86CPU *x86_cpu = X86_CPU(cpu);
    932    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    933
    934    if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
    935        !(env->hflags & HF_SMM_MASK)) {
    936        whpx_cpu_synchronize_state(cpu);
    937        do_cpu_init(x86_cpu);
    938        vcpu->interruptable = true;
    939    }
    940
    941    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
    942        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
    943        apic_poll_irq(x86_cpu->apic_state);
    944    }
    945
    946    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
    947         (env->eflags & IF_MASK)) ||
    948        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
    949        cpu->halted = false;
    950    }
    951
    952    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
    953        whpx_cpu_synchronize_state(cpu);
    954        do_cpu_sipi(x86_cpu);
    955    }
    956
    957    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
    958        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
    959        whpx_cpu_synchronize_state(cpu);
    960        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
    961                                      env->tpr_access_type);
    962    }
    963
    964    return;
    965}
    966
    967static int whpx_vcpu_run(CPUState *cpu)
    968{
    969    HRESULT hr;
    970    struct whpx_state *whpx = &whpx_global;
    971    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    972    int ret;
    973
    974    whpx_vcpu_process_async_events(cpu);
    975    if (cpu->halted && !whpx_apic_in_platform()) {
    976        cpu->exception_index = EXCP_HLT;
    977        qatomic_set(&cpu->exit_request, false);
    978        return 0;
    979    }
    980
    981    qemu_mutex_unlock_iothread();
    982    cpu_exec_start(cpu);
    983
    984    do {
    985        if (cpu->vcpu_dirty) {
    986            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
    987            cpu->vcpu_dirty = false;
    988        }
    989
    990        whpx_vcpu_pre_run(cpu);
    991
    992        if (qatomic_read(&cpu->exit_request)) {
    993            whpx_vcpu_kick(cpu);
    994        }
    995
    996        hr = whp_dispatch.WHvRunVirtualProcessor(
    997            whpx->partition, cpu->cpu_index,
    998            &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
    999
   1000        if (FAILED(hr)) {
   1001            error_report("WHPX: Failed to exec a virtual processor,"
   1002                         " hr=%08lx", hr);
   1003            ret = -1;
   1004            break;
   1005        }
   1006
   1007        whpx_vcpu_post_run(cpu);
   1008
   1009        switch (vcpu->exit_ctx.ExitReason) {
   1010        case WHvRunVpExitReasonMemoryAccess:
   1011            ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
   1012            break;
   1013
   1014        case WHvRunVpExitReasonX64IoPortAccess:
   1015            ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
   1016            break;
   1017
   1018        case WHvRunVpExitReasonX64InterruptWindow:
   1019            vcpu->ready_for_pic_interrupt = 1;
   1020            vcpu->window_registered = 0;
   1021            ret = 0;
   1022            break;
   1023
   1024        case WHvRunVpExitReasonX64ApicEoi:
   1025            assert(whpx_apic_in_platform());
   1026            ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
   1027            break;
   1028
   1029        case WHvRunVpExitReasonX64Halt:
   1030            ret = whpx_handle_halt(cpu);
   1031            break;
   1032
   1033        case WHvRunVpExitReasonX64ApicInitSipiTrap: {
   1034            WHV_INTERRUPT_CONTROL ipi = {0};
   1035            uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
   1036            uint32_t delivery_mode =
   1037                (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
   1038            int dest_shorthand =
   1039                (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
   1040            bool broadcast = false;
   1041            bool include_self = false;
   1042            uint32_t i;
   1043
   1044            /* We only registered for INIT and SIPI exits. */
   1045            if ((delivery_mode != APIC_DM_INIT) &&
   1046                (delivery_mode != APIC_DM_SIPI)) {
   1047                error_report(
   1048                    "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
   1049                break;
   1050            }
   1051
   1052            if (delivery_mode == APIC_DM_INIT) {
   1053                ipi.Type = WHvX64InterruptTypeInit;
   1054            } else {
   1055                ipi.Type = WHvX64InterruptTypeSipi;
   1056            }
   1057
   1058            ipi.DestinationMode =
   1059                ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
   1060                    WHvX64InterruptDestinationModeLogical :
   1061                    WHvX64InterruptDestinationModePhysical;
   1062
   1063            ipi.TriggerMode =
   1064                ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
   1065                    WHvX64InterruptTriggerModeLevel :
   1066                    WHvX64InterruptTriggerModeEdge;
   1067
   1068            ipi.Vector = icr & APIC_VECTOR_MASK;
   1069            switch (dest_shorthand) {
   1070            /* no shorthand. Bits 56-63 contain the destination. */
   1071            case 0:
   1072                ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
   1073                hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
   1074                        &ipi, sizeof(ipi));
   1075                if (FAILED(hr)) {
   1076                    error_report("WHPX: Failed to request interrupt  hr=%08lx",
   1077                        hr);
   1078                }
   1079
   1080                break;
   1081
   1082            /* self */
   1083            case 1:
   1084                include_self = true;
   1085                break;
   1086
   1087            /* broadcast, including self */
   1088            case 2:
   1089                broadcast = true;
   1090                include_self = true;
   1091                break;
   1092
   1093            /* broadcast, excluding self */
   1094            case 3:
   1095                broadcast = true;
   1096                break;
   1097            }
   1098
   1099            if (!broadcast && !include_self) {
   1100                break;
   1101            }
   1102
   1103            for (i = 0; i <= max_vcpu_index; i++) {
   1104                if (i == cpu->cpu_index && !include_self) {
   1105                    continue;
   1106                }
   1107
   1108                /*
   1109                 * Assuming that APIC Ids are identity mapped since
   1110                 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
   1111                 * are not handled yet and the hypervisor doesn't allow the
   1112                 * guest to modify the APIC ID.
   1113                 */
   1114                ipi.Destination = i;
   1115                hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
   1116                        &ipi, sizeof(ipi));
   1117                if (FAILED(hr)) {
   1118                    error_report(
   1119                        "WHPX: Failed to request SIPI for %d,  hr=%08lx",
   1120                        i, hr);
   1121                }
   1122            }
   1123
   1124            break;
   1125        }
   1126
   1127        case WHvRunVpExitReasonCanceled:
   1128            cpu->exception_index = EXCP_INTERRUPT;
   1129            ret = 1;
   1130            break;
   1131
   1132        case WHvRunVpExitReasonX64MsrAccess: {
   1133            WHV_REGISTER_VALUE reg_values[3] = {0};
   1134            WHV_REGISTER_NAME reg_names[3];
   1135            UINT32 reg_count;
   1136
   1137            reg_names[0] = WHvX64RegisterRip;
   1138            reg_names[1] = WHvX64RegisterRax;
   1139            reg_names[2] = WHvX64RegisterRdx;
   1140
   1141            reg_values[0].Reg64 =
   1142                vcpu->exit_ctx.VpContext.Rip +
   1143                vcpu->exit_ctx.VpContext.InstructionLength;
   1144
   1145            /*
   1146             * For all unsupported MSR access we:
   1147             *     ignore writes
   1148             *     return 0 on read.
   1149             */
   1150            reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
   1151                        1 : 3;
   1152
   1153            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
   1154                whpx->partition,
   1155                cpu->cpu_index,
   1156                reg_names, reg_count,
   1157                reg_values);
   1158
   1159            if (FAILED(hr)) {
   1160                error_report("WHPX: Failed to set MsrAccess state "
   1161                             " registers, hr=%08lx", hr);
   1162            }
   1163            ret = 0;
   1164            break;
   1165        }
   1166        case WHvRunVpExitReasonX64Cpuid: {
   1167            WHV_REGISTER_VALUE reg_values[5];
   1168            WHV_REGISTER_NAME reg_names[5];
   1169            UINT32 reg_count = 5;
   1170            UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
   1171            X86CPU *x86_cpu = X86_CPU(cpu);
   1172            CPUX86State *env = &x86_cpu->env;
   1173
   1174            memset(reg_values, 0, sizeof(reg_values));
   1175
   1176            rip = vcpu->exit_ctx.VpContext.Rip +
   1177                  vcpu->exit_ctx.VpContext.InstructionLength;
   1178            cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
   1179
   1180            /*
   1181             * Ideally, these should be supplied to the hypervisor during VCPU
   1182             * initialization and it should be able to satisfy this request.
   1183             * But, currently, WHPX doesn't support setting CPUID values in the
   1184             * hypervisor once the partition has been setup, which is too late
   1185             * since VCPUs are realized later. For now, use the values from
   1186             * QEMU to satisfy these requests, until WHPX adds support for
   1187             * being able to set these values in the hypervisor at runtime.
   1188             */
   1189            cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
   1190                (UINT32 *)&rcx, (UINT32 *)&rdx);
   1191            switch (cpuid_fn) {
   1192            case 0x40000000:
   1193                /* Expose the vmware cpu frequency cpuid leaf */
   1194                rax = 0x40000010;
   1195                rbx = rcx = rdx = 0;
   1196                break;
   1197
   1198            case 0x40000010:
   1199                rax = env->tsc_khz;
   1200                rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
   1201                rcx = rdx = 0;
   1202                break;
   1203
   1204            case 0x80000001:
   1205                /* Remove any support of OSVW */
   1206                rcx &= ~CPUID_EXT3_OSVW;
   1207                break;
   1208            }
   1209
   1210            reg_names[0] = WHvX64RegisterRip;
   1211            reg_names[1] = WHvX64RegisterRax;
   1212            reg_names[2] = WHvX64RegisterRcx;
   1213            reg_names[3] = WHvX64RegisterRdx;
   1214            reg_names[4] = WHvX64RegisterRbx;
   1215
   1216            reg_values[0].Reg64 = rip;
   1217            reg_values[1].Reg64 = rax;
   1218            reg_values[2].Reg64 = rcx;
   1219            reg_values[3].Reg64 = rdx;
   1220            reg_values[4].Reg64 = rbx;
   1221
   1222            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
   1223                whpx->partition, cpu->cpu_index,
   1224                reg_names,
   1225                reg_count,
   1226                reg_values);
   1227
   1228            if (FAILED(hr)) {
   1229                error_report("WHPX: Failed to set CpuidAccess state registers,"
   1230                             " hr=%08lx", hr);
   1231            }
   1232            ret = 0;
   1233            break;
   1234        }
   1235        case WHvRunVpExitReasonNone:
   1236        case WHvRunVpExitReasonUnrecoverableException:
   1237        case WHvRunVpExitReasonInvalidVpRegisterValue:
   1238        case WHvRunVpExitReasonUnsupportedFeature:
   1239        case WHvRunVpExitReasonException:
   1240        default:
   1241            error_report("WHPX: Unexpected VP exit code %d",
   1242                         vcpu->exit_ctx.ExitReason);
   1243            whpx_get_registers(cpu);
   1244            qemu_mutex_lock_iothread();
   1245            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
   1246            qemu_mutex_unlock_iothread();
   1247            break;
   1248        }
   1249
   1250    } while (!ret);
   1251
   1252    cpu_exec_end(cpu);
   1253    qemu_mutex_lock_iothread();
   1254    current_cpu = cpu;
   1255
   1256    qatomic_set(&cpu->exit_request, false);
   1257
   1258    return ret < 0;
   1259}
   1260
   1261static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
   1262{
   1263    if (!cpu->vcpu_dirty) {
   1264        whpx_get_registers(cpu);
   1265        cpu->vcpu_dirty = true;
   1266    }
   1267}
   1268
   1269static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
   1270                                               run_on_cpu_data arg)
   1271{
   1272    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
   1273    cpu->vcpu_dirty = false;
   1274}
   1275
   1276static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
   1277                                              run_on_cpu_data arg)
   1278{
   1279    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
   1280    cpu->vcpu_dirty = false;
   1281}
   1282
   1283static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
   1284                                               run_on_cpu_data arg)
   1285{
   1286    cpu->vcpu_dirty = true;
   1287}
   1288
   1289/*
   1290 * CPU support.
   1291 */
   1292
   1293void whpx_cpu_synchronize_state(CPUState *cpu)
   1294{
   1295    if (!cpu->vcpu_dirty) {
   1296        run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
   1297    }
   1298}
   1299
   1300void whpx_cpu_synchronize_post_reset(CPUState *cpu)
   1301{
   1302    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
   1303}
   1304
   1305void whpx_cpu_synchronize_post_init(CPUState *cpu)
   1306{
   1307    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
   1308}
   1309
   1310void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
   1311{
   1312    run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
   1313}
   1314
   1315/*
   1316 * Vcpu support.
   1317 */
   1318
/*
 * Migration blocker shared by all VCPUs. whpx_init_vcpu() creates and
 * registers it when this pointer is still NULL.
 */
static Error *whpx_migration_blocker;
   1320
   1321static void whpx_cpu_update_state(void *opaque, bool running, RunState state)
   1322{
   1323    CPUX86State *env = opaque;
   1324
   1325    if (running) {
   1326        env->tsc_valid = false;
   1327    }
   1328}
   1329
   1330int whpx_init_vcpu(CPUState *cpu)
   1331{
   1332    HRESULT hr;
   1333    struct whpx_state *whpx = &whpx_global;
   1334    struct whpx_vcpu *vcpu = NULL;
   1335    Error *local_error = NULL;
   1336    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
   1337    X86CPU *x86_cpu = X86_CPU(cpu);
   1338    UINT64 freq = 0;
   1339    int ret;
   1340
   1341    /* Add migration blockers for all unsupported features of the
   1342     * Windows Hypervisor Platform
   1343     */
   1344    if (whpx_migration_blocker == NULL) {
   1345        error_setg(&whpx_migration_blocker,
   1346               "State blocked due to non-migratable CPUID feature support,"
   1347               "dirty memory tracking support, and XSAVE/XRSTOR support");
   1348
   1349        if (migrate_add_blocker(whpx_migration_blocker, &local_error) < 0) {
   1350            error_report_err(local_error);
   1351            error_free(whpx_migration_blocker);
   1352            ret = -EINVAL;
   1353            goto error;
   1354        }
   1355    }
   1356
   1357    vcpu = g_malloc0(sizeof(struct whpx_vcpu));
   1358
   1359    if (!vcpu) {
   1360        error_report("WHPX: Failed to allocte VCPU context.");
   1361        ret = -ENOMEM;
   1362        goto error;
   1363    }
   1364
   1365    hr = whp_dispatch.WHvEmulatorCreateEmulator(
   1366        &whpx_emu_callbacks,
   1367        &vcpu->emulator);
   1368    if (FAILED(hr)) {
   1369        error_report("WHPX: Failed to setup instruction completion support,"
   1370                     " hr=%08lx", hr);
   1371        ret = -EINVAL;
   1372        goto error;
   1373    }
   1374
   1375    hr = whp_dispatch.WHvCreateVirtualProcessor(
   1376        whpx->partition, cpu->cpu_index, 0);
   1377    if (FAILED(hr)) {
   1378        error_report("WHPX: Failed to create a virtual processor,"
   1379                     " hr=%08lx", hr);
   1380        whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
   1381        ret = -EINVAL;
   1382        goto error;
   1383    }
   1384
   1385    /*
   1386     * vcpu's TSC frequency is either specified by user, or use the value
   1387     * provided by Hyper-V if the former is not present. In the latter case, we
   1388     * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
   1389     * frequency can be migrated later via this field.
   1390     */
   1391    if (!env->tsc_khz) {
   1392        hr = whp_dispatch.WHvGetCapability(
   1393            WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
   1394                NULL);
   1395        if (hr != WHV_E_UNKNOWN_CAPABILITY) {
   1396            if (FAILED(hr)) {
   1397                printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
   1398            } else {
   1399                env->tsc_khz = freq / 1000; /* Hz to KHz */
   1400            }
   1401        }
   1402    }
   1403
   1404    env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
   1405    hr = whp_dispatch.WHvGetCapability(
   1406        WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
   1407    if (hr != WHV_E_UNKNOWN_CAPABILITY) {
   1408        if (FAILED(hr)) {
   1409            printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
   1410        } else {
   1411            env->apic_bus_freq = freq;
   1412        }
   1413    }
   1414
   1415    /*
   1416     * If the vmware cpuid frequency leaf option is set, and we have a valid
   1417     * tsc value, trap the corresponding cpuid's.
   1418     */
   1419    if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
   1420        UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};
   1421
   1422        hr = whp_dispatch.WHvSetPartitionProperty(
   1423                whpx->partition,
   1424                WHvPartitionPropertyCodeCpuidExitList,
   1425                cpuidExitList,
   1426                RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
   1427
   1428        if (FAILED(hr)) {
   1429            error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
   1430                        hr);
   1431            ret = -EINVAL;
   1432            goto error;
   1433        }
   1434    }
   1435
   1436    vcpu->interruptable = true;
   1437    cpu->vcpu_dirty = true;
   1438    cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
   1439    max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
   1440    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
   1441
   1442    return 0;
   1443
   1444error:
   1445    g_free(vcpu);
   1446
   1447    return ret;
   1448}
   1449
   1450int whpx_vcpu_exec(CPUState *cpu)
   1451{
   1452    int ret;
   1453    int fatal;
   1454
   1455    for (;;) {
   1456        if (cpu->exception_index >= EXCP_INTERRUPT) {
   1457            ret = cpu->exception_index;
   1458            cpu->exception_index = -1;
   1459            break;
   1460        }
   1461
   1462        fatal = whpx_vcpu_run(cpu);
   1463
   1464        if (fatal) {
   1465            error_report("WHPX: Failed to exec a virtual processor");
   1466            abort();
   1467        }
   1468    }
   1469
   1470    return ret;
   1471}
   1472
   1473void whpx_destroy_vcpu(CPUState *cpu)
   1474{
   1475    struct whpx_state *whpx = &whpx_global;
   1476    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
   1477
   1478    whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
   1479    whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
   1480    g_free(cpu->hax_vcpu);
   1481    return;
   1482}
   1483
   1484void whpx_vcpu_kick(CPUState *cpu)
   1485{
   1486    struct whpx_state *whpx = &whpx_global;
   1487    whp_dispatch.WHvCancelRunVirtualProcessor(
   1488        whpx->partition, cpu->cpu_index, 0);
   1489}
   1490
   1491/*
   1492 * Memory support.
   1493 */
   1494
   1495static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
   1496                                void *host_va, int add, int rom,
   1497                                const char *name)
   1498{
   1499    struct whpx_state *whpx = &whpx_global;
   1500    HRESULT hr;
   1501
   1502    /*
   1503    if (add) {
   1504        printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
   1505               (void*)start_pa, (void*)size, host_va,
   1506               (rom ? "ROM" : "RAM"), name);
   1507    } else {
   1508        printf("WHPX: DEL PA:%p Size:%p, Host:%p,      '%s'\n",
   1509               (void*)start_pa, (void*)size, host_va, name);
   1510    }
   1511    */
   1512
   1513    if (add) {
   1514        hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
   1515                                         host_va,
   1516                                         start_pa,
   1517                                         size,
   1518                                         (WHvMapGpaRangeFlagRead |
   1519                                          WHvMapGpaRangeFlagExecute |
   1520                                          (rom ? 0 : WHvMapGpaRangeFlagWrite)));
   1521    } else {
   1522        hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
   1523                                           start_pa,
   1524                                           size);
   1525    }
   1526
   1527    if (FAILED(hr)) {
   1528        error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
   1529                     " Host:%p, hr=%08lx",
   1530                     (add ? "MAP" : "UNMAP"), name,
   1531                     (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
   1532    }
   1533}
   1534
   1535static void whpx_process_section(MemoryRegionSection *section, int add)
   1536{
   1537    MemoryRegion *mr = section->mr;
   1538    hwaddr start_pa = section->offset_within_address_space;
   1539    ram_addr_t size = int128_get64(section->size);
   1540    unsigned int delta;
   1541    uint64_t host_va;
   1542
   1543    if (!memory_region_is_ram(mr)) {
   1544        return;
   1545    }
   1546
   1547    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
   1548    delta &= ~qemu_real_host_page_mask;
   1549    if (delta > size) {
   1550        return;
   1551    }
   1552    start_pa += delta;
   1553    size -= delta;
   1554    size &= qemu_real_host_page_mask;
   1555    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
   1556        return;
   1557    }
   1558
   1559    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
   1560            + section->offset_within_region + delta;
   1561
   1562    whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
   1563                        memory_region_is_rom(mr), mr->name);
   1564}
   1565
   1566static void whpx_region_add(MemoryListener *listener,
   1567                           MemoryRegionSection *section)
   1568{
   1569    memory_region_ref(section->mr);
   1570    whpx_process_section(section, 1);
   1571}
   1572
   1573static void whpx_region_del(MemoryListener *listener,
   1574                           MemoryRegionSection *section)
   1575{
   1576    whpx_process_section(section, 0);
   1577    memory_region_unref(section->mr);
   1578}
   1579
   1580static void whpx_transaction_begin(MemoryListener *listener)
   1581{
   1582}
   1583
   1584static void whpx_transaction_commit(MemoryListener *listener)
   1585{
   1586}
   1587
   1588static void whpx_log_sync(MemoryListener *listener,
   1589                         MemoryRegionSection *section)
   1590{
   1591    MemoryRegion *mr = section->mr;
   1592
   1593    if (!memory_region_is_ram(mr)) {
   1594        return;
   1595    }
   1596
   1597    memory_region_set_dirty(mr, 0, int128_get64(section->size));
   1598}
   1599
   1600static MemoryListener whpx_memory_listener = {
   1601    .name = "whpx",
   1602    .begin = whpx_transaction_begin,
   1603    .commit = whpx_transaction_commit,
   1604    .region_add = whpx_region_add,
   1605    .region_del = whpx_region_del,
   1606    .log_sync = whpx_log_sync,
   1607    .priority = 10,
   1608};
   1609
   1610static void whpx_memory_init(void)
   1611{
   1612    memory_listener_register(&whpx_memory_listener, &address_space_memory);
   1613}
   1614
   1615/*
   1616 * Load the functions from the given library, using the given handle. If a
   1617 * handle is provided, it is used, otherwise the library is opened. The
   1618 * handle will be updated on return with the opened one.
   1619 */
   1620static bool load_whp_dispatch_fns(HMODULE *handle,
   1621    WHPFunctionList function_list)
   1622{
   1623    HMODULE hLib = *handle;
   1624
   1625    #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
   1626    #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
   1627    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
   1628        whp_dispatch.function_name = \
   1629            (function_name ## _t)GetProcAddress(hLib, #function_name); \
   1630
   1631    #define WHP_LOAD_FIELD(return_type, function_name, signature) \
   1632        whp_dispatch.function_name = \
   1633            (function_name ## _t)GetProcAddress(hLib, #function_name); \
   1634        if (!whp_dispatch.function_name) { \
   1635            error_report("Could not load function %s", #function_name); \
   1636            goto error; \
   1637        } \
   1638
   1639    #define WHP_LOAD_LIB(lib_name, handle_lib) \
   1640    if (!handle_lib) { \
   1641        handle_lib = LoadLibrary(lib_name); \
   1642        if (!handle_lib) { \
   1643            error_report("Could not load library %s.", lib_name); \
   1644            goto error; \
   1645        } \
   1646    } \
   1647
   1648    switch (function_list) {
   1649    case WINHV_PLATFORM_FNS_DEFAULT:
   1650        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
   1651        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
   1652        break;
   1653
   1654    case WINHV_EMULATION_FNS_DEFAULT:
   1655        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
   1656        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
   1657        break;
   1658
   1659    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
   1660        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
   1661        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
   1662        break;
   1663    }
   1664
   1665    *handle = hLib;
   1666    return true;
   1667
   1668error:
   1669    if (hLib) {
   1670        FreeLibrary(hLib);
   1671    }
   1672
   1673    return false;
   1674}
   1675
   1676static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
   1677                                   const char *name, void *opaque,
   1678                                   Error **errp)
   1679{
   1680    struct whpx_state *whpx = &whpx_global;
   1681    OnOffSplit mode;
   1682
   1683    if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
   1684        return;
   1685    }
   1686
   1687    switch (mode) {
   1688    case ON_OFF_SPLIT_ON:
   1689        whpx->kernel_irqchip_allowed = true;
   1690        whpx->kernel_irqchip_required = true;
   1691        break;
   1692
   1693    case ON_OFF_SPLIT_OFF:
   1694        whpx->kernel_irqchip_allowed = false;
   1695        whpx->kernel_irqchip_required = false;
   1696        break;
   1697
   1698    case ON_OFF_SPLIT_SPLIT:
   1699        error_setg(errp, "WHPX: split irqchip currently not supported");
   1700        error_append_hint(errp,
   1701            "Try without kernel-irqchip or with kernel-irqchip=on|off");
   1702        break;
   1703
   1704    default:
   1705        /*
   1706         * The value was checked in visit_type_OnOffSplit() above. If
   1707         * we get here, then something is wrong in QEMU.
   1708         */
   1709        abort();
   1710    }
   1711}
   1712
   1713/*
   1714 * Partition support
   1715 */
   1716
   1717static int whpx_accel_init(MachineState *ms)
   1718{
   1719    struct whpx_state *whpx;
   1720    int ret;
   1721    HRESULT hr;
   1722    WHV_CAPABILITY whpx_cap;
   1723    UINT32 whpx_cap_size;
   1724    WHV_PARTITION_PROPERTY prop;
   1725    UINT32 cpuidExitList[] = {1, 0x80000001};
   1726    WHV_CAPABILITY_FEATURES features = {0};
   1727
   1728    whpx = &whpx_global;
   1729
   1730    if (!init_whp_dispatch()) {
   1731        ret = -ENOSYS;
   1732        goto error;
   1733    }
   1734
   1735    whpx->mem_quota = ms->ram_size;
   1736
   1737    hr = whp_dispatch.WHvGetCapability(
   1738        WHvCapabilityCodeHypervisorPresent, &whpx_cap,
   1739        sizeof(whpx_cap), &whpx_cap_size);
   1740    if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
   1741        error_report("WHPX: No accelerator found, hr=%08lx", hr);
   1742        ret = -ENOSPC;
   1743        goto error;
   1744    }
   1745
   1746    hr = whp_dispatch.WHvGetCapability(
   1747        WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
   1748    if (FAILED(hr)) {
   1749        error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
   1750        ret = -EINVAL;
   1751        goto error;
   1752    }
   1753
   1754    hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
   1755    if (FAILED(hr)) {
   1756        error_report("WHPX: Failed to create partition, hr=%08lx", hr);
   1757        ret = -EINVAL;
   1758        goto error;
   1759    }
   1760
   1761    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
   1762    prop.ProcessorCount = ms->smp.cpus;
   1763    hr = whp_dispatch.WHvSetPartitionProperty(
   1764        whpx->partition,
   1765        WHvPartitionPropertyCodeProcessorCount,
   1766        &prop,
   1767        sizeof(WHV_PARTITION_PROPERTY));
   1768
   1769    if (FAILED(hr)) {
   1770        error_report("WHPX: Failed to set partition core count to %d,"
   1771                     " hr=%08lx", ms->smp.cores, hr);
   1772        ret = -EINVAL;
   1773        goto error;
   1774    }
   1775
   1776    /*
   1777     * Error out if WHP doesn't support apic emulation and user is requiring
   1778     * it.
   1779     */
   1780    if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
   1781            !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
   1782        error_report("WHPX: kernel irqchip requested, but unavailable. "
   1783            "Try without kernel-irqchip or with kernel-irqchip=off");
   1784        ret = -EINVAL;
   1785        goto error;
   1786    }
   1787
   1788    if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
   1789        whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
   1790        WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
   1791            WHvX64LocalApicEmulationModeXApic;
   1792        printf("WHPX: setting APIC emulation mode in the hypervisor\n");
   1793        hr = whp_dispatch.WHvSetPartitionProperty(
   1794            whpx->partition,
   1795            WHvPartitionPropertyCodeLocalApicEmulationMode,
   1796            &mode,
   1797            sizeof(mode));
   1798        if (FAILED(hr)) {
   1799            error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
   1800            if (whpx->kernel_irqchip_required) {
   1801                error_report("WHPX: kernel irqchip requested, but unavailable");
   1802                ret = -EINVAL;
   1803                goto error;
   1804            }
   1805        } else {
   1806            whpx->apic_in_platform = true;
   1807        }
   1808    }
   1809
   1810    /* Register for MSR and CPUID exits */
   1811    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
   1812    prop.ExtendedVmExits.X64MsrExit = 1;
   1813    prop.ExtendedVmExits.X64CpuidExit = 1;
   1814    if (whpx_apic_in_platform()) {
   1815        prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
   1816    }
   1817
   1818    hr = whp_dispatch.WHvSetPartitionProperty(
   1819            whpx->partition,
   1820            WHvPartitionPropertyCodeExtendedVmExits,
   1821            &prop,
   1822            sizeof(WHV_PARTITION_PROPERTY));
   1823    if (FAILED(hr)) {
   1824        error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr);
   1825        ret = -EINVAL;
   1826        goto error;
   1827    }
   1828
   1829    hr = whp_dispatch.WHvSetPartitionProperty(
   1830        whpx->partition,
   1831        WHvPartitionPropertyCodeCpuidExitList,
   1832        cpuidExitList,
   1833        RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
   1834
   1835    if (FAILED(hr)) {
   1836        error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
   1837                     hr);
   1838        ret = -EINVAL;
   1839        goto error;
   1840    }
   1841
   1842    hr = whp_dispatch.WHvSetupPartition(whpx->partition);
   1843    if (FAILED(hr)) {
   1844        error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
   1845        ret = -EINVAL;
   1846        goto error;
   1847    }
   1848
   1849    whpx_memory_init();
   1850
   1851    printf("Windows Hypervisor Platform accelerator is operational\n");
   1852    return 0;
   1853
   1854error:
   1855
   1856    if (NULL != whpx->partition) {
   1857        whp_dispatch.WHvDeletePartition(whpx->partition);
   1858        whpx->partition = NULL;
   1859    }
   1860
   1861    return ret;
   1862}
   1863
   1864int whpx_enabled(void)
   1865{
   1866    return whpx_allowed;
   1867}
   1868
   1869bool whpx_apic_in_platform(void) {
   1870    return whpx_global.apic_in_platform;
   1871}
   1872
   1873static void whpx_accel_class_init(ObjectClass *oc, void *data)
   1874{
   1875    AccelClass *ac = ACCEL_CLASS(oc);
   1876    ac->name = "WHPX";
   1877    ac->init_machine = whpx_accel_init;
   1878    ac->allowed = &whpx_allowed;
   1879
   1880    object_class_property_add(oc, "kernel-irqchip", "on|off|split",
   1881        NULL, whpx_set_kernel_irqchip,
   1882        NULL, NULL);
   1883    object_class_property_set_description(oc, "kernel-irqchip",
   1884        "Configure WHPX in-kernel irqchip");
   1885}
   1886
   1887static void whpx_accel_instance_init(Object *obj)
   1888{
   1889    struct whpx_state *whpx = &whpx_global;
   1890
   1891    memset(whpx, 0, sizeof(struct whpx_state));
   1892    /* Turn on kernel-irqchip, by default */
   1893    whpx->kernel_irqchip_allowed = true;
   1894}
   1895
   1896static const TypeInfo whpx_accel_type = {
   1897    .name = ACCEL_CLASS_NAME("whpx"),
   1898    .parent = TYPE_ACCEL,
   1899    .instance_init = whpx_accel_instance_init,
   1900    .class_init = whpx_accel_class_init,
   1901};
   1902
   1903static void whpx_type_init(void)
   1904{
   1905    type_register_static(&whpx_accel_type);
   1906}
   1907
   1908bool init_whp_dispatch(void)
   1909{
   1910    if (whp_dispatch_initialized) {
   1911        return true;
   1912    }
   1913
   1914    if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
   1915        goto error;
   1916    }
   1917
   1918    if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
   1919        goto error;
   1920    }
   1921
   1922    assert(load_whp_dispatch_fns(&hWinHvPlatform,
   1923        WINHV_PLATFORM_FNS_SUPPLEMENTAL));
   1924    whp_dispatch_initialized = true;
   1925
   1926    return true;
   1927error:
   1928    if (hWinHvPlatform) {
   1929        FreeLibrary(hWinHvPlatform);
   1930    }
   1931
   1932    if (hWinHvEmulation) {
   1933        FreeLibrary(hWinHvEmulation);
   1934    }
   1935
   1936    return false;
   1937}
   1938
   1939type_init(whpx_type_init);