cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

hax-all.c (30740B)


/*
 * QEMU HAX support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * Copyright (c) 2011 Intel Corporation
 *  Written by:
 *  Jiang Yunhong <yunhong.jiang@intel.com>
 *  Xin Xiaohui <xiaohui.xin@intel.com>
 *  Zhang Xiantao <xiantao.zhang@intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * HAX common code for both Windows and Darwin
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"

#include "qemu-common.h"
#include "qemu/accel.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"

#include "hax-accel-ops.h"

#define DEBUG_HAX 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)
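
/*
 * Note on the macro above: wrapping the body in do { ... } while (0) lets
 * DPRINTF(...) behave like a single statement (safe in an unbraced if/else),
 * and gating the fprintf behind "if (DEBUG_HAX)" rather than behind #ifdef
 * keeps the format string and arguments type-checked even when DEBUG_HAX is
 * 0, while the compiler elides the dead call. Illustrative usage
 * (hypothetical values):
 *
 *     DPRINTF("vcpu %d exit status %x\n", vcpu->vcpu_id, ht->_exit_status);
 */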

/* Current version */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel version */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

static bool hax_allowed;

struct hax_state hax_global;

static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);

int hax_enabled(void)
{
    return hax_allowed;
}

int valid_hax_tunnel_size(uint16_t size)
{
    return size >= sizeof(struct hax_tunnel);
}

hax_fd hax_vcpu_get_fd(CPUArchState *env)
{
    struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
    if (!vcpu) {
        return HAX_INVALID_FD;
    }
    return vcpu->fd;
}

static int hax_get_capability(struct hax_state *hax)
{
    int ret;
    struct hax_capabilityinfo capinfo, *cap = &capinfo;

    ret = hax_capability(hax, cap);
    if (ret) {
        return ret;
    }

    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
            DPRINTF("VTX feature is not enabled, HAX driver will not work.\n");
        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
            DPRINTF("NX feature is not enabled, HAX driver will not work.\n");
        }
        return -ENXIO;
    }

    if (!(cap->winfo & HAX_CAP_UG)) {
        fprintf(stderr, "UG mode is not supported by the hardware.\n");
        return -ENOTSUP;
    }

    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);

    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
        if (cap->mem_quota < hax->mem_quota) {
            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
            return -ENOSPC;
        }
    }
    return 0;
}

static int hax_version_support(struct hax_state *hax)
{
    int ret;
    struct hax_module_version version;

    ret = hax_mod_version(hax, &version);
    if (ret < 0) {
        return 0;
    }

    if (hax_min_version > version.cur_version) {
        fprintf(stderr, "Incompatible HAX module version %d, ",
                version.cur_version);
        fprintf(stderr, "requires minimum version %d\n", hax_min_version);
        return 0;
    }
    if (hax_cur_version < version.compat_version) {
        fprintf(stderr, "Incompatible QEMU HAX API version %x, ",
                hax_cur_version);
        fprintf(stderr, "requires minimum HAX API version %x\n",
                version.compat_version);
        return 0;
    }

    return 1;
}
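
/*
 * The handshake above is two-sided: the driver must be at least as new as
 * hax_min_version, and QEMU's API version must be at least the driver's
 * compat_version. Worked example with hypothetical numbers: a driver
 * reporting cur_version = 3 is rejected, because hax_min_version (4) > 3;
 * a driver reporting cur_version = 5 with compat_version = 4 is accepted,
 * since hax_cur_version (4) >= 4.
 */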

int hax_vcpu_create(int id)
{
    struct hax_vcpu_state *vcpu = NULL;
    int ret;

    if (!hax_global.vm) {
        fprintf(stderr, "vcpu %x creation failed, vm is null\n", id);
        return -1;
    }

    if (hax_global.vm->vcpus[id]) {
        fprintf(stderr, "vcpu %x allocated already\n", id);
        return 0;
    }

    vcpu = g_new0(struct hax_vcpu_state, 1);

    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
    if (ret) {
        fprintf(stderr, "Failed to create vcpu %x\n", id);
        goto error;
    }

    vcpu->vcpu_id = id;
    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
    if (hax_invalid_fd(vcpu->fd)) {
        fprintf(stderr, "Failed to open the vcpu\n");
        ret = -ENODEV;
        goto error;
    }

    hax_global.vm->vcpus[id] = vcpu;

    ret = hax_host_setup_vcpu_channel(vcpu);
    if (ret) {
        fprintf(stderr, "Invalid hax tunnel size\n");
        ret = -EINVAL;
        goto error;
    }
    return 0;

  error:
    /* vcpu and tunnel will be closed automatically */
    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
        hax_close_fd(vcpu->fd);
    }

    hax_global.vm->vcpus[id] = NULL;
    g_free(vcpu);
    return -1;
}

int hax_vcpu_destroy(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return 0;
    }

    if (!hax_global.vm) {
        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
        return -1;
    }

    /*
     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
     * 2. closing the fd causes the hax module's vcpu to be cleaned up
     */
    hax_close_fd(vcpu->fd);
    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
    g_free(vcpu);
    return 0;
}

int hax_init_vcpu(CPUState *cpu)
{
    int ret;

    ret = hax_vcpu_create(cpu->cpu_index);
    if (ret < 0) {
        fprintf(stderr, "Failed to create HAX vcpu\n");
        exit(-1);
    }

    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
    cpu->vcpu_dirty = true;
    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));

    return ret;
}

struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
{
    struct hax_vm *vm;
    int vm_id = 0, ret, i;

    if (hax_invalid_fd(hax->fd)) {
        return NULL;
    }

    if (hax->vm) {
        return hax->vm;
    }

    if (max_cpus > HAX_MAX_VCPU) {
        fprintf(stderr, "Maximum VCPU number supported by QEMU is %d\n",
                HAX_MAX_VCPU);
        return NULL;
    }

    vm = g_new0(struct hax_vm, 1);

    ret = hax_host_create_vm(hax, &vm_id);
    if (ret) {
        fprintf(stderr, "Failed to create vm %x\n", ret);
        goto error;
    }
    vm->id = vm_id;
    vm->fd = hax_host_open_vm(hax, vm_id);
    if (hax_invalid_fd(vm->fd)) {
        fprintf(stderr, "Failed to open vm %d\n", vm_id);
        goto error;
    }

    vm->numvcpus = max_cpus;
    vm->vcpus = g_new0(struct hax_vcpu_state *, vm->numvcpus);
    for (i = 0; i < vm->numvcpus; i++) {
        vm->vcpus[i] = NULL;
    }

    hax->vm = vm;
    return vm;

  error:
    g_free(vm);
    hax->vm = NULL;
    return NULL;
}

int hax_vm_destroy(struct hax_vm *vm)
{
    int i;

    for (i = 0; i < vm->numvcpus; i++)
        if (vm->vcpus[i]) {
            fprintf(stderr, "VCPUs should be cleaned up before vm cleanup\n");
            return -1;
        }
    hax_close_fd(vm->fd);
    vm->numvcpus = 0;
    g_free(vm->vcpus);
    g_free(vm);
    hax_global.vm = NULL;
    return 0;
}

static int hax_init(ram_addr_t ram_size, int max_cpus)
{
    struct hax_state *hax = NULL;
    struct hax_qemu_version qversion;
    int ret;

    hax = &hax_global;

    memset(hax, 0, sizeof(struct hax_state));
    hax->mem_quota = ram_size;

    hax->fd = hax_mod_open();
    if (hax_invalid_fd(hax->fd)) {
        hax->fd = 0;
        ret = -ENODEV;
        goto error;
    }

    ret = hax_get_capability(hax);

    if (ret) {
        if (ret != -ENOSPC) {
            ret = -EINVAL;
        }
        goto error;
    }

    if (!hax_version_support(hax)) {
        ret = -EINVAL;
        goto error;
    }

    hax->vm = hax_vm_create(hax, max_cpus);
    if (!hax->vm) {
        fprintf(stderr, "Failed to create HAX VM\n");
        ret = -EINVAL;
        goto error;
    }

    hax_memory_init();

    qversion.cur_version = hax_cur_version;
    qversion.min_version = hax_min_version;
    hax_notify_qemu_version(hax->vm->fd, &qversion);

    return ret;
  error:
    if (hax->vm) {
        hax_vm_destroy(hax->vm);
    }
    if (hax->fd) {
        hax_mod_close(hax);
    }

    return ret;
}

static int hax_accel_init(MachineState *ms)
{
    int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);

    if (ret && (ret != -ENOSPC)) {
        fprintf(stderr, "No accelerator found.\n");
    } else {
        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
                !ret ? "working" : "not working",
                !ret ? "fast virt" : "emulation");
    }
    return ret;
}

static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
{
    if (hft->direction < 2) {
        cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
                               hft->direction);
    } else {
        /*
         * HAX API v4 supports transferring data between two MMIO addresses,
         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
         *  hft->direction == 2: gpa ==> gpa2
         */
        uint64_t value;
        cpu_physical_memory_read(hft->gpa, &value, hft->size);
        cpu_physical_memory_write(hft->gpa2, &value, hft->size);
    }

    return 0;
}
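
/*
 * Direction values 0 and 1 are plain MMIO reads and writes, handled by a
 * single cpu_physical_memory_rw() through hft->value. Direction 2 is the
 * "MMIO move" case that API v4 adds (see hax_cur_version at the top of
 * this file): for example (hypothetically), a REP MOVSD whose source and
 * destination both land in MMIO space exits with hft->gpa as the source
 * and hft->gpa2 as the destination, and QEMU bounces the data through a
 * local variable.
 */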

static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
                         int direction, int size, int count, void *buffer)
{
    uint8_t *ptr;
    int i;
    MemTxAttrs attrs = { 0 };

    if (!df) {
        ptr = (uint8_t *) buffer;
    } else {
        ptr = buffer + size * count - size;
    }
    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size, direction == HAX_EXIT_IO_OUT);
        if (!df) {
            ptr += size;
        } else {
            ptr -= size;
        }
    }

    return 0;
}
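
/*
 * The df argument mirrors the guest's direction flag for string I/O
 * (INS/OUTS): with DF clear the guest walks the buffer forwards, with DF
 * set it walks backwards, so the replay loop above starts at the last
 * element and decrements. Illustrative sketch (hypothetical values): for a
 * "rep outsw" with count == 3 and DF set, the elements are emitted in the
 * order buffer[2], buffer[1], buffer[0].
 */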

static int hax_vcpu_interrupt(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    /*
     * Try to inject an interrupt if the guest can accept it
     * Unlike KVM, the HAX kernel checks eflags, rather than QEMU
     */
    if (ht->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        int irq;

        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            hax_inject_interrupt(env, irq);
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        ht->request_interrupt_window = 1;
    } else {
        ht->request_interrupt_window = 0;
    }
    return 0;
}

void hax_raise_event(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return;
    }
    vcpu->tunnel->user_event_pending = 1;
}

/*
 * Ask the hax kernel module to run the CPU for us until:
 * 1. The guest crashes or shuts down
 * 2. We need QEMU's emulation, e.g. the guest executes an MMIO instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to execute vcpu at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue vcpu_run if the system call was interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id);
            abort();
        }
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                            ht->pio._direction,
                            ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* HLT with no interrupt or NMI pending: halt the vcpu */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these exits simply resume execution in the hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}

static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_arch_get_registers(env);
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
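
/*
 * The vcpu_dirty flag implements a lazy register-sync protocol: when it is
 * true, QEMU's CPUArchState is authoritative and must be pushed back to the
 * kernel (hax_vcpu_sync_state(env, 1)) before the vcpu runs again; when it
 * is false, the kernel holds the freshest state and QEMU must fetch it
 * before reading env. A caller that wants to inspect guest registers would
 * typically do (illustrative sketch):
 *
 *     hax_cpu_synchronize_state(cpu);   // fetch regs unless already dirty
 *     CPUArchState *env = cpu->env_ptr;
 *     DPRINTF("eip:" TARGET_FMT_lx "\n", env->eip);
 */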

int hax_smp_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
    int fatal;
    int ret;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = hax_vcpu_hax_exec(env);

        if (fatal) {
            fprintf(stderr, "Unsupported HAX vcpu return\n");
            abort();
        }
    }

    return ret;
}

static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->operand_size = 0;
    lhs->desc = 1;
    lhs->long_mode = 0;
    lhs->granularity = 0;
    lhs->available = 0;
}

static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->operand_size << DESC_B_SHIFT)
        | (rhs->desc * DESC_S_MASK)
        | (rhs->long_mode << DESC_L_SHIFT)
        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
}
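
/*
 * get_seg()/set_seg() translate between HAX's unpacked segment_desc_t and
 * QEMU's packed SegmentCache.flags, which mirrors the high dword of an x86
 * segment descriptor. Worked example with hypothetical values: a flat
 * 32-bit code segment with type = 0xb (execute/read, accessed), present,
 * DPL 0, D/B = 1 and G = 1 packs, with QEMU's DESC_* constants, to
 *
 *     (0xb << DESC_TYPE_SHIFT) | DESC_P_MASK | DESC_S_MASK
 *         | (1 << DESC_B_SHIFT) | DESC_G_MASK   ==  0x00c09b00
 *
 * Note also that set_seg() below derives dpl from the selector's RPL bits
 * rather than carrying a separate DPL field.
 */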

static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
    lhs->desc = (flags & DESC_S_MASK) != 0;
    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
    lhs->granularity = (flags & DESC_G_MASK) != 0;
    lhs->available = (flags & DESC_AVL_MASK) != 0;
}

static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
{
    target_ulong reg = *hax_reg;

    if (set) {
        *hax_reg = *qemu_reg;
    } else {
        *qemu_reg = reg;
    }
}

/* The sregs have already been synced with the HAX kernel before this call */
static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    get_seg(&env->segs[R_CS], &sregs->_cs);
    get_seg(&env->segs[R_DS], &sregs->_ds);
    get_seg(&env->segs[R_ES], &sregs->_es);
    get_seg(&env->segs[R_FS], &sregs->_fs);
    get_seg(&env->segs[R_GS], &sregs->_gs);
    get_seg(&env->segs[R_SS], &sregs->_ss);

    get_seg(&env->tr, &sregs->_tr);
    get_seg(&env->ldt, &sregs->_ldt);
    env->idt.limit = sregs->_idt.limit;
    env->idt.base = sregs->_idt.base;
    env->gdt.limit = sregs->_gdt.limit;
    env->gdt.base = sregs->_gdt.base;
    return 0;
}

static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs->_cs, &env->segs[R_CS]);
        set_seg(&sregs->_ds, &env->segs[R_DS]);
        set_seg(&sregs->_es, &env->segs[R_ES]);
        set_seg(&sregs->_fs, &env->segs[R_FS]);
        set_seg(&sregs->_gs, &env->segs[R_GS]);
        set_seg(&sregs->_ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
                                  (sregs->_cs.selector & 3);
            sregs->_ss.dpl = sregs->_ss.selector & 3;
        }
    }

    set_seg(&sregs->_tr, &env->tr);
    set_seg(&sregs->_ldt, &env->ldt);
    sregs->_idt.limit = env->idt.limit;
    sregs->_idt.base = env->idt.base;
    sregs->_gdt.limit = env->gdt.limit;
    sregs->_gdt.base = env->gdt.base;
    return 0;
}

static int hax_sync_vcpu_register(CPUArchState *env, int set)
{
    struct vcpu_state_t regs;
    int ret;
    memset(&regs, 0, sizeof(struct vcpu_state_t));

    if (!set) {
        ret = hax_sync_vcpu_state(env, &regs, 0);
        if (ret < 0) {
            return -1;
        }
    }

    /* general-purpose registers */
    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    hax_getput_reg(&regs._r8, &env->regs[8], set);
    hax_getput_reg(&regs._r9, &env->regs[9], set);
    hax_getput_reg(&regs._r10, &env->regs[10], set);
    hax_getput_reg(&regs._r11, &env->regs[11], set);
    hax_getput_reg(&regs._r12, &env->regs[12], set);
    hax_getput_reg(&regs._r13, &env->regs[13], set);
    hax_getput_reg(&regs._r14, &env->regs[14], set);
    hax_getput_reg(&regs._r15, &env->regs[15], set);
#endif
    hax_getput_reg(&regs._rflags, &env->eflags, set);
    hax_getput_reg(&regs._rip, &env->eip, set);

    if (set) {
        regs._cr0 = env->cr[0];
        regs._cr2 = env->cr[2];
        regs._cr3 = env->cr[3];
        regs._cr4 = env->cr[4];
        hax_set_segments(env, &regs);
    } else {
        env->cr[0] = regs._cr0;
        env->cr[2] = regs._cr2;
        env->cr[3] = regs._cr3;
        env->cr[4] = regs._cr4;
        hax_get_segments(env, &regs);
    }

    if (set) {
        ret = hax_sync_vcpu_state(env, &regs, 1);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}

static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
                              uint64_t value)
{
    item->entry = index;
    item->value = value;
}

static int hax_get_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    msrs[n++].entry = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].entry = MSR_EFER;
    msrs[n++].entry = MSR_STAR;
    msrs[n++].entry = MSR_LSTAR;
    msrs[n++].entry = MSR_CSTAR;
    msrs[n++].entry = MSR_FMASK;
    msrs[n++].entry = MSR_KERNELGSBASE;
#endif
    md.nr_msr = n;
    ret = hax_sync_msr(env, &md, 0);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < md.done; i++) {
        switch (msrs[i].entry) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].value;
            break;
        case MSR_IA32_TSC:
            env->tsc = msrs[i].value;
            break;
#ifdef TARGET_X86_64
        case MSR_EFER:
            env->efer = msrs[i].value;
            break;
        case MSR_STAR:
            env->star = msrs[i].value;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].value;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].value;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].value;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].value;
            break;
#endif
        }
    }

    return 0;
}
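
/*
 * The MSR exchange is a small batch protocol: the caller fills
 * md.entries[i].entry with MSR indices, sets md.nr_msr, and the kernel
 * reports in md.done how many entries it actually processed (filling in
 * the value fields on a get). That is why the loop above iterates over
 * md.done rather than n: entries the kernel did not handle are simply
 * skipped instead of being read back as stale zeros.
 */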

static int hax_set_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int n = 0;

    memset(&md, 0, sizeof(struct hax_msr_data));
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
#endif
    md.nr_msr = n;
    md.done = 0;

    return hax_sync_msr(env, &md, 1);
}

static int hax_get_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i, ret;

    ret = hax_sync_fpu(env, &fpu, 0);
    if (ret < 0) {
        return ret;
    }

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftw >> i) & 1);
    }
    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
        if (CPU_NB_REGS > 8) {
            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
        }
    }
    env->mxcsr = fpu.mxcsr;

    return 0;
}
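
/*
 * fx_layout follows the FXSAVE image, whose abridged tag word keeps one
 * bit per x87 register (1 = valid, 0 = empty), while QEMU's fptags[] uses
 * the opposite sense (1 = empty); hence the negation above and its mirror
 * image in hax_set_fpu() below. Illustrative example (hypothetical value):
 * ftw == 0x03 means ST0 and ST1 hold data, so fptags[] becomes
 * {0, 0, 1, 1, 1, 1, 1, 1}.
 */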

static int hax_set_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i;

    memset(&fpu, 0, sizeof(fpu));
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;

    for (i = 0; i < 8; ++i) {
        fpu.ftw |= (!env->fptags[i]) << i;
    }

    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < 8; i++) {
        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
        if (CPU_NB_REGS > 8) {
            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
        }
    }

    fpu.mxcsr = env->mxcsr;

    return hax_sync_fpu(env, &fpu, 1);
}

static int hax_arch_get_registers(CPUArchState *env)
{
    int ret;

    ret = hax_sync_vcpu_register(env, 0);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_fpu(env);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_msrs(env);
    if (ret < 0) {
        return ret;
    }

    x86_update_hflags(env);
    return 0;
}

static int hax_arch_set_registers(CPUArchState *env)
{
    int ret;
    ret = hax_sync_vcpu_register(env, 1);

    if (ret < 0) {
        fprintf(stderr, "Failed to sync vcpu reg\n");
        return ret;
    }
    ret = hax_set_fpu(env);
    if (ret < 0) {
        fprintf(stderr, "FPU failed\n");
        return ret;
    }
    ret = hax_set_msrs(env);
    if (ret < 0) {
        fprintf(stderr, "MSR failed\n");
        return ret;
    }

    return 0;
}

static void hax_vcpu_sync_state(CPUArchState *env, int modified)
{
    if (hax_enabled()) {
        if (modified) {
            hax_arch_set_registers(env);
        } else {
            hax_arch_get_registers(env);
        }
    }
}

/*
 * Much simpler than KVM, at least at this first stage: we don't need to
 * consider device pass-through or the framebuffer, and we may even be able
 * to remove the BIOS entirely
 */
int hax_sync_vcpus(void)
{
    if (hax_enabled()) {
        CPUState *cpu;

        cpu = first_cpu;
        if (!cpu) {
            return 0;
        }

        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
            int ret;

            ret = hax_arch_set_registers(cpu->env_ptr);
            if (ret < 0) {
                return ret;
            }
        }
    }

    return 0;
}

void hax_reset_vcpu_state(void *opaque)
{
    CPUState *cpu;
    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
        cpu->hax_vcpu->tunnel->user_event_pending = 0;
        cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
    }
}

static void hax_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HAX";
    ac->init_machine = hax_accel_init;
    ac->allowed = &hax_allowed;
}

static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};

static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

type_init(hax_type_init);