cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

emulate.c (152121B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/******************************************************************************
      3 * emulate.c
      4 *
      5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
      6 *
      7 * Copyright (c) 2005 Keir Fraser
      8 *
      9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
     10 * privileged instructions:
     11 *
     12 * Copyright (C) 2006 Qumranet
     13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
     14 *
     15 *   Avi Kivity <avi@qumranet.com>
     16 *   Yaniv Kamay <yaniv@qumranet.com>
     17 *
     18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
     19 */
     20
     21#include <linux/kvm_host.h>
     22#include "kvm_cache_regs.h"
     23#include "kvm_emulate.h"
     24#include <linux/stringify.h>
     25#include <asm/debugreg.h>
     26#include <asm/nospec-branch.h>
     27#include <asm/ibt.h>
     28
     29#include "x86.h"
     30#include "tss.h"
     31#include "mmu.h"
     32#include "pmu.h"
     33
     34/*
     35 * Operand types
     36 */
     37#define OpNone             0ull
     38#define OpImplicit         1ull  /* No generic decode */
     39#define OpReg              2ull  /* Register */
     40#define OpMem              3ull  /* Memory */
     41#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
     42#define OpDI               5ull  /* ES:DI/EDI/RDI */
     43#define OpMem64            6ull  /* Memory, 64-bit */
     44#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
     45#define OpDX               8ull  /* DX register */
     46#define OpCL               9ull  /* CL register (for shifts) */
     47#define OpImmByte         10ull  /* 8-bit sign extended immediate */
     48#define OpOne             11ull  /* Implied 1 */
     49#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
     50#define OpMem16           13ull  /* Memory operand (16-bit). */
     51#define OpMem32           14ull  /* Memory operand (32-bit). */
     52#define OpImmU            15ull  /* Immediate operand, zero extended */
     53#define OpSI              16ull  /* SI/ESI/RSI */
     54#define OpImmFAddr        17ull  /* Immediate far address */
     55#define OpMemFAddr        18ull  /* Far address in memory */
     56#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
     57#define OpES              20ull  /* ES */
     58#define OpCS              21ull  /* CS */
     59#define OpSS              22ull  /* SS */
     60#define OpDS              23ull  /* DS */
     61#define OpFS              24ull  /* FS */
     62#define OpGS              25ull  /* GS */
     63#define OpMem8            26ull  /* 8-bit zero extended memory operand */
     64#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
     65#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
     66#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
     67#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */
     68
     69#define OpBits             5  /* Width of operand field */
     70#define OpMask             ((1ull << OpBits) - 1)
     71
     72/*
     73 * Opcode effective-address decode tables.
     74 * Note that we only emulate instructions that have at least one memory
     75 * operand (excluding implicit stack references). We assume that stack
     76 * references and instruction fetches will never occur in special memory
     77 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
     78 * not be handled.
     79 */
     80
     81/* Operand sizes: 8-bit operands or specified/overridden size. */
     82#define ByteOp      (1<<0)	/* 8-bit operands. */
     83/* Destination operand type. */
     84#define DstShift    1
     85#define ImplicitOps (OpImplicit << DstShift)
     86#define DstReg      (OpReg << DstShift)
     87#define DstMem      (OpMem << DstShift)
     88#define DstAcc      (OpAcc << DstShift)
     89#define DstDI       (OpDI << DstShift)
     90#define DstMem64    (OpMem64 << DstShift)
     91#define DstMem16    (OpMem16 << DstShift)
     92#define DstImmUByte (OpImmUByte << DstShift)
     93#define DstDX       (OpDX << DstShift)
     94#define DstAccLo    (OpAccLo << DstShift)
     95#define DstMask     (OpMask << DstShift)
     96/* Source operand type. */
     97#define SrcShift    6
     98#define SrcNone     (OpNone << SrcShift)
     99#define SrcReg      (OpReg << SrcShift)
    100#define SrcMem      (OpMem << SrcShift)
    101#define SrcMem16    (OpMem16 << SrcShift)
    102#define SrcMem32    (OpMem32 << SrcShift)
    103#define SrcImm      (OpImm << SrcShift)
    104#define SrcImmByte  (OpImmByte << SrcShift)
    105#define SrcOne      (OpOne << SrcShift)
    106#define SrcImmUByte (OpImmUByte << SrcShift)
    107#define SrcImmU     (OpImmU << SrcShift)
    108#define SrcSI       (OpSI << SrcShift)
    109#define SrcXLat     (OpXLat << SrcShift)
    110#define SrcImmFAddr (OpImmFAddr << SrcShift)
    111#define SrcMemFAddr (OpMemFAddr << SrcShift)
    112#define SrcAcc      (OpAcc << SrcShift)
    113#define SrcImmU16   (OpImmU16 << SrcShift)
    114#define SrcImm64    (OpImm64 << SrcShift)
    115#define SrcDX       (OpDX << SrcShift)
    116#define SrcMem8     (OpMem8 << SrcShift)
    117#define SrcAccHi    (OpAccHi << SrcShift)
    118#define SrcMask     (OpMask << SrcShift)
    119#define BitOp       (1<<11)
    120#define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
    121#define String      (1<<13)     /* String instruction (rep capable) */
    122#define Stack       (1<<14)     /* Stack instruction (push/pop) */
    123#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
    124#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
    125#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
    126#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
    127#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
    128#define Escape      (5<<15)     /* Escape to coprocessor instruction */
    129#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
    130#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
    131#define Sse         (1<<18)     /* SSE Vector instruction */
    132/* Generic ModRM decode. */
    133#define ModRM       (1<<19)
    134/* Destination is only written; never read. */
    135#define Mov         (1<<20)
    136/* Misc flags */
    137#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
    138#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
    139#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
    140#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
    141#define Undefined   (1<<25) /* No Such Instruction */
    142#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
    143#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
    144#define No64	    (1<<28)
    145#define PageTable   (1 << 29)   /* instruction used to write page table */
    146#define NotImpl     (1 << 30)   /* instruction is not implemented */
    147/* Source 2 operand type */
    148#define Src2Shift   (31)
    149#define Src2None    (OpNone << Src2Shift)
    150#define Src2Mem     (OpMem << Src2Shift)
    151#define Src2CL      (OpCL << Src2Shift)
    152#define Src2ImmByte (OpImmByte << Src2Shift)
    153#define Src2One     (OpOne << Src2Shift)
    154#define Src2Imm     (OpImm << Src2Shift)
    155#define Src2ES      (OpES << Src2Shift)
    156#define Src2CS      (OpCS << Src2Shift)
    157#define Src2SS      (OpSS << Src2Shift)
    158#define Src2DS      (OpDS << Src2Shift)
    159#define Src2FS      (OpFS << Src2Shift)
    160#define Src2GS      (OpGS << Src2Shift)
    161#define Src2Mask    (OpMask << Src2Shift)
    162#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
    163#define AlignMask   ((u64)7 << 41)
    164#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
    165#define Unaligned   ((u64)2 << 41)  /* Explicitly unaligned (e.g. MOVDQU) */
    166#define Avx         ((u64)3 << 41)  /* Advanced Vector Extensions */
    167#define Aligned16   ((u64)4 << 41)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */
    168#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
    169#define NoWrite     ((u64)1 << 45)  /* No writeback */
    170#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
    171#define NoMod	    ((u64)1 << 47)  /* Mod field is ignored */
    172#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
    173#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
    174#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
    175#define NearBranch  ((u64)1 << 52)  /* Near branches */
    176#define No16	    ((u64)1 << 53)  /* No 16 bit operand */
    177#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
     178#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */
    179#define IsBranch    ((u64)1 << 56)  /* Instruction is considered a branch. */
    180
    181#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
    182
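        /*
         * Illustrative sketch (not part of the original file): the operand
         * types above are packed into the single 64-bit ctxt->d flags word
         * and recovered with the matching shift plus OpMask, roughly the way
         * the decoder does further down in this file:
         *
         *	unsigned dst_type  = (ctxt->d >> DstShift)  & OpMask;
         *	unsigned src_type  = (ctxt->d >> SrcShift)  & OpMask;
         *	unsigned src2_type = (ctxt->d >> Src2Shift) & OpMask;
         *
         * e.g. an entry flagged DstReg | SrcMem yields dst_type == OpReg and
         * src_type == OpMem.
         */
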
    183#define X2(x...) x, x
    184#define X3(x...) X2(x), x
    185#define X4(x...) X2(x), X2(x)
    186#define X5(x...) X4(x), x
    187#define X6(x...) X4(x), X2(x)
    188#define X7(x...) X4(x), X3(x)
    189#define X8(x...) X4(x), X4(x)
    190#define X16(x...) X8(x), X8(x)
    191
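        /*
         * Illustrative note (not part of the original file): the X<n> helpers
         * above simply repeat their argument list n times, e.g.
         *
         *	X4(entry)	expands to	entry, entry, entry, entry
         *
         * They are used below to fill long runs of identical entries in the
         * opcode tables without spelling each one out.
         */
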
    192#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
    193#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT))
    194
    195struct opcode {
    196	u64 flags;
    197	u8 intercept;
    198	u8 pad[7];
    199	union {
    200		int (*execute)(struct x86_emulate_ctxt *ctxt);
    201		const struct opcode *group;
    202		const struct group_dual *gdual;
    203		const struct gprefix *gprefix;
    204		const struct escape *esc;
    205		const struct instr_dual *idual;
    206		const struct mode_dual *mdual;
    207		void (*fastop)(struct fastop *fake);
    208	} u;
    209	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
    210};
    211
    212struct group_dual {
    213	struct opcode mod012[8];
    214	struct opcode mod3[8];
    215};
    216
    217struct gprefix {
    218	struct opcode pfx_no;
    219	struct opcode pfx_66;
    220	struct opcode pfx_f2;
    221	struct opcode pfx_f3;
    222};
    223
    224struct escape {
    225	struct opcode op[8];
    226	struct opcode high[64];
    227};
    228
    229struct instr_dual {
    230	struct opcode mod012;
    231	struct opcode mod3;
    232};
    233
    234struct mode_dual {
    235	struct opcode mode32;
    236	struct opcode mode64;
    237};
    238
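        /*
         * Illustrative sketch (not part of the original file): the decoder
         * resolves these table variants after fetching the ModRM byte,
         * roughly like this (simplified from x86_decode_insn later in this
         * file):
         *
         *	switch (ctxt->d & GroupMask) {
         *	case Group:      // ModRM.reg (bits 5:3) picks a sub-opcode
         *		opcode = opcode.u.group[(ctxt->modrm >> 3) & 7];
         *		break;
         *	case GroupDual:  // additionally split on ModRM.mod == 3
         *		if ((ctxt->modrm >> 6) == 3)
         *			opcode = opcode.u.gdual->mod3[(ctxt->modrm >> 3) & 7];
         *		else
         *			opcode = opcode.u.gdual->mod012[(ctxt->modrm >> 3) & 7];
         *		break;
         *	case Prefix:     // 66/F2/F3 mandatory prefix picks pfx_*
         *		...
         *	}
         *
         * Escape, InstrDual and ModeDual are resolved in the same way.
         */
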
    239#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
    240
    241enum x86_transfer_type {
    242	X86_TRANSFER_NONE,
    243	X86_TRANSFER_CALL_JMP,
    244	X86_TRANSFER_RET,
    245	X86_TRANSFER_TASK_SWITCH,
    246};
    247
    248static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
    249{
    250	if (!(ctxt->regs_valid & (1 << nr))) {
    251		ctxt->regs_valid |= 1 << nr;
    252		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
    253	}
    254	return ctxt->_regs[nr];
    255}
    256
    257static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
    258{
    259	ctxt->regs_valid |= 1 << nr;
    260	ctxt->regs_dirty |= 1 << nr;
    261	return &ctxt->_regs[nr];
    262}
    263
    264static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
    265{
    266	reg_read(ctxt, nr);
    267	return reg_write(ctxt, nr);
    268}
    269
    270static void writeback_registers(struct x86_emulate_ctxt *ctxt)
    271{
    272	unsigned reg;
    273
    274	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
    275		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
    276}
    277
    278static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
    279{
    280	ctxt->regs_dirty = 0;
    281	ctxt->regs_valid = 0;
    282}
    283
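        /*
         * Added note (not part of the original file): reg_read()/reg_write()
         * above give the emulator a lazily filled, write-back cache of the
         * guest GPRs.  regs_valid marks registers already fetched from the
         * vCPU, regs_dirty marks registers the emulated instruction modified;
         * writeback_registers() flushes only the dirty ones, and
         * invalidate_registers() drops the cached state entirely.
         */
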
    284/*
    285 * These EFLAGS bits are restored from saved value during emulation, and
    286 * any changes are written back to the saved value after emulation.
    287 */
    288#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
    289		     X86_EFLAGS_PF|X86_EFLAGS_CF)
    290
    291#ifdef CONFIG_X86_64
    292#define ON64(x) x
    293#else
    294#define ON64(x)
    295#endif
    296
    297/*
    298 * fastop functions have a special calling convention:
    299 *
    300 * dst:    rax        (in/out)
    301 * src:    rdx        (in/out)
    302 * src2:   rcx        (in)
    303 * flags:  rflags     (in/out)
    304 * ex:     rsi        (in:fastop pointer, out:zero if exception)
    305 *
    306 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
    307 * different operand sizes can be reached by calculation, rather than a jump
    308 * table (which would be bigger than the code).
    309 */
    310static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
    311
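        /*
         * Illustrative sketch (not part of the original file): because every
         * fastop entry is FASTOP_SIZE bytes and the b/w/l/q variants are laid
         * out back to back, fastop() (defined later in this file) can index
         * by operand size instead of using a jump table, roughly:
         *
         *	if (!(ctxt->d & ByteOp))
         *		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
         *	// load guest flags, CALL *fop, read the flags back:
         *	asm("push %[flags]; popf; " CALL_NOSPEC "; pushf; pop %[flags]" ...);
         *
         * so e.g. a 4-byte operand lands on the op##l entry at offset
         * 2 * FASTOP_SIZE.
         */
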
    312#define __FOP_FUNC(name) \
    313	".align " __stringify(FASTOP_SIZE) " \n\t" \
    314	".type " name ", @function \n\t" \
    315	name ":\n\t" \
    316	ASM_ENDBR
    317
    318#define FOP_FUNC(name) \
    319	__FOP_FUNC(#name)
    320
    321#define __FOP_RET(name) \
    322	"11: " ASM_RET \
    323	".size " name ", .-" name "\n\t"
    324
    325#define FOP_RET(name) \
    326	__FOP_RET(#name)
    327
    328#define FOP_START(op) \
    329	extern void em_##op(struct fastop *fake); \
    330	asm(".pushsection .text, \"ax\" \n\t" \
    331	    ".global em_" #op " \n\t" \
    332	    ".align " __stringify(FASTOP_SIZE) " \n\t" \
    333	    "em_" #op ":\n\t"
    334
    335#define FOP_END \
    336	    ".popsection")
    337
    338#define __FOPNOP(name) \
    339	__FOP_FUNC(name) \
    340	__FOP_RET(name)
    341
    342#define FOPNOP() \
    343	__FOPNOP(__stringify(__UNIQUE_ID(nop)))
    344
    345#define FOP1E(op,  dst) \
    346	__FOP_FUNC(#op "_" #dst) \
    347	"10: " #op " %" #dst " \n\t" \
    348	__FOP_RET(#op "_" #dst)
    349
    350#define FOP1EEX(op,  dst) \
    351	FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
    352
    353#define FASTOP1(op) \
    354	FOP_START(op) \
    355	FOP1E(op##b, al) \
    356	FOP1E(op##w, ax) \
    357	FOP1E(op##l, eax) \
    358	ON64(FOP1E(op##q, rax))	\
    359	FOP_END
    360
    361/* 1-operand, using src2 (for MUL/DIV r/m) */
    362#define FASTOP1SRC2(op, name) \
    363	FOP_START(name) \
    364	FOP1E(op, cl) \
    365	FOP1E(op, cx) \
    366	FOP1E(op, ecx) \
    367	ON64(FOP1E(op, rcx)) \
    368	FOP_END
    369
    370/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
    371#define FASTOP1SRC2EX(op, name) \
    372	FOP_START(name) \
    373	FOP1EEX(op, cl) \
    374	FOP1EEX(op, cx) \
    375	FOP1EEX(op, ecx) \
    376	ON64(FOP1EEX(op, rcx)) \
    377	FOP_END
    378
    379#define FOP2E(op,  dst, src)	   \
    380	__FOP_FUNC(#op "_" #dst "_" #src) \
    381	#op " %" #src ", %" #dst " \n\t" \
    382	__FOP_RET(#op "_" #dst "_" #src)
    383
    384#define FASTOP2(op) \
    385	FOP_START(op) \
    386	FOP2E(op##b, al, dl) \
    387	FOP2E(op##w, ax, dx) \
    388	FOP2E(op##l, eax, edx) \
    389	ON64(FOP2E(op##q, rax, rdx)) \
    390	FOP_END
    391
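        /*
         * Illustrative example (not part of the original file): FASTOP2(add)
         * emits a global em_add symbol followed by four FASTOP_SIZE-aligned
         * entry points, conceptually:
         *
         *	em_add:				// also the 8-bit entry
         *		addb %dl, %al; ret
         *	em_add + 1*FASTOP_SIZE:
         *		addw %dx, %ax; ret
         *	em_add + 2*FASTOP_SIZE:
         *		addl %edx, %eax; ret
         *	em_add + 3*FASTOP_SIZE:		// 64-bit kernels only (ON64)
         *		addq %rdx, %rax; ret
         *
         * which is exactly the layout fastop() indexes by operand size.
         */
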
    392/* 2 operand, word only */
    393#define FASTOP2W(op) \
    394	FOP_START(op) \
    395	FOPNOP() \
    396	FOP2E(op##w, ax, dx) \
    397	FOP2E(op##l, eax, edx) \
    398	ON64(FOP2E(op##q, rax, rdx)) \
    399	FOP_END
    400
    401/* 2 operand, src is CL */
    402#define FASTOP2CL(op) \
    403	FOP_START(op) \
    404	FOP2E(op##b, al, cl) \
    405	FOP2E(op##w, ax, cl) \
    406	FOP2E(op##l, eax, cl) \
    407	ON64(FOP2E(op##q, rax, cl)) \
    408	FOP_END
    409
    410/* 2 operand, src and dest are reversed */
    411#define FASTOP2R(op, name) \
    412	FOP_START(name) \
    413	FOP2E(op##b, dl, al) \
    414	FOP2E(op##w, dx, ax) \
    415	FOP2E(op##l, edx, eax) \
    416	ON64(FOP2E(op##q, rdx, rax)) \
    417	FOP_END
    418
    419#define FOP3E(op,  dst, src, src2) \
    420	__FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
    421	#op " %" #src2 ", %" #src ", %" #dst " \n\t"\
    422	__FOP_RET(#op "_" #dst "_" #src "_" #src2)
    423
    424/* 3-operand, word-only, src2=cl */
    425#define FASTOP3WCL(op) \
    426	FOP_START(op) \
    427	FOPNOP() \
    428	FOP3E(op##w, ax, dx, cl) \
    429	FOP3E(op##l, eax, edx, cl) \
    430	ON64(FOP3E(op##q, rax, rdx, cl)) \
    431	FOP_END
    432
    433/* Special case for SETcc - 1 instruction per cc */
    434
    435/*
    436 * Depending on .config the SETcc functions look like:
    437 *
    438 * ENDBR       [4 bytes; CONFIG_X86_KERNEL_IBT]
    439 * SETcc %al   [3 bytes]
    440 * RET         [1 byte]
    441 * INT3        [1 byte; CONFIG_SLS]
    442 *
    443 * Which gives possible sizes 4, 5, 8 or 9.  When rounded up to the
    444 * next power-of-two alignment they become 4, 8 or 16 resp.
    445 */
    446#define SETCC_LENGTH	(ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
    447#define SETCC_ALIGN	(4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
    448static_assert(SETCC_LENGTH <= SETCC_ALIGN);
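
        /*
         * Worked example (not part of the original file), assuming both
         * CONFIG_X86_KERNEL_IBT and CONFIG_SLS are enabled:
         *
         *	SETCC_LENGTH = ENDBR_INSN_SIZE + 4 + 1 = 4 + 4 + 1 = 9
         *	SETCC_ALIGN  = 4 << 1 << 1             = 16
         *
         * With both disabled the values are 4 and 4.  test_cc() below relies
         * on this fixed stride: the handler for condition code cc sits at
         * em_setcc + SETCC_ALIGN * (cc & 0xf).
         */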
    449
    450#define FOP_SETCC(op) \
    451	".align " __stringify(SETCC_ALIGN) " \n\t" \
    452	".type " #op ", @function \n\t" \
    453	#op ": \n\t" \
    454	ASM_ENDBR \
    455	#op " %al \n\t" \
    456	__FOP_RET(#op)
    457
    458FOP_START(setcc)
    459FOP_SETCC(seto)
    460FOP_SETCC(setno)
    461FOP_SETCC(setc)
    462FOP_SETCC(setnc)
    463FOP_SETCC(setz)
    464FOP_SETCC(setnz)
    465FOP_SETCC(setbe)
    466FOP_SETCC(setnbe)
    467FOP_SETCC(sets)
    468FOP_SETCC(setns)
    469FOP_SETCC(setp)
    470FOP_SETCC(setnp)
    471FOP_SETCC(setl)
    472FOP_SETCC(setnl)
    473FOP_SETCC(setle)
    474FOP_SETCC(setnle)
    475FOP_END;
    476
    477FOP_START(salc)
    478FOP_FUNC(salc)
    479"pushf; sbb %al, %al; popf \n\t"
    480FOP_RET(salc)
    481FOP_END;
    482
    483/*
    484 * XXX: inoutclob user must know where the argument is being expanded.
    485 *      Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
    486 */
    487#define asm_safe(insn, inoutclob...) \
    488({ \
    489	int _fault = 0; \
    490 \
    491	asm volatile("1:" insn "\n" \
    492	             "2:\n" \
    493		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
    494	             : [_fault] "+r"(_fault) inoutclob ); \
    495 \
    496	_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
    497})
    498
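        /*
         * Usage note (not part of the original file): asm_safe() runs a
         * single instruction with an exception-table fixup attached, so a
         * faulting instruction sets _fault instead of oopsing and the macro
         * returns X86EMUL_UNHANDLEABLE.  Callers later in this file use it
         * for FPU instructions that may fault, along the lines of:
         *
         *	rc = asm_safe("fwait");
         *	if (rc != X86EMUL_CONTINUE)
         *		return rc;
         */
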
    499static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
    500				    enum x86_intercept intercept,
    501				    enum x86_intercept_stage stage)
    502{
    503	struct x86_instruction_info info = {
    504		.intercept  = intercept,
    505		.rep_prefix = ctxt->rep_prefix,
    506		.modrm_mod  = ctxt->modrm_mod,
    507		.modrm_reg  = ctxt->modrm_reg,
    508		.modrm_rm   = ctxt->modrm_rm,
    509		.src_val    = ctxt->src.val64,
    510		.dst_val    = ctxt->dst.val64,
    511		.src_bytes  = ctxt->src.bytes,
    512		.dst_bytes  = ctxt->dst.bytes,
    513		.ad_bytes   = ctxt->ad_bytes,
    514		.next_rip   = ctxt->eip,
    515	};
    516
    517	return ctxt->ops->intercept(ctxt, &info, stage);
    518}
    519
    520static void assign_masked(ulong *dest, ulong src, ulong mask)
    521{
    522	*dest = (*dest & ~mask) | (src & mask);
    523}
    524
    525static void assign_register(unsigned long *reg, u64 val, int bytes)
    526{
    527	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
    528	switch (bytes) {
    529	case 1:
    530		*(u8 *)reg = (u8)val;
    531		break;
    532	case 2:
    533		*(u16 *)reg = (u16)val;
    534		break;
    535	case 4:
    536		*reg = (u32)val;
    537		break;	/* 64b: zero-extend */
    538	case 8:
    539		*reg = val;
    540		break;
    541	}
    542}
    543
    544static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
    545{
    546	return (1UL << (ctxt->ad_bytes << 3)) - 1;
    547}
    548
    549static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
    550{
    551	u16 sel;
    552	struct desc_struct ss;
    553
    554	if (ctxt->mode == X86EMUL_MODE_PROT64)
    555		return ~0UL;
    556	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
    557	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
    558}
    559
    560static int stack_size(struct x86_emulate_ctxt *ctxt)
    561{
    562	return (__fls(stack_mask(ctxt)) + 1) >> 3;
    563}
    564
    565/* Access/update address held in a register, based on addressing mode. */
    566static inline unsigned long
    567address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
    568{
    569	if (ctxt->ad_bytes == sizeof(unsigned long))
    570		return reg;
    571	else
    572		return reg & ad_mask(ctxt);
    573}
    574
    575static inline unsigned long
    576register_address(struct x86_emulate_ctxt *ctxt, int reg)
    577{
    578	return address_mask(ctxt, reg_read(ctxt, reg));
    579}
    580
    581static void masked_increment(ulong *reg, ulong mask, int inc)
    582{
    583	assign_masked(reg, *reg + inc, mask);
    584}
    585
    586static inline void
    587register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
    588{
    589	ulong *preg = reg_rmw(ctxt, reg);
    590
    591	assign_register(preg, *preg + inc, ctxt->ad_bytes);
    592}
    593
    594static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
    595{
    596	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
    597}
    598
    599static u32 desc_limit_scaled(struct desc_struct *desc)
    600{
    601	u32 limit = get_desc_limit(desc);
    602
    603	return desc->g ? (limit << 12) | 0xfff : limit;
    604}
    605
    606static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
    607{
    608	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
    609		return 0;
    610
    611	return ctxt->ops->get_cached_segment_base(ctxt, seg);
    612}
    613
    614static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
    615			     u32 error, bool valid)
    616{
    617	WARN_ON(vec > 0x1f);
    618	ctxt->exception.vector = vec;
    619	ctxt->exception.error_code = error;
    620	ctxt->exception.error_code_valid = valid;
    621	return X86EMUL_PROPAGATE_FAULT;
    622}
    623
    624static int emulate_db(struct x86_emulate_ctxt *ctxt)
    625{
    626	return emulate_exception(ctxt, DB_VECTOR, 0, false);
    627}
    628
    629static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
    630{
    631	return emulate_exception(ctxt, GP_VECTOR, err, true);
    632}
    633
    634static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
    635{
    636	return emulate_exception(ctxt, SS_VECTOR, err, true);
    637}
    638
    639static int emulate_ud(struct x86_emulate_ctxt *ctxt)
    640{
    641	return emulate_exception(ctxt, UD_VECTOR, 0, false);
    642}
    643
    644static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
    645{
    646	return emulate_exception(ctxt, TS_VECTOR, err, true);
    647}
    648
    649static int emulate_de(struct x86_emulate_ctxt *ctxt)
    650{
    651	return emulate_exception(ctxt, DE_VECTOR, 0, false);
    652}
    653
    654static int emulate_nm(struct x86_emulate_ctxt *ctxt)
    655{
    656	return emulate_exception(ctxt, NM_VECTOR, 0, false);
    657}
    658
    659static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
    660{
    661	u16 selector;
    662	struct desc_struct desc;
    663
    664	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
    665	return selector;
    666}
    667
    668static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
    669				 unsigned seg)
    670{
    671	u16 dummy;
    672	u32 base3;
    673	struct desc_struct desc;
    674
    675	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
    676	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
    677}
    678
    679static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
    680{
    681	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
    682}
    683
    684static inline bool emul_is_noncanonical_address(u64 la,
    685						struct x86_emulate_ctxt *ctxt)
    686{
    687	return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
    688}
    689
    690/*
    691 * x86 defines three classes of vector instructions: explicitly
    692 * aligned, explicitly unaligned, and the rest, which change behaviour
    693 * depending on whether they're AVX encoded or not.
    694 *
    695 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
    696 * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
    697 * 512 bytes of data must be aligned to a 16 byte boundary.
    698 */
    699static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
    700{
    701	u64 alignment = ctxt->d & AlignMask;
    702
    703	if (likely(size < 16))
    704		return 1;
    705
    706	switch (alignment) {
    707	case Unaligned:
    708	case Avx:
    709		return 1;
    710	case Aligned16:
    711		return 16;
    712	case Aligned:
    713	default:
    714		return size;
    715	}
    716}
    717
    718static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
    719				       struct segmented_address addr,
    720				       unsigned *max_size, unsigned size,
    721				       bool write, bool fetch,
    722				       enum x86emul_mode mode, ulong *linear)
    723{
    724	struct desc_struct desc;
    725	bool usable;
    726	ulong la;
    727	u32 lim;
    728	u16 sel;
    729	u8  va_bits;
    730
    731	la = seg_base(ctxt, addr.seg) + addr.ea;
    732	*max_size = 0;
    733	switch (mode) {
    734	case X86EMUL_MODE_PROT64:
    735		*linear = la;
    736		va_bits = ctxt_virt_addr_bits(ctxt);
    737		if (!__is_canonical_address(la, va_bits))
    738			goto bad;
    739
    740		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
    741		if (size > *max_size)
    742			goto bad;
    743		break;
    744	default:
    745		*linear = la = (u32)la;
    746		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
    747						addr.seg);
    748		if (!usable)
    749			goto bad;
    750		/* code segment in protected mode or read-only data segment */
    751		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
    752					|| !(desc.type & 2)) && write)
    753			goto bad;
    754		/* unreadable code segment */
    755		if (!fetch && (desc.type & 8) && !(desc.type & 2))
    756			goto bad;
    757		lim = desc_limit_scaled(&desc);
    758		if (!(desc.type & 8) && (desc.type & 4)) {
    759			/* expand-down segment */
    760			if (addr.ea <= lim)
    761				goto bad;
    762			lim = desc.d ? 0xffffffff : 0xffff;
    763		}
    764		if (addr.ea > lim)
    765			goto bad;
    766		if (lim == 0xffffffff)
    767			*max_size = ~0u;
    768		else {
    769			*max_size = (u64)lim + 1 - addr.ea;
    770			if (size > *max_size)
    771				goto bad;
    772		}
    773		break;
    774	}
    775	if (la & (insn_alignment(ctxt, size) - 1))
    776		return emulate_gp(ctxt, 0);
    777	return X86EMUL_CONTINUE;
    778bad:
    779	if (addr.seg == VCPU_SREG_SS)
    780		return emulate_ss(ctxt, 0);
    781	else
    782		return emulate_gp(ctxt, 0);
    783}
    784
    785static int linearize(struct x86_emulate_ctxt *ctxt,
    786		     struct segmented_address addr,
    787		     unsigned size, bool write,
    788		     ulong *linear)
    789{
    790	unsigned max_size;
    791	return __linearize(ctxt, addr, &max_size, size, write, false,
    792			   ctxt->mode, linear);
    793}
    794
    795static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
    796			     enum x86emul_mode mode)
    797{
    798	ulong linear;
    799	int rc;
    800	unsigned max_size;
    801	struct segmented_address addr = { .seg = VCPU_SREG_CS,
    802					   .ea = dst };
    803
    804	if (ctxt->op_bytes != sizeof(unsigned long))
    805		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
    806	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
    807	if (rc == X86EMUL_CONTINUE)
    808		ctxt->_eip = addr.ea;
    809	return rc;
    810}
    811
    812static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
    813{
    814	return assign_eip(ctxt, dst, ctxt->mode);
    815}
    816
    817static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
    818			  const struct desc_struct *cs_desc)
    819{
    820	enum x86emul_mode mode = ctxt->mode;
    821	int rc;
    822
    823#ifdef CONFIG_X86_64
    824	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
    825		if (cs_desc->l) {
    826			u64 efer = 0;
    827
    828			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
    829			if (efer & EFER_LMA)
    830				mode = X86EMUL_MODE_PROT64;
    831		} else
    832			mode = X86EMUL_MODE_PROT32; /* temporary value */
    833	}
    834#endif
    835	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
    836		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
    837	rc = assign_eip(ctxt, dst, mode);
    838	if (rc == X86EMUL_CONTINUE)
    839		ctxt->mode = mode;
    840	return rc;
    841}
    842
    843static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
    844{
    845	return assign_eip_near(ctxt, ctxt->_eip + rel);
    846}
    847
    848static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
    849			      void *data, unsigned size)
    850{
    851	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
    852}
    853
    854static int linear_write_system(struct x86_emulate_ctxt *ctxt,
    855			       ulong linear, void *data,
    856			       unsigned int size)
    857{
    858	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
    859}
    860
    861static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
    862			      struct segmented_address addr,
    863			      void *data,
    864			      unsigned size)
    865{
    866	int rc;
    867	ulong linear;
    868
    869	rc = linearize(ctxt, addr, size, false, &linear);
    870	if (rc != X86EMUL_CONTINUE)
    871		return rc;
    872	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
    873}
    874
    875static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
    876			       struct segmented_address addr,
    877			       void *data,
    878			       unsigned int size)
    879{
    880	int rc;
    881	ulong linear;
    882
    883	rc = linearize(ctxt, addr, size, true, &linear);
    884	if (rc != X86EMUL_CONTINUE)
    885		return rc;
    886	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
    887}
    888
    889/*
    890 * Prefetch the remaining bytes of the instruction without crossing page
    891 * boundary if they are not in fetch_cache yet.
    892 */
    893static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
    894{
    895	int rc;
    896	unsigned size, max_size;
    897	unsigned long linear;
    898	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
    899	struct segmented_address addr = { .seg = VCPU_SREG_CS,
    900					   .ea = ctxt->eip + cur_size };
    901
    902	/*
    903	 * We do not know exactly how many bytes will be needed, and
    904	 * __linearize is expensive, so fetch as much as possible.  We
    905	 * just have to avoid going beyond the 15 byte limit, the end
    906	 * of the segment, or the end of the page.
    907	 *
    908	 * __linearize is called with size 0 so that it does not do any
    909	 * boundary check itself.  Instead, we use max_size to check
    910	 * against op_size.
    911	 */
    912	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
    913			 &linear);
    914	if (unlikely(rc != X86EMUL_CONTINUE))
    915		return rc;
    916
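        	/*
        	 * Added note (not in the original file): cur_size can never
        	 * exceed 15 (the size of the fetch cache and the x86 insn
        	 * length limit), so "15UL ^ cur_size" below is equivalent to
        	 * 15 - cur_size, i.e. the bytes left before that limit.
        	 */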
    917	size = min_t(unsigned, 15UL ^ cur_size, max_size);
    918	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
    919
    920	/*
    921	 * One instruction can only straddle two pages,
    922	 * and one has been loaded at the beginning of
     923	 * x86_decode_insn.  So, if we still do not have
     924	 * enough bytes, we must have hit the 15-byte limit.
    925	 */
    926	if (unlikely(size < op_size))
    927		return emulate_gp(ctxt, 0);
    928
    929	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
    930			      size, &ctxt->exception);
    931	if (unlikely(rc != X86EMUL_CONTINUE))
    932		return rc;
    933	ctxt->fetch.end += size;
    934	return X86EMUL_CONTINUE;
    935}
    936
    937static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
    938					       unsigned size)
    939{
    940	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
    941
    942	if (unlikely(done_size < size))
    943		return __do_insn_fetch_bytes(ctxt, size - done_size);
    944	else
    945		return X86EMUL_CONTINUE;
    946}
    947
    948/* Fetch next part of the instruction being emulated. */
    949#define insn_fetch(_type, _ctxt)					\
    950({	_type _x;							\
    951									\
    952	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));			\
    953	if (rc != X86EMUL_CONTINUE)					\
    954		goto done;						\
    955	ctxt->_eip += sizeof(_type);					\
    956	memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));			\
    957	ctxt->fetch.ptr += sizeof(_type);				\
    958	_x;								\
    959})
    960
    961#define insn_fetch_arr(_arr, _size, _ctxt)				\
    962({									\
    963	rc = do_insn_fetch_bytes(_ctxt, _size);				\
    964	if (rc != X86EMUL_CONTINUE)					\
    965		goto done;						\
    966	ctxt->_eip += (_size);						\
    967	memcpy(_arr, ctxt->fetch.ptr, _size);				\
    968	ctxt->fetch.ptr += (_size);					\
    969})
    970
    971/*
    972 * Given the 'reg' portion of a ModRM byte, and a register block, return a
    973 * pointer into the block that addresses the relevant register.
     974 * @byteop selects byte operands; AH,CH,DH,BH are decoded only when it is set and no REX prefix is present.
    975 */
    976static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
    977			     int byteop)
    978{
    979	void *p;
    980	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
    981
    982	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
    983		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
    984	else
    985		p = reg_rmw(ctxt, modrm_reg);
    986	return p;
    987}
    988
    989static int read_descriptor(struct x86_emulate_ctxt *ctxt,
    990			   struct segmented_address addr,
    991			   u16 *size, unsigned long *address, int op_bytes)
    992{
    993	int rc;
    994
    995	if (op_bytes == 2)
    996		op_bytes = 3;
    997	*address = 0;
    998	rc = segmented_read_std(ctxt, addr, size, 2);
    999	if (rc != X86EMUL_CONTINUE)
   1000		return rc;
   1001	addr.ea += 2;
   1002	rc = segmented_read_std(ctxt, addr, address, op_bytes);
   1003	return rc;
   1004}
   1005
   1006FASTOP2(add);
   1007FASTOP2(or);
   1008FASTOP2(adc);
   1009FASTOP2(sbb);
   1010FASTOP2(and);
   1011FASTOP2(sub);
   1012FASTOP2(xor);
   1013FASTOP2(cmp);
   1014FASTOP2(test);
   1015
   1016FASTOP1SRC2(mul, mul_ex);
   1017FASTOP1SRC2(imul, imul_ex);
   1018FASTOP1SRC2EX(div, div_ex);
   1019FASTOP1SRC2EX(idiv, idiv_ex);
   1020
   1021FASTOP3WCL(shld);
   1022FASTOP3WCL(shrd);
   1023
   1024FASTOP2W(imul);
   1025
   1026FASTOP1(not);
   1027FASTOP1(neg);
   1028FASTOP1(inc);
   1029FASTOP1(dec);
   1030
   1031FASTOP2CL(rol);
   1032FASTOP2CL(ror);
   1033FASTOP2CL(rcl);
   1034FASTOP2CL(rcr);
   1035FASTOP2CL(shl);
   1036FASTOP2CL(shr);
   1037FASTOP2CL(sar);
   1038
   1039FASTOP2W(bsf);
   1040FASTOP2W(bsr);
   1041FASTOP2W(bt);
   1042FASTOP2W(bts);
   1043FASTOP2W(btr);
   1044FASTOP2W(btc);
   1045
   1046FASTOP2(xadd);
   1047
   1048FASTOP2R(cmp, cmp_r);
   1049
   1050static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
   1051{
   1052	/* If src is zero, do not writeback, but update flags */
   1053	if (ctxt->src.val == 0)
   1054		ctxt->dst.type = OP_NONE;
   1055	return fastop(ctxt, em_bsf);
   1056}
   1057
   1058static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
   1059{
   1060	/* If src is zero, do not writeback, but update flags */
   1061	if (ctxt->src.val == 0)
   1062		ctxt->dst.type = OP_NONE;
   1063	return fastop(ctxt, em_bsr);
   1064}
   1065
   1066static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
   1067{
   1068	u8 rc;
   1069	void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
   1070
   1071	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
   1072	asm("push %[flags]; popf; " CALL_NOSPEC
   1073	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
   1074	return rc;
   1075}
   1076
   1077static void fetch_register_operand(struct operand *op)
   1078{
   1079	switch (op->bytes) {
   1080	case 1:
   1081		op->val = *(u8 *)op->addr.reg;
   1082		break;
   1083	case 2:
   1084		op->val = *(u16 *)op->addr.reg;
   1085		break;
   1086	case 4:
   1087		op->val = *(u32 *)op->addr.reg;
   1088		break;
   1089	case 8:
   1090		op->val = *(u64 *)op->addr.reg;
   1091		break;
   1092	}
   1093}
   1094
   1095static int em_fninit(struct x86_emulate_ctxt *ctxt)
   1096{
   1097	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
   1098		return emulate_nm(ctxt);
   1099
   1100	kvm_fpu_get();
   1101	asm volatile("fninit");
   1102	kvm_fpu_put();
   1103	return X86EMUL_CONTINUE;
   1104}
   1105
   1106static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
   1107{
   1108	u16 fcw;
   1109
   1110	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
   1111		return emulate_nm(ctxt);
   1112
   1113	kvm_fpu_get();
   1114	asm volatile("fnstcw %0": "+m"(fcw));
   1115	kvm_fpu_put();
   1116
   1117	ctxt->dst.val = fcw;
   1118
   1119	return X86EMUL_CONTINUE;
   1120}
   1121
   1122static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
   1123{
   1124	u16 fsw;
   1125
   1126	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
   1127		return emulate_nm(ctxt);
   1128
   1129	kvm_fpu_get();
   1130	asm volatile("fnstsw %0": "+m"(fsw));
   1131	kvm_fpu_put();
   1132
   1133	ctxt->dst.val = fsw;
   1134
   1135	return X86EMUL_CONTINUE;
   1136}
   1137
   1138static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
   1139				    struct operand *op)
   1140{
   1141	unsigned reg = ctxt->modrm_reg;
   1142
   1143	if (!(ctxt->d & ModRM))
   1144		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
   1145
   1146	if (ctxt->d & Sse) {
   1147		op->type = OP_XMM;
   1148		op->bytes = 16;
   1149		op->addr.xmm = reg;
   1150		kvm_read_sse_reg(reg, &op->vec_val);
   1151		return;
   1152	}
   1153	if (ctxt->d & Mmx) {
   1154		reg &= 7;
   1155		op->type = OP_MM;
   1156		op->bytes = 8;
   1157		op->addr.mm = reg;
   1158		return;
   1159	}
   1160
   1161	op->type = OP_REG;
   1162	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   1163	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
   1164
   1165	fetch_register_operand(op);
   1166	op->orig_val = op->val;
   1167}
   1168
   1169static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
   1170{
   1171	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
   1172		ctxt->modrm_seg = VCPU_SREG_SS;
   1173}
   1174
   1175static int decode_modrm(struct x86_emulate_ctxt *ctxt,
   1176			struct operand *op)
   1177{
   1178	u8 sib;
   1179	int index_reg, base_reg, scale;
   1180	int rc = X86EMUL_CONTINUE;
   1181	ulong modrm_ea = 0;
   1182
   1183	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
   1184	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
   1185	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
   1186
   1187	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
   1188	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
   1189	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
   1190	ctxt->modrm_seg = VCPU_SREG_DS;
   1191
   1192	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
   1193		op->type = OP_REG;
   1194		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   1195		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
   1196				ctxt->d & ByteOp);
   1197		if (ctxt->d & Sse) {
   1198			op->type = OP_XMM;
   1199			op->bytes = 16;
   1200			op->addr.xmm = ctxt->modrm_rm;
   1201			kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
   1202			return rc;
   1203		}
   1204		if (ctxt->d & Mmx) {
   1205			op->type = OP_MM;
   1206			op->bytes = 8;
   1207			op->addr.mm = ctxt->modrm_rm & 7;
   1208			return rc;
   1209		}
   1210		fetch_register_operand(op);
   1211		return rc;
   1212	}
   1213
   1214	op->type = OP_MEM;
   1215
   1216	if (ctxt->ad_bytes == 2) {
   1217		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
   1218		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
   1219		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
   1220		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
   1221
   1222		/* 16-bit ModR/M decode. */
   1223		switch (ctxt->modrm_mod) {
   1224		case 0:
   1225			if (ctxt->modrm_rm == 6)
   1226				modrm_ea += insn_fetch(u16, ctxt);
   1227			break;
   1228		case 1:
   1229			modrm_ea += insn_fetch(s8, ctxt);
   1230			break;
   1231		case 2:
   1232			modrm_ea += insn_fetch(u16, ctxt);
   1233			break;
   1234		}
   1235		switch (ctxt->modrm_rm) {
   1236		case 0:
   1237			modrm_ea += bx + si;
   1238			break;
   1239		case 1:
   1240			modrm_ea += bx + di;
   1241			break;
   1242		case 2:
   1243			modrm_ea += bp + si;
   1244			break;
   1245		case 3:
   1246			modrm_ea += bp + di;
   1247			break;
   1248		case 4:
   1249			modrm_ea += si;
   1250			break;
   1251		case 5:
   1252			modrm_ea += di;
   1253			break;
   1254		case 6:
   1255			if (ctxt->modrm_mod != 0)
   1256				modrm_ea += bp;
   1257			break;
   1258		case 7:
   1259			modrm_ea += bx;
   1260			break;
   1261		}
   1262		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
   1263		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
   1264			ctxt->modrm_seg = VCPU_SREG_SS;
   1265		modrm_ea = (u16)modrm_ea;
   1266	} else {
   1267		/* 32/64-bit ModR/M decode. */
   1268		if ((ctxt->modrm_rm & 7) == 4) {
   1269			sib = insn_fetch(u8, ctxt);
   1270			index_reg |= (sib >> 3) & 7;
   1271			base_reg |= sib & 7;
   1272			scale = sib >> 6;
   1273
   1274			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
   1275				modrm_ea += insn_fetch(s32, ctxt);
   1276			else {
   1277				modrm_ea += reg_read(ctxt, base_reg);
   1278				adjust_modrm_seg(ctxt, base_reg);
   1279				/* Increment ESP on POP [ESP] */
   1280				if ((ctxt->d & IncSP) &&
   1281				    base_reg == VCPU_REGS_RSP)
   1282					modrm_ea += ctxt->op_bytes;
   1283			}
   1284			if (index_reg != 4)
   1285				modrm_ea += reg_read(ctxt, index_reg) << scale;
   1286		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
   1287			modrm_ea += insn_fetch(s32, ctxt);
   1288			if (ctxt->mode == X86EMUL_MODE_PROT64)
   1289				ctxt->rip_relative = 1;
   1290		} else {
   1291			base_reg = ctxt->modrm_rm;
   1292			modrm_ea += reg_read(ctxt, base_reg);
   1293			adjust_modrm_seg(ctxt, base_reg);
   1294		}
   1295		switch (ctxt->modrm_mod) {
   1296		case 1:
   1297			modrm_ea += insn_fetch(s8, ctxt);
   1298			break;
   1299		case 2:
   1300			modrm_ea += insn_fetch(s32, ctxt);
   1301			break;
   1302		}
   1303	}
   1304	op->addr.mem.ea = modrm_ea;
   1305	if (ctxt->ad_bytes != 8)
   1306		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
   1307
   1308done:
   1309	return rc;
   1310}
   1311
   1312static int decode_abs(struct x86_emulate_ctxt *ctxt,
   1313		      struct operand *op)
   1314{
   1315	int rc = X86EMUL_CONTINUE;
   1316
   1317	op->type = OP_MEM;
   1318	switch (ctxt->ad_bytes) {
   1319	case 2:
   1320		op->addr.mem.ea = insn_fetch(u16, ctxt);
   1321		break;
   1322	case 4:
   1323		op->addr.mem.ea = insn_fetch(u32, ctxt);
   1324		break;
   1325	case 8:
   1326		op->addr.mem.ea = insn_fetch(u64, ctxt);
   1327		break;
   1328	}
   1329done:
   1330	return rc;
   1331}
   1332
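        /*
         * Added note (not part of the original file): for BT/BTS/BTR/BTC with
         * a register bit index and a memory destination, the index may select
         * a bit outside the addressed word.  fetch_bit_operand() therefore
         * folds the operand-aligned, sign-extended part of the bit offset,
         * divided by 8 (sv >> 3), into the effective address and keeps only
         * the remaining in-word offset in src.val.
         */
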
   1333static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
   1334{
   1335	long sv = 0, mask;
   1336
   1337	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
   1338		mask = ~((long)ctxt->dst.bytes * 8 - 1);
   1339
   1340		if (ctxt->src.bytes == 2)
   1341			sv = (s16)ctxt->src.val & (s16)mask;
   1342		else if (ctxt->src.bytes == 4)
   1343			sv = (s32)ctxt->src.val & (s32)mask;
   1344		else
   1345			sv = (s64)ctxt->src.val & (s64)mask;
   1346
   1347		ctxt->dst.addr.mem.ea = address_mask(ctxt,
   1348					   ctxt->dst.addr.mem.ea + (sv >> 3));
   1349	}
   1350
   1351	/* only subword offset */
   1352	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
   1353}
   1354
   1355static int read_emulated(struct x86_emulate_ctxt *ctxt,
   1356			 unsigned long addr, void *dest, unsigned size)
   1357{
   1358	int rc;
   1359	struct read_cache *mc = &ctxt->mem_read;
   1360
   1361	if (mc->pos < mc->end)
   1362		goto read_cached;
   1363
   1364	WARN_ON((mc->end + size) >= sizeof(mc->data));
   1365
   1366	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
   1367				      &ctxt->exception);
   1368	if (rc != X86EMUL_CONTINUE)
   1369		return rc;
   1370
   1371	mc->end += size;
   1372
   1373read_cached:
   1374	memcpy(dest, mc->data + mc->pos, size);
   1375	mc->pos += size;
   1376	return X86EMUL_CONTINUE;
   1377}
   1378
   1379static int segmented_read(struct x86_emulate_ctxt *ctxt,
   1380			  struct segmented_address addr,
   1381			  void *data,
   1382			  unsigned size)
   1383{
   1384	int rc;
   1385	ulong linear;
   1386
   1387	rc = linearize(ctxt, addr, size, false, &linear);
   1388	if (rc != X86EMUL_CONTINUE)
   1389		return rc;
   1390	return read_emulated(ctxt, linear, data, size);
   1391}
   1392
   1393static int segmented_write(struct x86_emulate_ctxt *ctxt,
   1394			   struct segmented_address addr,
   1395			   const void *data,
   1396			   unsigned size)
   1397{
   1398	int rc;
   1399	ulong linear;
   1400
   1401	rc = linearize(ctxt, addr, size, true, &linear);
   1402	if (rc != X86EMUL_CONTINUE)
   1403		return rc;
   1404	return ctxt->ops->write_emulated(ctxt, linear, data, size,
   1405					 &ctxt->exception);
   1406}
   1407
   1408static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
   1409			     struct segmented_address addr,
   1410			     const void *orig_data, const void *data,
   1411			     unsigned size)
   1412{
   1413	int rc;
   1414	ulong linear;
   1415
   1416	rc = linearize(ctxt, addr, size, true, &linear);
   1417	if (rc != X86EMUL_CONTINUE)
   1418		return rc;
   1419	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
   1420					   size, &ctxt->exception);
   1421}
   1422
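        /*
         * Added note (not part of the original file): pio_in_emulated()
         * batches rep-prefixed IN/INS.  When its read-ahead cache is empty it
         * requests up to RCX elements in one go (clamped so the guest buffer
         * stays within a page), then hands the data out piecewise rather than
         * issuing one ->pio_in_emulated() callback per element.
         */
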
   1423static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
   1424			   unsigned int size, unsigned short port,
   1425			   void *dest)
   1426{
   1427	struct read_cache *rc = &ctxt->io_read;
   1428
   1429	if (rc->pos == rc->end) { /* refill pio read ahead */
   1430		unsigned int in_page, n;
   1431		unsigned int count = ctxt->rep_prefix ?
   1432			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
   1433		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
   1434			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
   1435			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
   1436		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
   1437		if (n == 0)
   1438			n = 1;
   1439		rc->pos = rc->end = 0;
   1440		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
   1441			return 0;
   1442		rc->end = n * size;
   1443	}
   1444
   1445	if (ctxt->rep_prefix && (ctxt->d & String) &&
   1446	    !(ctxt->eflags & X86_EFLAGS_DF)) {
   1447		ctxt->dst.data = rc->data + rc->pos;
   1448		ctxt->dst.type = OP_MEM_STR;
   1449		ctxt->dst.count = (rc->end - rc->pos) / size;
   1450		rc->pos = rc->end;
   1451	} else {
   1452		memcpy(dest, rc->data + rc->pos, size);
   1453		rc->pos += size;
   1454	}
   1455	return 1;
   1456}
   1457
   1458static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
   1459				     u16 index, struct desc_struct *desc)
   1460{
   1461	struct desc_ptr dt;
   1462	ulong addr;
   1463
   1464	ctxt->ops->get_idt(ctxt, &dt);
   1465
   1466	if (dt.size < index * 8 + 7)
   1467		return emulate_gp(ctxt, index << 3 | 0x2);
   1468
   1469	addr = dt.address + index * 8;
   1470	return linear_read_system(ctxt, addr, desc, sizeof(*desc));
   1471}
   1472
   1473static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
   1474				     u16 selector, struct desc_ptr *dt)
   1475{
   1476	const struct x86_emulate_ops *ops = ctxt->ops;
   1477	u32 base3 = 0;
   1478
   1479	if (selector & 1 << 2) {
   1480		struct desc_struct desc;
   1481		u16 sel;
   1482
   1483		memset(dt, 0, sizeof(*dt));
   1484		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
   1485				      VCPU_SREG_LDTR))
   1486			return;
   1487
   1488		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
   1489		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
   1490	} else
   1491		ops->get_gdt(ctxt, dt);
   1492}
   1493
   1494static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
   1495			      u16 selector, ulong *desc_addr_p)
   1496{
   1497	struct desc_ptr dt;
   1498	u16 index = selector >> 3;
   1499	ulong addr;
   1500
   1501	get_descriptor_table_ptr(ctxt, selector, &dt);
   1502
   1503	if (dt.size < index * 8 + 7)
   1504		return emulate_gp(ctxt, selector & 0xfffc);
   1505
   1506	addr = dt.address + index * 8;
   1507
   1508#ifdef CONFIG_X86_64
   1509	if (addr >> 32 != 0) {
   1510		u64 efer = 0;
   1511
   1512		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
   1513		if (!(efer & EFER_LMA))
   1514			addr &= (u32)-1;
   1515	}
   1516#endif
   1517
   1518	*desc_addr_p = addr;
   1519	return X86EMUL_CONTINUE;
   1520}
   1521
    1522/* allowed just for 8-byte segment descriptors */
   1523static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
   1524				   u16 selector, struct desc_struct *desc,
   1525				   ulong *desc_addr_p)
   1526{
   1527	int rc;
   1528
   1529	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
   1530	if (rc != X86EMUL_CONTINUE)
   1531		return rc;
   1532
   1533	return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
   1534}
   1535
    1536/* allowed just for 8-byte segment descriptors */
   1537static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
   1538				    u16 selector, struct desc_struct *desc)
   1539{
   1540	int rc;
   1541	ulong addr;
   1542
   1543	rc = get_descriptor_ptr(ctxt, selector, &addr);
   1544	if (rc != X86EMUL_CONTINUE)
   1545		return rc;
   1546
   1547	return linear_write_system(ctxt, addr, desc, sizeof(*desc));
   1548}
   1549
   1550static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
   1551				     u16 selector, int seg, u8 cpl,
   1552				     enum x86_transfer_type transfer,
   1553				     struct desc_struct *desc)
   1554{
   1555	struct desc_struct seg_desc, old_desc;
   1556	u8 dpl, rpl;
   1557	unsigned err_vec = GP_VECTOR;
   1558	u32 err_code = 0;
   1559	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
   1560	ulong desc_addr;
   1561	int ret;
   1562	u16 dummy;
   1563	u32 base3 = 0;
   1564
   1565	memset(&seg_desc, 0, sizeof(seg_desc));
   1566
   1567	if (ctxt->mode == X86EMUL_MODE_REAL) {
   1568		/* set real mode segment descriptor (keep limit etc. for
   1569		 * unreal mode) */
   1570		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
   1571		set_desc_base(&seg_desc, selector << 4);
   1572		goto load;
   1573	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
   1574		/* VM86 needs a clean new segment descriptor */
   1575		set_desc_base(&seg_desc, selector << 4);
   1576		set_desc_limit(&seg_desc, 0xffff);
   1577		seg_desc.type = 3;
   1578		seg_desc.p = 1;
   1579		seg_desc.s = 1;
   1580		seg_desc.dpl = 3;
   1581		goto load;
   1582	}
   1583
   1584	rpl = selector & 3;
   1585
   1586	/* TR should be in GDT only */
   1587	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
   1588		goto exception;
   1589
   1590	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
   1591	if (null_selector) {
   1592		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
   1593			goto exception;
   1594
   1595		if (seg == VCPU_SREG_SS) {
   1596			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
   1597				goto exception;
   1598
   1599			/*
   1600			 * ctxt->ops->set_segment expects the CPL to be in
   1601			 * SS.DPL, so fake an expand-up 32-bit data segment.
   1602			 */
   1603			seg_desc.type = 3;
   1604			seg_desc.p = 1;
   1605			seg_desc.s = 1;
   1606			seg_desc.dpl = cpl;
   1607			seg_desc.d = 1;
   1608			seg_desc.g = 1;
   1609		}
   1610
   1611		/* Skip all following checks */
   1612		goto load;
   1613	}
   1614
   1615	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
   1616	if (ret != X86EMUL_CONTINUE)
   1617		return ret;
   1618
   1619	err_code = selector & 0xfffc;
   1620	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
   1621							   GP_VECTOR;
   1622
   1623	/* can't load system descriptor into segment selector */
   1624	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
   1625		if (transfer == X86_TRANSFER_CALL_JMP)
   1626			return X86EMUL_UNHANDLEABLE;
   1627		goto exception;
   1628	}
   1629
   1630	dpl = seg_desc.dpl;
   1631
   1632	switch (seg) {
   1633	case VCPU_SREG_SS:
   1634		/*
    1635		 * segment is not a writable data segment or segment
    1636		 * selector's RPL != CPL or segment descriptor's DPL != CPL
   1637		 */
   1638		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
   1639			goto exception;
   1640		break;
   1641	case VCPU_SREG_CS:
   1642		if (!(seg_desc.type & 8))
   1643			goto exception;
   1644
   1645		if (transfer == X86_TRANSFER_RET) {
   1646			/* RET can never return to an inner privilege level. */
   1647			if (rpl < cpl)
   1648				goto exception;
   1649			/* Outer-privilege level return is not implemented */
   1650			if (rpl > cpl)
   1651				return X86EMUL_UNHANDLEABLE;
   1652		}
   1653		if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
   1654			if (seg_desc.type & 4) {
   1655				/* conforming */
   1656				if (dpl > rpl)
   1657					goto exception;
   1658			} else {
   1659				/* nonconforming */
   1660				if (dpl != rpl)
   1661					goto exception;
   1662			}
   1663		} else { /* X86_TRANSFER_CALL_JMP */
   1664			if (seg_desc.type & 4) {
   1665				/* conforming */
   1666				if (dpl > cpl)
   1667					goto exception;
   1668			} else {
   1669				/* nonconforming */
   1670				if (rpl > cpl || dpl != cpl)
   1671					goto exception;
   1672			}
   1673		}
   1674		/* in long-mode d/b must be clear if l is set */
   1675		if (seg_desc.d && seg_desc.l) {
   1676			u64 efer = 0;
   1677
   1678			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
   1679			if (efer & EFER_LMA)
   1680				goto exception;
   1681		}
   1682
   1683		/* CS(RPL) <- CPL */
   1684		selector = (selector & 0xfffc) | cpl;
   1685		break;
   1686	case VCPU_SREG_TR:
   1687		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
   1688			goto exception;
   1689		if (!seg_desc.p) {
   1690			err_vec = NP_VECTOR;
   1691			goto exception;
   1692		}
   1693		old_desc = seg_desc;
   1694		seg_desc.type |= 2; /* busy */
   1695		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
   1696						  sizeof(seg_desc), &ctxt->exception);
   1697		if (ret != X86EMUL_CONTINUE)
   1698			return ret;
   1699		break;
   1700	case VCPU_SREG_LDTR:
   1701		if (seg_desc.s || seg_desc.type != 2)
   1702			goto exception;
   1703		break;
   1704	default: /*  DS, ES, FS, or GS */
   1705		/*
   1706		 * segment is not a data or readable code segment or
   1707		 * ((segment is a data or nonconforming code segment)
   1708		 * and (both RPL and CPL > DPL))
   1709		 */
   1710		if ((seg_desc.type & 0xa) == 0x8 ||
   1711		    (((seg_desc.type & 0xc) != 0xc) &&
   1712		     (rpl > dpl && cpl > dpl)))
   1713			goto exception;
   1714		break;
   1715	}
   1716
   1717	if (!seg_desc.p) {
   1718		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
   1719		goto exception;
   1720	}
   1721
   1722	if (seg_desc.s) {
   1723		/* mark segment as accessed */
   1724		if (!(seg_desc.type & 1)) {
   1725			seg_desc.type |= 1;
   1726			ret = write_segment_descriptor(ctxt, selector,
   1727						       &seg_desc);
   1728			if (ret != X86EMUL_CONTINUE)
   1729				return ret;
   1730		}
   1731	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
   1732		ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
   1733		if (ret != X86EMUL_CONTINUE)
   1734			return ret;
   1735		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
   1736				((u64)base3 << 32), ctxt))
   1737			return emulate_gp(ctxt, 0);
   1738	}
   1739load:
   1740	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
   1741	if (desc)
   1742		*desc = seg_desc;
   1743	return X86EMUL_CONTINUE;
   1744exception:
   1745	return emulate_exception(ctxt, err_vec, err_code, true);
   1746}
   1747
   1748static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
   1749				   u16 selector, int seg)
   1750{
   1751	u8 cpl = ctxt->ops->cpl(ctxt);
   1752
   1753	/*
   1754	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
   1755	 * they can load it at CPL<3 (Intel's manual says only LSS can,
   1756	 * but it's wrong).
   1757	 *
   1758	 * However, the Intel manual says that putting IST=1/DPL=3 in
   1759	 * an interrupt gate will result in SS=3 (the AMD manual instead
   1760	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
   1761	 * and only forbid it here.
   1762	 */
   1763	if (seg == VCPU_SREG_SS && selector == 3 &&
   1764	    ctxt->mode == X86EMUL_MODE_PROT64)
   1765		return emulate_exception(ctxt, GP_VECTOR, 0, true);
   1766
   1767	return __load_segment_descriptor(ctxt, selector, seg, cpl,
   1768					 X86_TRANSFER_NONE, NULL);
   1769}
   1770
   1771static void write_register_operand(struct operand *op)
   1772{
   1773	return assign_register(op->addr.reg, op->val, op->bytes);
   1774}
   1775
   1776static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
   1777{
   1778	switch (op->type) {
   1779	case OP_REG:
   1780		write_register_operand(op);
   1781		break;
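        	/*
        	 * A LOCKed instruction is written back with a cmpxchg against the
        	 * originally-read value, so the emulated read-modify-write stays
        	 * atomic with respect to other vCPUs.
        	 */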
   1782	case OP_MEM:
   1783		if (ctxt->lock_prefix)
   1784			return segmented_cmpxchg(ctxt,
   1785						 op->addr.mem,
   1786						 &op->orig_val,
   1787						 &op->val,
   1788						 op->bytes);
   1789		else
   1790			return segmented_write(ctxt,
   1791					       op->addr.mem,
   1792					       &op->val,
   1793					       op->bytes);
   1794		break;
   1795	case OP_MEM_STR:
   1796		return segmented_write(ctxt,
   1797				       op->addr.mem,
   1798				       op->data,
   1799				       op->bytes * op->count);
   1800		break;
   1801	case OP_XMM:
   1802		kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
   1803		break;
   1804	case OP_MM:
   1805		kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
   1806		break;
   1807	case OP_NONE:
   1808		/* no writeback */
   1809		break;
   1810	default:
   1811		break;
   1812	}
   1813	return X86EMUL_CONTINUE;
   1814}
   1815
   1816static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
   1817{
   1818	struct segmented_address addr;
   1819
   1820	rsp_increment(ctxt, -bytes);
   1821	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
   1822	addr.seg = VCPU_SREG_SS;
   1823
   1824	return segmented_write(ctxt, addr, data, bytes);
   1825}
   1826
   1827static int em_push(struct x86_emulate_ctxt *ctxt)
   1828{
   1829	/* Disable writeback. */
   1830	ctxt->dst.type = OP_NONE;
   1831	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
   1832}
   1833
   1834static int emulate_pop(struct x86_emulate_ctxt *ctxt,
   1835		       void *dest, int len)
   1836{
   1837	int rc;
   1838	struct segmented_address addr;
   1839
   1840	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
   1841	addr.seg = VCPU_SREG_SS;
   1842	rc = segmented_read(ctxt, addr, dest, len);
   1843	if (rc != X86EMUL_CONTINUE)
   1844		return rc;
   1845
   1846	rsp_increment(ctxt, len);
   1847	return rc;
   1848}
   1849
   1850static int em_pop(struct x86_emulate_ctxt *ctxt)
   1851{
   1852	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
   1853}
   1854
   1855static int emulate_popf(struct x86_emulate_ctxt *ctxt,
   1856			void *dest, int len)
   1857{
   1858	int rc;
   1859	unsigned long val, change_mask;
   1860	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
   1861	int cpl = ctxt->ops->cpl(ctxt);
   1862
   1863	rc = emulate_pop(ctxt, &val, len);
   1864	if (rc != X86EMUL_CONTINUE)
   1865		return rc;
   1866
   1867	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
   1868		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
   1869		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
   1870		      X86_EFLAGS_AC | X86_EFLAGS_ID;
   1871
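        	/*
        	 * POPF may only change IOPL at CPL 0 and IF when CPL <= IOPL; in
        	 * VM86 mode it #GPs unless IOPL is 3, and in real mode both IOPL
        	 * and IF are writable as well.
        	 */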
   1872	switch(ctxt->mode) {
   1873	case X86EMUL_MODE_PROT64:
   1874	case X86EMUL_MODE_PROT32:
   1875	case X86EMUL_MODE_PROT16:
   1876		if (cpl == 0)
   1877			change_mask |= X86_EFLAGS_IOPL;
   1878		if (cpl <= iopl)
   1879			change_mask |= X86_EFLAGS_IF;
   1880		break;
   1881	case X86EMUL_MODE_VM86:
   1882		if (iopl < 3)
   1883			return emulate_gp(ctxt, 0);
   1884		change_mask |= X86_EFLAGS_IF;
   1885		break;
   1886	default: /* real mode */
   1887		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
   1888		break;
   1889	}
   1890
   1891	*(unsigned long *)dest =
   1892		(ctxt->eflags & ~change_mask) | (val & change_mask);
   1893
   1894	return rc;
   1895}
   1896
   1897static int em_popf(struct x86_emulate_ctxt *ctxt)
   1898{
   1899	ctxt->dst.type = OP_REG;
   1900	ctxt->dst.addr.reg = &ctxt->eflags;
   1901	ctxt->dst.bytes = ctxt->op_bytes;
   1902	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
   1903}
   1904
   1905static int em_enter(struct x86_emulate_ctxt *ctxt)
   1906{
   1907	int rc;
   1908	unsigned frame_size = ctxt->src.val;
   1909	unsigned nesting_level = ctxt->src2.val & 31;
   1910	ulong rbp;
   1911
   1912	if (nesting_level)
   1913		return X86EMUL_UNHANDLEABLE;
   1914
   1915	rbp = reg_read(ctxt, VCPU_REGS_RBP);
   1916	rc = push(ctxt, &rbp, stack_size(ctxt));
   1917	if (rc != X86EMUL_CONTINUE)
   1918		return rc;
   1919	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
   1920		      stack_mask(ctxt));
   1921	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
   1922		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
   1923		      stack_mask(ctxt));
   1924	return X86EMUL_CONTINUE;
   1925}
   1926
   1927static int em_leave(struct x86_emulate_ctxt *ctxt)
   1928{
   1929	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
   1930		      stack_mask(ctxt));
   1931	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
   1932}
   1933
   1934static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
   1935{
   1936	int seg = ctxt->src2.val;
   1937
   1938	ctxt->src.val = get_segment_selector(ctxt, seg);
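        	/*
        	 * With a 32-bit operand size the push still writes only the low
        	 * 16 bits of the selector; reserve the full 4-byte slot by
        	 * adjusting RSP for the unwritten half and then pushing 2 bytes.
        	 */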
   1939	if (ctxt->op_bytes == 4) {
   1940		rsp_increment(ctxt, -2);
   1941		ctxt->op_bytes = 2;
   1942	}
   1943
   1944	return em_push(ctxt);
   1945}
   1946
   1947static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
   1948{
   1949	int seg = ctxt->src2.val;
   1950	unsigned long selector;
   1951	int rc;
   1952
   1953	rc = emulate_pop(ctxt, &selector, 2);
   1954	if (rc != X86EMUL_CONTINUE)
   1955		return rc;
   1956
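        	/*
        	 * Like MOV to SS, POP SS inhibits interrupts until after the next
        	 * instruction.  Only 2 bytes are read from the stack, but RSP
        	 * advances by the full operand size.
        	 */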
   1957	if (ctxt->modrm_reg == VCPU_SREG_SS)
   1958		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
   1959	if (ctxt->op_bytes > 2)
   1960		rsp_increment(ctxt, ctxt->op_bytes - 2);
   1961
   1962	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
   1963	return rc;
   1964}
   1965
   1966static int em_pusha(struct x86_emulate_ctxt *ctxt)
   1967{
   1968	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
   1969	int rc = X86EMUL_CONTINUE;
   1970	int reg = VCPU_REGS_RAX;
   1971
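        	/* PUSHA pushes the value SP had before the first push, not the live value. */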
   1972	while (reg <= VCPU_REGS_RDI) {
   1973		(reg == VCPU_REGS_RSP) ?
   1974		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
   1975
   1976		rc = em_push(ctxt);
   1977		if (rc != X86EMUL_CONTINUE)
   1978			return rc;
   1979
   1980		++reg;
   1981	}
   1982
   1983	return rc;
   1984}
   1985
   1986static int em_pushf(struct x86_emulate_ctxt *ctxt)
   1987{
   1988	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
   1989	return em_push(ctxt);
   1990}
   1991
   1992static int em_popa(struct x86_emulate_ctxt *ctxt)
   1993{
   1994	int rc = X86EMUL_CONTINUE;
   1995	int reg = VCPU_REGS_RDI;
   1996	u32 val;
   1997
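        	/* POPA discards the saved SP image; RSP is simply advanced past that slot. */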
   1998	while (reg >= VCPU_REGS_RAX) {
   1999		if (reg == VCPU_REGS_RSP) {
   2000			rsp_increment(ctxt, ctxt->op_bytes);
   2001			--reg;
   2002		}
   2003
   2004		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
   2005		if (rc != X86EMUL_CONTINUE)
   2006			break;
   2007		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
   2008		--reg;
   2009	}
   2010	return rc;
   2011}
   2012
   2013static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
   2014{
   2015	const struct x86_emulate_ops *ops = ctxt->ops;
   2016	int rc;
   2017	struct desc_ptr dt;
   2018	gva_t cs_addr;
   2019	gva_t eip_addr;
   2020	u16 cs, eip;
   2021
   2022	/* TODO: Add limit checks */
   2023	ctxt->src.val = ctxt->eflags;
   2024	rc = em_push(ctxt);
   2025	if (rc != X86EMUL_CONTINUE)
   2026		return rc;
   2027
   2028	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
   2029
   2030	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
   2031	rc = em_push(ctxt);
   2032	if (rc != X86EMUL_CONTINUE)
   2033		return rc;
   2034
   2035	ctxt->src.val = ctxt->_eip;
   2036	rc = em_push(ctxt);
   2037	if (rc != X86EMUL_CONTINUE)
   2038		return rc;
   2039
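        	/*
        	 * Real-mode IVT entries are 4 bytes: a 16-bit offset at
        	 * vector * 4, followed by a 16-bit code segment selector.
        	 */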
   2040	ops->get_idt(ctxt, &dt);
   2041
   2042	eip_addr = dt.address + (irq << 2);
   2043	cs_addr = dt.address + (irq << 2) + 2;
   2044
   2045	rc = linear_read_system(ctxt, cs_addr, &cs, 2);
   2046	if (rc != X86EMUL_CONTINUE)
   2047		return rc;
   2048
   2049	rc = linear_read_system(ctxt, eip_addr, &eip, 2);
   2050	if (rc != X86EMUL_CONTINUE)
   2051		return rc;
   2052
   2053	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
   2054	if (rc != X86EMUL_CONTINUE)
   2055		return rc;
   2056
   2057	ctxt->_eip = eip;
   2058
   2059	return rc;
   2060}
   2061
   2062int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
   2063{
   2064	int rc;
   2065
   2066	invalidate_registers(ctxt);
   2067	rc = __emulate_int_real(ctxt, irq);
   2068	if (rc == X86EMUL_CONTINUE)
   2069		writeback_registers(ctxt);
   2070	return rc;
   2071}
   2072
   2073static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
   2074{
   2075	switch(ctxt->mode) {
   2076	case X86EMUL_MODE_REAL:
   2077		return __emulate_int_real(ctxt, irq);
   2078	case X86EMUL_MODE_VM86:
   2079	case X86EMUL_MODE_PROT16:
   2080	case X86EMUL_MODE_PROT32:
   2081	case X86EMUL_MODE_PROT64:
   2082	default:
    2083		/* Protected mode interrupts are not yet implemented. */
   2084		return X86EMUL_UNHANDLEABLE;
   2085	}
   2086}
   2087
   2088static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
   2089{
   2090	int rc = X86EMUL_CONTINUE;
   2091	unsigned long temp_eip = 0;
   2092	unsigned long temp_eflags = 0;
   2093	unsigned long cs = 0;
   2094	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
   2095			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
   2096			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
   2097			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
   2098			     X86_EFLAGS_AC | X86_EFLAGS_ID |
   2099			     X86_EFLAGS_FIXED;
   2100	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
   2101				  X86_EFLAGS_VIP;
   2102
   2103	/* TODO: Add stack limit check */
   2104
   2105	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
   2106
   2107	if (rc != X86EMUL_CONTINUE)
   2108		return rc;
   2109
   2110	if (temp_eip & ~0xffff)
   2111		return emulate_gp(ctxt, 0);
   2112
   2113	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
   2114
   2115	if (rc != X86EMUL_CONTINUE)
   2116		return rc;
   2117
   2118	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
   2119
   2120	if (rc != X86EMUL_CONTINUE)
   2121		return rc;
   2122
   2123	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
   2124
   2125	if (rc != X86EMUL_CONTINUE)
   2126		return rc;
   2127
   2128	ctxt->_eip = temp_eip;
   2129
   2130	if (ctxt->op_bytes == 4)
   2131		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
   2132	else if (ctxt->op_bytes == 2) {
   2133		ctxt->eflags &= ~0xffff;
   2134		ctxt->eflags |= temp_eflags;
   2135	}
   2136
   2137	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
   2138	ctxt->eflags |= X86_EFLAGS_FIXED;
   2139	ctxt->ops->set_nmi_mask(ctxt, false);
   2140
   2141	return rc;
   2142}
   2143
   2144static int em_iret(struct x86_emulate_ctxt *ctxt)
   2145{
   2146	switch(ctxt->mode) {
   2147	case X86EMUL_MODE_REAL:
   2148		return emulate_iret_real(ctxt);
   2149	case X86EMUL_MODE_VM86:
   2150	case X86EMUL_MODE_PROT16:
   2151	case X86EMUL_MODE_PROT32:
   2152	case X86EMUL_MODE_PROT64:
   2153	default:
    2154		/* IRET from protected mode is not yet implemented. */
   2155		return X86EMUL_UNHANDLEABLE;
   2156	}
   2157}
   2158
   2159static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
   2160{
   2161	int rc;
   2162	unsigned short sel;
   2163	struct desc_struct new_desc;
   2164	u8 cpl = ctxt->ops->cpl(ctxt);
   2165
   2166	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
   2167
   2168	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
   2169				       X86_TRANSFER_CALL_JMP,
   2170				       &new_desc);
   2171	if (rc != X86EMUL_CONTINUE)
   2172		return rc;
   2173
   2174	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
   2175	/* Error handling is not implemented. */
   2176	if (rc != X86EMUL_CONTINUE)
   2177		return X86EMUL_UNHANDLEABLE;
   2178
   2179	return rc;
   2180}
   2181
   2182static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
   2183{
   2184	return assign_eip_near(ctxt, ctxt->src.val);
   2185}
   2186
   2187static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
   2188{
   2189	int rc;
   2190	long int old_eip;
   2191
   2192	old_eip = ctxt->_eip;
   2193	rc = assign_eip_near(ctxt, ctxt->src.val);
   2194	if (rc != X86EMUL_CONTINUE)
   2195		return rc;
   2196	ctxt->src.val = old_eip;
   2197	rc = em_push(ctxt);
   2198	return rc;
   2199}
   2200
   2201static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
   2202{
   2203	u64 old = ctxt->dst.orig_val64;
   2204
   2205	if (ctxt->dst.bytes == 16)
   2206		return X86EMUL_UNHANDLEABLE;
   2207
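        	/*
        	 * Compare EDX:EAX with the 64-bit destination: on a mismatch, load
        	 * the destination's old value into EDX:EAX and clear ZF; on a
        	 * match, store ECX:EBX into the destination and set ZF.
        	 */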
   2208	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
   2209	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
   2210		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
   2211		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
   2212		ctxt->eflags &= ~X86_EFLAGS_ZF;
   2213	} else {
   2214		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
   2215			(u32) reg_read(ctxt, VCPU_REGS_RBX);
   2216
   2217		ctxt->eflags |= X86_EFLAGS_ZF;
   2218	}
   2219	return X86EMUL_CONTINUE;
   2220}
   2221
   2222static int em_ret(struct x86_emulate_ctxt *ctxt)
   2223{
   2224	int rc;
   2225	unsigned long eip;
   2226
   2227	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
   2228	if (rc != X86EMUL_CONTINUE)
   2229		return rc;
   2230
   2231	return assign_eip_near(ctxt, eip);
   2232}
   2233
   2234static int em_ret_far(struct x86_emulate_ctxt *ctxt)
   2235{
   2236	int rc;
   2237	unsigned long eip, cs;
   2238	int cpl = ctxt->ops->cpl(ctxt);
   2239	struct desc_struct new_desc;
   2240
   2241	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
   2242	if (rc != X86EMUL_CONTINUE)
   2243		return rc;
   2244	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
   2245	if (rc != X86EMUL_CONTINUE)
   2246		return rc;
   2247	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
   2248				       X86_TRANSFER_RET,
   2249				       &new_desc);
   2250	if (rc != X86EMUL_CONTINUE)
   2251		return rc;
   2252	rc = assign_eip_far(ctxt, eip, &new_desc);
   2253	/* Error handling is not implemented. */
   2254	if (rc != X86EMUL_CONTINUE)
   2255		return X86EMUL_UNHANDLEABLE;
   2256
   2257	return rc;
   2258}
   2259
   2260static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
   2261{
    2262	int rc;
    2263
    2264	rc = em_ret_far(ctxt);
    2265	if (rc != X86EMUL_CONTINUE)
    2266		return rc;
    2267	rsp_increment(ctxt, ctxt->src.val);
    2268	return X86EMUL_CONTINUE;
   2269}
   2270
   2271static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
   2272{
   2273	/* Save real source value, then compare EAX against destination. */
   2274	ctxt->dst.orig_val = ctxt->dst.val;
   2275	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
   2276	ctxt->src.orig_val = ctxt->src.val;
   2277	ctxt->src.val = ctxt->dst.orig_val;
   2278	fastop(ctxt, em_cmp);
   2279
   2280	if (ctxt->eflags & X86_EFLAGS_ZF) {
   2281		/* Success: write back to memory; no update of EAX */
   2282		ctxt->src.type = OP_NONE;
   2283		ctxt->dst.val = ctxt->src.orig_val;
   2284	} else {
   2285		/* Failure: write the value we saw to EAX. */
   2286		ctxt->src.type = OP_REG;
   2287		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
   2288		ctxt->src.val = ctxt->dst.orig_val;
   2289		/* Create write-cycle to dest by writing the same value */
   2290		ctxt->dst.val = ctxt->dst.orig_val;
   2291	}
   2292	return X86EMUL_CONTINUE;
   2293}
   2294
   2295static int em_lseg(struct x86_emulate_ctxt *ctxt)
   2296{
   2297	int seg = ctxt->src2.val;
   2298	unsigned short sel;
   2299	int rc;
   2300
   2301	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
   2302
   2303	rc = load_segment_descriptor(ctxt, sel, seg);
   2304	if (rc != X86EMUL_CONTINUE)
   2305		return rc;
   2306
   2307	ctxt->dst.val = ctxt->src.val;
   2308	return rc;
   2309}
   2310
   2311static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
   2312{
   2313#ifdef CONFIG_X86_64
   2314	return ctxt->ops->guest_has_long_mode(ctxt);
   2315#else
   2316	return false;
   2317#endif
   2318}
   2319
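        /*
         * The SMM state-save "flags" word uses the same bit layout as the high
         * dword of a segment descriptor:
         *
         *   bit 23: G, 22: D/B, 21: L, 20: AVL, 15: P, 14-13: DPL, 12: S, 11-8: type
         */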
   2320static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
   2321{
   2322	desc->g    = (flags >> 23) & 1;
   2323	desc->d    = (flags >> 22) & 1;
   2324	desc->l    = (flags >> 21) & 1;
   2325	desc->avl  = (flags >> 20) & 1;
   2326	desc->p    = (flags >> 15) & 1;
   2327	desc->dpl  = (flags >> 13) & 3;
   2328	desc->s    = (flags >> 12) & 1;
   2329	desc->type = (flags >>  8) & 15;
   2330}
   2331
   2332static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
   2333			   int n)
   2334{
   2335	struct desc_struct desc;
   2336	int offset;
   2337	u16 selector;
   2338
   2339	selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
   2340
   2341	if (n < 3)
   2342		offset = 0x7f84 + n * 12;
   2343	else
   2344		offset = 0x7f2c + (n - 3) * 12;
   2345
   2346	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
   2347	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
   2348	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
   2349	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
   2350	return X86EMUL_CONTINUE;
   2351}
   2352
   2353#ifdef CONFIG_X86_64
   2354static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
   2355			   int n)
   2356{
   2357	struct desc_struct desc;
   2358	int offset;
   2359	u16 selector;
   2360	u32 base3;
   2361
   2362	offset = 0x7e00 + n * 16;
   2363
   2364	selector =                GET_SMSTATE(u16, smstate, offset);
   2365	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
   2366	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
   2367	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
   2368	base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
   2369
   2370	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
   2371	return X86EMUL_CONTINUE;
   2372}
   2373#endif
   2374
   2375static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
   2376				    u64 cr0, u64 cr3, u64 cr4)
   2377{
   2378	int bad;
   2379	u64 pcid;
   2380
   2381	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
   2382	pcid = 0;
   2383	if (cr4 & X86_CR4_PCIDE) {
   2384		pcid = cr3 & 0xfff;
   2385		cr3 &= ~0xfff;
   2386	}
   2387
   2388	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
   2389	if (bad)
   2390		return X86EMUL_UNHANDLEABLE;
   2391
   2392	/*
   2393	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
   2394	 * Then enable protected mode.	However, PCID cannot be enabled
   2395	 * if EFER.LMA=0, so set it separately.
   2396	 */
   2397	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
   2398	if (bad)
   2399		return X86EMUL_UNHANDLEABLE;
   2400
   2401	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
   2402	if (bad)
   2403		return X86EMUL_UNHANDLEABLE;
   2404
   2405	if (cr4 & X86_CR4_PCIDE) {
   2406		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
   2407		if (bad)
   2408			return X86EMUL_UNHANDLEABLE;
   2409		if (pcid) {
   2410			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
   2411			if (bad)
   2412				return X86EMUL_UNHANDLEABLE;
   2413		}
   2414
   2415	}
   2416
   2417	return X86EMUL_CONTINUE;
   2418}
   2419
   2420static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
   2421			     const char *smstate)
   2422{
   2423	struct desc_struct desc;
   2424	struct desc_ptr dt;
   2425	u16 selector;
   2426	u32 val, cr0, cr3, cr4;
   2427	int i;
   2428
   2429	cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
   2430	cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
   2431	ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
   2432	ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
   2433
   2434	for (i = 0; i < 8; i++)
   2435		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
   2436
   2437	val = GET_SMSTATE(u32, smstate, 0x7fcc);
   2438
   2439	if (ctxt->ops->set_dr(ctxt, 6, val))
   2440		return X86EMUL_UNHANDLEABLE;
   2441
   2442	val = GET_SMSTATE(u32, smstate, 0x7fc8);
   2443
   2444	if (ctxt->ops->set_dr(ctxt, 7, val))
   2445		return X86EMUL_UNHANDLEABLE;
   2446
   2447	selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
   2448	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
   2449	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
   2450	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
   2451	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
   2452
   2453	selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
   2454	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
   2455	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
   2456	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
   2457	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
   2458
   2459	dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
   2460	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
   2461	ctxt->ops->set_gdt(ctxt, &dt);
   2462
   2463	dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
   2464	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
   2465	ctxt->ops->set_idt(ctxt, &dt);
   2466
   2467	for (i = 0; i < 6; i++) {
   2468		int r = rsm_load_seg_32(ctxt, smstate, i);
   2469		if (r != X86EMUL_CONTINUE)
   2470			return r;
   2471	}
   2472
   2473	cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
   2474
   2475	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
   2476
   2477	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
   2478}
   2479
   2480#ifdef CONFIG_X86_64
   2481static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
   2482			     const char *smstate)
   2483{
   2484	struct desc_struct desc;
   2485	struct desc_ptr dt;
   2486	u64 val, cr0, cr3, cr4;
   2487	u32 base3;
   2488	u16 selector;
   2489	int i, r;
   2490
   2491	for (i = 0; i < 16; i++)
   2492		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
   2493
   2494	ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
   2495	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
   2496
   2497	val = GET_SMSTATE(u64, smstate, 0x7f68);
   2498
   2499	if (ctxt->ops->set_dr(ctxt, 6, val))
   2500		return X86EMUL_UNHANDLEABLE;
   2501
   2502	val = GET_SMSTATE(u64, smstate, 0x7f60);
   2503
   2504	if (ctxt->ops->set_dr(ctxt, 7, val))
   2505		return X86EMUL_UNHANDLEABLE;
   2506
   2507	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
   2508	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
   2509	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
   2510	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
   2511	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
   2512
   2513	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
   2514		return X86EMUL_UNHANDLEABLE;
   2515
   2516	selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
   2517	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
   2518	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
   2519	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
   2520	base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
   2521	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
   2522
   2523	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
   2524	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
   2525	ctxt->ops->set_idt(ctxt, &dt);
   2526
   2527	selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
   2528	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
   2529	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
   2530	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
   2531	base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
   2532	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
   2533
   2534	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
   2535	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
   2536	ctxt->ops->set_gdt(ctxt, &dt);
   2537
   2538	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
   2539	if (r != X86EMUL_CONTINUE)
   2540		return r;
   2541
   2542	for (i = 0; i < 6; i++) {
   2543		r = rsm_load_seg_64(ctxt, smstate, i);
   2544		if (r != X86EMUL_CONTINUE)
   2545			return r;
   2546	}
   2547
   2548	return X86EMUL_CONTINUE;
   2549}
   2550#endif
   2551
   2552static int em_rsm(struct x86_emulate_ctxt *ctxt)
   2553{
   2554	unsigned long cr0, cr4, efer;
   2555	char buf[512];
   2556	u64 smbase;
   2557	int ret;
   2558
   2559	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
   2560		return emulate_ud(ctxt);
   2561
   2562	smbase = ctxt->ops->get_smbase(ctxt);
   2563
   2564	ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
   2565	if (ret != X86EMUL_CONTINUE)
   2566		return X86EMUL_UNHANDLEABLE;
   2567
   2568	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
   2569		ctxt->ops->set_nmi_mask(ctxt, false);
   2570
   2571	ctxt->ops->exiting_smm(ctxt);
   2572
   2573	/*
   2574	 * Get back to real mode, to prepare a safe state in which to load
   2575	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
   2576	 * supports long mode.
   2577	 */
   2578	if (emulator_has_longmode(ctxt)) {
   2579		struct desc_struct cs_desc;
   2580
   2581		/* Zero CR4.PCIDE before CR0.PG.  */
   2582		cr4 = ctxt->ops->get_cr(ctxt, 4);
   2583		if (cr4 & X86_CR4_PCIDE)
   2584			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
   2585
   2586		/* A 32-bit code segment is required to clear EFER.LMA.  */
   2587		memset(&cs_desc, 0, sizeof(cs_desc));
   2588		cs_desc.type = 0xb;
   2589		cs_desc.s = cs_desc.g = cs_desc.p = 1;
   2590		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
   2591	}
   2592
   2593	/* For the 64-bit case, this will clear EFER.LMA.  */
   2594	cr0 = ctxt->ops->get_cr(ctxt, 0);
   2595	if (cr0 & X86_CR0_PE)
   2596		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
   2597
   2598	if (emulator_has_longmode(ctxt)) {
   2599		/* Clear CR4.PAE before clearing EFER.LME. */
   2600		cr4 = ctxt->ops->get_cr(ctxt, 4);
   2601		if (cr4 & X86_CR4_PAE)
   2602			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
   2603
   2604		/* And finally go back to 32-bit mode.  */
   2605		efer = 0;
   2606		ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
   2607	}
   2608
   2609	/*
   2610	 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
   2611	 * state (e.g. enter guest mode) before loading state from the SMM
   2612	 * state-save area.
   2613	 */
   2614	if (ctxt->ops->leave_smm(ctxt, buf))
   2615		goto emulate_shutdown;
   2616
   2617#ifdef CONFIG_X86_64
   2618	if (emulator_has_longmode(ctxt))
   2619		ret = rsm_load_state_64(ctxt, buf);
   2620	else
   2621#endif
   2622		ret = rsm_load_state_32(ctxt, buf);
   2623
   2624	if (ret != X86EMUL_CONTINUE)
   2625		goto emulate_shutdown;
   2626
   2627	/*
   2628	 * Note, the ctxt->ops callbacks are responsible for handling side
   2629	 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
   2630	 * runtime updates, etc...  If that changes, e.g. this flow is moved
   2631	 * out of the emulator to make it look more like enter_smm(), then
   2632	 * those side effects need to be explicitly handled for both success
   2633	 * and shutdown.
   2634	 */
   2635	return X86EMUL_CONTINUE;
   2636
   2637emulate_shutdown:
   2638	ctxt->ops->triple_fault(ctxt);
   2639	return X86EMUL_CONTINUE;
   2640}
   2641
   2642static void
   2643setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
   2644{
   2645	cs->l = 0;		/* will be adjusted later */
   2646	set_desc_base(cs, 0);	/* flat segment */
   2647	cs->g = 1;		/* 4kb granularity */
   2648	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
   2649	cs->type = 0x0b;	/* Read, Execute, Accessed */
   2650	cs->s = 1;
   2651	cs->dpl = 0;		/* will be adjusted later */
   2652	cs->p = 1;
   2653	cs->d = 1;
   2654	cs->avl = 0;
   2655
   2656	set_desc_base(ss, 0);	/* flat segment */
   2657	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
   2658	ss->g = 1;		/* 4kb granularity */
   2659	ss->s = 1;
   2660	ss->type = 0x03;	/* Read/Write, Accessed */
   2661	ss->d = 1;		/* 32bit stack segment */
   2662	ss->dpl = 0;
   2663	ss->p = 1;
   2664	ss->l = 0;
   2665	ss->avl = 0;
   2666}
   2667
   2668static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
   2669{
   2670	u32 eax, ebx, ecx, edx;
   2671
   2672	eax = ecx = 0;
   2673	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
   2674	return is_guest_vendor_intel(ebx, ecx, edx);
   2675}
   2676
   2677static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
   2678{
   2679	const struct x86_emulate_ops *ops = ctxt->ops;
   2680	u32 eax, ebx, ecx, edx;
   2681
   2682	/*
    2683	 * SYSCALL is always available in long mode, so the check only needs
    2684	 * to become vendor specific (via CPUID) for the other modes.
   2685	 */
   2686	if (ctxt->mode == X86EMUL_MODE_PROT64)
   2687		return true;
   2688
   2689	eax = 0x00000000;
   2690	ecx = 0x00000000;
   2691	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
   2692	/*
    2693	 * Intel CPUs only support "syscall" in 64-bit long mode, so a 32-bit
    2694	 * compat application running in a 64-bit guest will #UD.  While the
    2695	 * emulator could hide that by mimicking the AMD response, AMD CPUs
    2696	 * cannot be made to behave like Intel, so stay faithful to the vendor.
   2697	 */
   2698	if (is_guest_vendor_intel(ebx, ecx, edx))
   2699		return false;
   2700
   2701	if (is_guest_vendor_amd(ebx, ecx, edx) ||
   2702	    is_guest_vendor_hygon(ebx, ecx, edx))
   2703		return true;
   2704
   2705	/*
   2706	 * default: (not Intel, not AMD, not Hygon), apply Intel's
   2707	 * stricter rules...
   2708	 */
   2709	return false;
   2710}
   2711
   2712static int em_syscall(struct x86_emulate_ctxt *ctxt)
   2713{
   2714	const struct x86_emulate_ops *ops = ctxt->ops;
   2715	struct desc_struct cs, ss;
   2716	u64 msr_data;
   2717	u16 cs_sel, ss_sel;
   2718	u64 efer = 0;
   2719
   2720	/* syscall is not available in real mode */
   2721	if (ctxt->mode == X86EMUL_MODE_REAL ||
   2722	    ctxt->mode == X86EMUL_MODE_VM86)
   2723		return emulate_ud(ctxt);
   2724
   2725	if (!(em_syscall_is_enabled(ctxt)))
   2726		return emulate_ud(ctxt);
   2727
   2728	ops->get_msr(ctxt, MSR_EFER, &efer);
   2729	if (!(efer & EFER_SCE))
   2730		return emulate_ud(ctxt);
   2731
   2732	setup_syscalls_segments(&cs, &ss);
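        	/*
        	 * SYSCALL takes the new CS from STAR[47:32]; SS is implicitly the
        	 * next GDT entry (that value + 8).
        	 */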
   2733	ops->get_msr(ctxt, MSR_STAR, &msr_data);
   2734	msr_data >>= 32;
   2735	cs_sel = (u16)(msr_data & 0xfffc);
   2736	ss_sel = (u16)(msr_data + 8);
   2737
   2738	if (efer & EFER_LMA) {
   2739		cs.d = 0;
   2740		cs.l = 1;
   2741	}
   2742	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
   2743	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
   2744
   2745	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
   2746	if (efer & EFER_LMA) {
   2747#ifdef CONFIG_X86_64
   2748		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
   2749
   2750		ops->get_msr(ctxt,
   2751			     ctxt->mode == X86EMUL_MODE_PROT64 ?
   2752			     MSR_LSTAR : MSR_CSTAR, &msr_data);
   2753		ctxt->_eip = msr_data;
   2754
   2755		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
   2756		ctxt->eflags &= ~msr_data;
   2757		ctxt->eflags |= X86_EFLAGS_FIXED;
   2758#endif
   2759	} else {
   2760		/* legacy mode */
   2761		ops->get_msr(ctxt, MSR_STAR, &msr_data);
   2762		ctxt->_eip = (u32)msr_data;
   2763
   2764		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
   2765	}
   2766
   2767	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
   2768	return X86EMUL_CONTINUE;
   2769}
   2770
   2771static int em_sysenter(struct x86_emulate_ctxt *ctxt)
   2772{
   2773	const struct x86_emulate_ops *ops = ctxt->ops;
   2774	struct desc_struct cs, ss;
   2775	u64 msr_data;
   2776	u16 cs_sel, ss_sel;
   2777	u64 efer = 0;
   2778
   2779	ops->get_msr(ctxt, MSR_EFER, &efer);
   2780	/* inject #GP if in real mode */
   2781	if (ctxt->mode == X86EMUL_MODE_REAL)
   2782		return emulate_gp(ctxt, 0);
   2783
   2784	/*
    2785	 * SYSENTER is not recognized on AMD in compat mode, but it is
    2786	 * recognized in legacy mode.
   2787	 */
   2788	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
   2789	    && !vendor_intel(ctxt))
   2790		return emulate_ud(ctxt);
   2791
   2792	/* sysenter/sysexit have not been tested in 64bit mode. */
   2793	if (ctxt->mode == X86EMUL_MODE_PROT64)
   2794		return X86EMUL_UNHANDLEABLE;
   2795
   2796	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
   2797	if ((msr_data & 0xfffc) == 0x0)
   2798		return emulate_gp(ctxt, 0);
   2799
   2800	setup_syscalls_segments(&cs, &ss);
   2801	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
   2802	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
   2803	ss_sel = cs_sel + 8;
   2804	if (efer & EFER_LMA) {
   2805		cs.d = 0;
   2806		cs.l = 1;
   2807	}
   2808
   2809	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
   2810	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
   2811
   2812	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
   2813	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
   2814
   2815	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
   2816	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
   2817							      (u32)msr_data;
   2818	if (efer & EFER_LMA)
   2819		ctxt->mode = X86EMUL_MODE_PROT64;
   2820
   2821	return X86EMUL_CONTINUE;
   2822}
   2823
   2824static int em_sysexit(struct x86_emulate_ctxt *ctxt)
   2825{
   2826	const struct x86_emulate_ops *ops = ctxt->ops;
   2827	struct desc_struct cs, ss;
   2828	u64 msr_data, rcx, rdx;
   2829	int usermode;
   2830	u16 cs_sel = 0, ss_sel = 0;
   2831
   2832	/* inject #GP if in real mode or Virtual 8086 mode */
   2833	if (ctxt->mode == X86EMUL_MODE_REAL ||
   2834	    ctxt->mode == X86EMUL_MODE_VM86)
   2835		return emulate_gp(ctxt, 0);
   2836
   2837	setup_syscalls_segments(&cs, &ss);
   2838
   2839	if ((ctxt->rex_prefix & 0x8) != 0x0)
   2840		usermode = X86EMUL_MODE_PROT64;
   2841	else
   2842		usermode = X86EMUL_MODE_PROT32;
   2843
   2844	rcx = reg_read(ctxt, VCPU_REGS_RCX);
   2845	rdx = reg_read(ctxt, VCPU_REGS_RDX);
   2846
   2847	cs.dpl = 3;
   2848	ss.dpl = 3;
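        	/*
        	 * SYSEXIT derives the user selectors from IA32_SYSENTER_CS:
        	 * CS = value + 16 and SS = value + 24 for a 32-bit return,
        	 * CS = value + 32 and SS = value + 40 for a 64-bit return.
        	 */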
   2849	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
   2850	switch (usermode) {
   2851	case X86EMUL_MODE_PROT32:
   2852		cs_sel = (u16)(msr_data + 16);
   2853		if ((msr_data & 0xfffc) == 0x0)
   2854			return emulate_gp(ctxt, 0);
   2855		ss_sel = (u16)(msr_data + 24);
   2856		rcx = (u32)rcx;
   2857		rdx = (u32)rdx;
   2858		break;
   2859	case X86EMUL_MODE_PROT64:
   2860		cs_sel = (u16)(msr_data + 32);
   2861		if (msr_data == 0x0)
   2862			return emulate_gp(ctxt, 0);
   2863		ss_sel = cs_sel + 8;
   2864		cs.d = 0;
   2865		cs.l = 1;
   2866		if (emul_is_noncanonical_address(rcx, ctxt) ||
   2867		    emul_is_noncanonical_address(rdx, ctxt))
   2868			return emulate_gp(ctxt, 0);
   2869		break;
   2870	}
   2871	cs_sel |= SEGMENT_RPL_MASK;
   2872	ss_sel |= SEGMENT_RPL_MASK;
   2873
   2874	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
   2875	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
   2876
   2877	ctxt->_eip = rdx;
   2878	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
   2879
   2880	return X86EMUL_CONTINUE;
   2881}
   2882
   2883static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
   2884{
   2885	int iopl;
   2886	if (ctxt->mode == X86EMUL_MODE_REAL)
   2887		return false;
   2888	if (ctxt->mode == X86EMUL_MODE_VM86)
   2889		return true;
   2890	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
   2891	return ctxt->ops->cpl(ctxt) > iopl;
   2892}
   2893
   2894#define VMWARE_PORT_VMPORT	(0x5658)
   2895#define VMWARE_PORT_VMRPC	(0x5659)
   2896
   2897static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
   2898					    u16 port, u16 len)
   2899{
   2900	const struct x86_emulate_ops *ops = ctxt->ops;
   2901	struct desc_struct tr_seg;
   2902	u32 base3;
   2903	int r;
   2904	u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
   2905	unsigned mask = (1 << len) - 1;
   2906	unsigned long base;
   2907
   2908	/*
   2909	 * VMware allows access to these ports even if denied
   2910	 * by TSS I/O permission bitmap. Mimic behavior.
   2911	 */
   2912	if (enable_vmware_backdoor &&
   2913	    ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
   2914		return true;
   2915
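        	/*
        	 * Otherwise consult the I/O permission bitmap in the TSS: the
        	 * 16-bit bitmap offset lives at byte 102 of the TSS, and two bytes
        	 * of the bitmap are read so that an access spanning a byte
        	 * boundary is checked in one go.
        	 */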
   2916	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
   2917	if (!tr_seg.p)
   2918		return false;
   2919	if (desc_limit_scaled(&tr_seg) < 103)
   2920		return false;
   2921	base = get_desc_base(&tr_seg);
   2922#ifdef CONFIG_X86_64
   2923	base |= ((u64)base3) << 32;
   2924#endif
   2925	r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
   2926	if (r != X86EMUL_CONTINUE)
   2927		return false;
   2928	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
   2929		return false;
   2930	r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
   2931	if (r != X86EMUL_CONTINUE)
   2932		return false;
   2933	if ((perm >> bit_idx) & mask)
   2934		return false;
   2935	return true;
   2936}
   2937
   2938static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
   2939				 u16 port, u16 len)
   2940{
   2941	if (ctxt->perm_ok)
   2942		return true;
   2943
   2944	if (emulator_bad_iopl(ctxt))
   2945		if (!emulator_io_port_access_allowed(ctxt, port, len))
   2946			return false;
   2947
   2948	ctxt->perm_ok = true;
   2949
   2950	return true;
   2951}
   2952
   2953static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
   2954{
   2955	/*
    2956	 * Intel CPUs mask the counter and pointers in a rather strange
    2957	 * manner when ECX is zero, due to REP-string optimizations.
   2958	 */
   2959#ifdef CONFIG_X86_64
   2960	if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
   2961		return;
   2962
   2963	*reg_write(ctxt, VCPU_REGS_RCX) = 0;
   2964
   2965	switch (ctxt->b) {
   2966	case 0xa4:	/* movsb */
   2967	case 0xa5:	/* movsd/w */
   2968		*reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
   2969		fallthrough;
   2970	case 0xaa:	/* stosb */
   2971	case 0xab:	/* stosd/w */
   2972		*reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
   2973	}
   2974#endif
   2975}
   2976
   2977static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
   2978				struct tss_segment_16 *tss)
   2979{
   2980	tss->ip = ctxt->_eip;
   2981	tss->flag = ctxt->eflags;
   2982	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
   2983	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
   2984	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
   2985	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
   2986	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
   2987	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
   2988	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
   2989	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
   2990
   2991	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
   2992	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
   2993	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
   2994	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
   2995	tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
   2996}
   2997
   2998static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
   2999				 struct tss_segment_16 *tss)
   3000{
   3001	int ret;
   3002	u8 cpl;
   3003
   3004	ctxt->_eip = tss->ip;
   3005	ctxt->eflags = tss->flag | 2;
   3006	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
   3007	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
   3008	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
   3009	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
   3010	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
   3011	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
   3012	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
   3013	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
   3014
   3015	/*
   3016	 * SDM says that segment selectors are loaded before segment
   3017	 * descriptors
   3018	 */
   3019	set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
   3020	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
   3021	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
   3022	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
   3023	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
   3024
   3025	cpl = tss->cs & 3;
   3026
   3027	/*
    3028	 * Now load the segment descriptors.  If a fault happens at this
    3029	 * stage it is handled in the context of the new task.
   3030	 */
   3031	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
   3032					X86_TRANSFER_TASK_SWITCH, NULL);
   3033	if (ret != X86EMUL_CONTINUE)
   3034		return ret;
   3035	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
   3036					X86_TRANSFER_TASK_SWITCH, NULL);
   3037	if (ret != X86EMUL_CONTINUE)
   3038		return ret;
   3039	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
   3040					X86_TRANSFER_TASK_SWITCH, NULL);
   3041	if (ret != X86EMUL_CONTINUE)
   3042		return ret;
   3043	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
   3044					X86_TRANSFER_TASK_SWITCH, NULL);
   3045	if (ret != X86EMUL_CONTINUE)
   3046		return ret;
   3047	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
   3048					X86_TRANSFER_TASK_SWITCH, NULL);
   3049	if (ret != X86EMUL_CONTINUE)
   3050		return ret;
   3051
   3052	return X86EMUL_CONTINUE;
   3053}
   3054
   3055static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
   3056			  ulong old_tss_base, struct desc_struct *new_desc)
   3057{
   3058	struct tss_segment_16 tss_seg;
   3059	int ret;
   3060	u32 new_tss_base = get_desc_base(new_desc);
   3061
   3062	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
   3063	if (ret != X86EMUL_CONTINUE)
   3064		return ret;
   3065
   3066	save_state_to_tss16(ctxt, &tss_seg);
   3067
   3068	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
   3069	if (ret != X86EMUL_CONTINUE)
   3070		return ret;
   3071
   3072	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
   3073	if (ret != X86EMUL_CONTINUE)
   3074		return ret;
   3075
   3076	if (old_tss_sel != 0xffff) {
   3077		tss_seg.prev_task_link = old_tss_sel;
   3078
   3079		ret = linear_write_system(ctxt, new_tss_base,
   3080					  &tss_seg.prev_task_link,
   3081					  sizeof(tss_seg.prev_task_link));
   3082		if (ret != X86EMUL_CONTINUE)
   3083			return ret;
   3084	}
   3085
   3086	return load_state_from_tss16(ctxt, &tss_seg);
   3087}
   3088
   3089static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
   3090				struct tss_segment_32 *tss)
   3091{
   3092	/* CR3 and ldt selector are not saved intentionally */
   3093	tss->eip = ctxt->_eip;
   3094	tss->eflags = ctxt->eflags;
   3095	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
   3096	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
   3097	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
   3098	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
   3099	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
   3100	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
   3101	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
   3102	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
   3103
   3104	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
   3105	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
   3106	tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
   3107	tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
   3108	tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
   3109	tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
   3110}
   3111
   3112static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
   3113				 struct tss_segment_32 *tss)
   3114{
   3115	int ret;
   3116	u8 cpl;
   3117
   3118	if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
   3119		return emulate_gp(ctxt, 0);
   3120	ctxt->_eip = tss->eip;
   3121	ctxt->eflags = tss->eflags | 2;
   3122
   3123	/* General purpose registers */
   3124	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
   3125	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
   3126	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
   3127	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
   3128	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
   3129	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
   3130	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
   3131	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
   3132
   3133	/*
   3134	 * SDM says that segment selectors are loaded before segment
   3135	 * descriptors.  This is important because CPL checks will
   3136	 * use CS.RPL.
   3137	 */
   3138	set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
   3139	set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
   3140	set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
   3141	set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
   3142	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
   3143	set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
   3144	set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
   3145
   3146	/*
   3147	 * If we're switching between Protected Mode and VM86, we need to make
   3148	 * sure to update the mode before loading the segment descriptors so
   3149	 * that the selectors are interpreted correctly.
   3150	 */
   3151	if (ctxt->eflags & X86_EFLAGS_VM) {
   3152		ctxt->mode = X86EMUL_MODE_VM86;
   3153		cpl = 3;
   3154	} else {
   3155		ctxt->mode = X86EMUL_MODE_PROT32;
   3156		cpl = tss->cs & 3;
   3157	}
   3158
   3159	/*
    3160	 * Now load the segment descriptors.  If a fault happens at this
    3161	 * stage it is handled in the context of the new task.
   3162	 */
   3163	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
   3164					cpl, X86_TRANSFER_TASK_SWITCH, NULL);
   3165	if (ret != X86EMUL_CONTINUE)
   3166		return ret;
   3167	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
   3168					X86_TRANSFER_TASK_SWITCH, NULL);
   3169	if (ret != X86EMUL_CONTINUE)
   3170		return ret;
   3171	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
   3172					X86_TRANSFER_TASK_SWITCH, NULL);
   3173	if (ret != X86EMUL_CONTINUE)
   3174		return ret;
   3175	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
   3176					X86_TRANSFER_TASK_SWITCH, NULL);
   3177	if (ret != X86EMUL_CONTINUE)
   3178		return ret;
   3179	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
   3180					X86_TRANSFER_TASK_SWITCH, NULL);
   3181	if (ret != X86EMUL_CONTINUE)
   3182		return ret;
   3183	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
   3184					X86_TRANSFER_TASK_SWITCH, NULL);
   3185	if (ret != X86EMUL_CONTINUE)
   3186		return ret;
   3187	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
   3188					X86_TRANSFER_TASK_SWITCH, NULL);
   3189
   3190	return ret;
   3191}
   3192
   3193static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
   3194			  ulong old_tss_base, struct desc_struct *new_desc)
   3195{
   3196	struct tss_segment_32 tss_seg;
   3197	int ret;
   3198	u32 new_tss_base = get_desc_base(new_desc);
   3199	u32 eip_offset = offsetof(struct tss_segment_32, eip);
   3200	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
   3201
   3202	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
   3203	if (ret != X86EMUL_CONTINUE)
   3204		return ret;
   3205
   3206	save_state_to_tss32(ctxt, &tss_seg);
   3207
   3208	/* Only GP registers and segment selectors are saved */
   3209	ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
   3210				  ldt_sel_offset - eip_offset);
   3211	if (ret != X86EMUL_CONTINUE)
   3212		return ret;
   3213
   3214	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
   3215	if (ret != X86EMUL_CONTINUE)
   3216		return ret;
   3217
   3218	if (old_tss_sel != 0xffff) {
   3219		tss_seg.prev_task_link = old_tss_sel;
   3220
   3221		ret = linear_write_system(ctxt, new_tss_base,
   3222					  &tss_seg.prev_task_link,
   3223					  sizeof(tss_seg.prev_task_link));
   3224		if (ret != X86EMUL_CONTINUE)
   3225			return ret;
   3226	}
   3227
   3228	return load_state_from_tss32(ctxt, &tss_seg);
   3229}
   3230
   3231static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
   3232				   u16 tss_selector, int idt_index, int reason,
   3233				   bool has_error_code, u32 error_code)
   3234{
   3235	const struct x86_emulate_ops *ops = ctxt->ops;
   3236	struct desc_struct curr_tss_desc, next_tss_desc;
   3237	int ret;
   3238	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
   3239	ulong old_tss_base =
   3240		ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
   3241	u32 desc_limit;
   3242	ulong desc_addr, dr7;
   3243
   3244	/* FIXME: old_tss_base == ~0 ? */
   3245
   3246	ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
   3247	if (ret != X86EMUL_CONTINUE)
   3248		return ret;
   3249	ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
   3250	if (ret != X86EMUL_CONTINUE)
   3251		return ret;
   3252
   3253	/* FIXME: check that next_tss_desc is tss */
   3254
   3255	/*
   3256	 * Check privileges. The three cases are task switch caused by...
   3257	 *
   3258	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
   3259	 * 2. Exception/IRQ/iret: No check is performed
   3260	 * 3. jmp/call to TSS/task-gate: No check is performed since the
   3261	 *    hardware checks it before exiting.
   3262	 */
   3263	if (reason == TASK_SWITCH_GATE) {
   3264		if (idt_index != -1) {
   3265			/* Software interrupts */
   3266			struct desc_struct task_gate_desc;
   3267			int dpl;
   3268
   3269			ret = read_interrupt_descriptor(ctxt, idt_index,
   3270							&task_gate_desc);
   3271			if (ret != X86EMUL_CONTINUE)
   3272				return ret;
   3273
   3274			dpl = task_gate_desc.dpl;
   3275			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
   3276				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
   3277		}
   3278	}
   3279
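        	/*
        	 * A 32-bit TSS (type bit 3 set) needs a limit of at least 0x67; a
        	 * 16-bit TSS needs at least 0x2b.  Anything smaller raises #TS
        	 * with the new TSS selector as the error code.
        	 */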
   3280	desc_limit = desc_limit_scaled(&next_tss_desc);
   3281	if (!next_tss_desc.p ||
   3282	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
   3283	     desc_limit < 0x2b)) {
   3284		return emulate_ts(ctxt, tss_selector & 0xfffc);
   3285	}
   3286
   3287	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
   3288		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
   3289		write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
   3290	}
   3291
   3292	if (reason == TASK_SWITCH_IRET)
   3293		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
   3294
    3295	/* Set back link to prev task only if the NT bit is set in EFLAGS;
    3296	   note that old_tss_sel is not used after this point. */
   3297	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
   3298		old_tss_sel = 0xffff;
   3299
   3300	if (next_tss_desc.type & 8)
   3301		ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
   3302	else
   3303		ret = task_switch_16(ctxt, old_tss_sel,
   3304				     old_tss_base, &next_tss_desc);
   3305	if (ret != X86EMUL_CONTINUE)
   3306		return ret;
   3307
   3308	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
   3309		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
   3310
   3311	if (reason != TASK_SWITCH_IRET) {
   3312		next_tss_desc.type |= (1 << 1); /* set busy flag */
   3313		write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
   3314	}
   3315
   3316	ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
   3317	ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
   3318
   3319	if (has_error_code) {
   3320		ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
   3321		ctxt->lock_prefix = 0;
   3322		ctxt->src.val = (unsigned long) error_code;
   3323		ret = em_push(ctxt);
   3324	}
   3325
   3326	ops->get_dr(ctxt, 7, &dr7);
   3327	ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
   3328
   3329	return ret;
   3330}
   3331
   3332int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
   3333			 u16 tss_selector, int idt_index, int reason,
   3334			 bool has_error_code, u32 error_code)
   3335{
   3336	int rc;
   3337
   3338	invalidate_registers(ctxt);
   3339	ctxt->_eip = ctxt->eip;
   3340	ctxt->dst.type = OP_NONE;
   3341
   3342	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
   3343				     has_error_code, error_code);
   3344
   3345	if (rc == X86EMUL_CONTINUE) {
   3346		ctxt->eip = ctxt->_eip;
   3347		writeback_registers(ctxt);
   3348	}
   3349
   3350	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
   3351}
   3352
   3353static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
   3354		struct operand *op)
   3355{
   3356	int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
   3357
   3358	register_address_increment(ctxt, reg, df * op->bytes);
   3359	op->addr.mem.ea = register_address(ctxt, reg);
   3360}
   3361
   3362static int em_das(struct x86_emulate_ctxt *ctxt)
   3363{
   3364	u8 al, old_al;
   3365	bool af, cf, old_cf;
   3366
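        	/* DAS: decimal-adjust AL after subtraction, mirroring the SDM pseudocode. */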
   3367	cf = ctxt->eflags & X86_EFLAGS_CF;
   3368	al = ctxt->dst.val;
   3369
   3370	old_al = al;
   3371	old_cf = cf;
   3372	cf = false;
   3373	af = ctxt->eflags & X86_EFLAGS_AF;
   3374	if ((al & 0x0f) > 9 || af) {
   3375		al -= 6;
   3376		cf = old_cf | (al >= 250);
   3377		af = true;
   3378	} else {
   3379		af = false;
   3380	}
   3381	if (old_al > 0x99 || old_cf) {
   3382		al -= 0x60;
   3383		cf = true;
   3384	}
   3385
   3386	ctxt->dst.val = al;
   3387	/* Set PF, ZF, SF */
   3388	ctxt->src.type = OP_IMM;
   3389	ctxt->src.val = 0;
   3390	ctxt->src.bytes = 1;
   3391	fastop(ctxt, em_or);
   3392	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
   3393	if (cf)
   3394		ctxt->eflags |= X86_EFLAGS_CF;
   3395	if (af)
   3396		ctxt->eflags |= X86_EFLAGS_AF;
   3397	return X86EMUL_CONTINUE;
   3398}
   3399
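/*
 * AAM splits AL by the immediate divisor (10 unless another base is encoded):
 * AH = AL / imm, AL = AL % imm.  For example AL = 0x2f (47) with the default
 * base yields AH = 4, AL = 7; a divisor of zero raises #DE.
 */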
   3400static int em_aam(struct x86_emulate_ctxt *ctxt)
   3401{
   3402	u8 al, ah;
   3403
   3404	if (ctxt->src.val == 0)
   3405		return emulate_de(ctxt);
   3406
   3407	al = ctxt->dst.val & 0xff;
   3408	ah = al / ctxt->src.val;
   3409	al %= ctxt->src.val;
   3410
   3411	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
   3412
   3413	/* Set PF, ZF, SF */
   3414	ctxt->src.type = OP_IMM;
   3415	ctxt->src.val = 0;
   3416	ctxt->src.bytes = 1;
   3417	fastop(ctxt, em_or);
   3418
   3419	return X86EMUL_CONTINUE;
   3420}
   3421
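/*
 * AAD is the inverse of AAM: AL = AH * imm + AL (truncated to 8 bits) and
 * AH is cleared.  With the default base 10, AH = 4 and AL = 7 become AL = 47.
 */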
   3422static int em_aad(struct x86_emulate_ctxt *ctxt)
   3423{
   3424	u8 al = ctxt->dst.val & 0xff;
   3425	u8 ah = (ctxt->dst.val >> 8) & 0xff;
   3426
   3427	al = (al + (ah * ctxt->src.val)) & 0xff;
   3428
   3429	ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
   3430
   3431	/* Set PF, ZF, SF */
   3432	ctxt->src.type = OP_IMM;
   3433	ctxt->src.val = 0;
   3434	ctxt->src.bytes = 1;
   3435	fastop(ctxt, em_or);
   3436
   3437	return X86EMUL_CONTINUE;
   3438}
   3439
   3440static int em_call(struct x86_emulate_ctxt *ctxt)
   3441{
   3442	int rc;
   3443	long rel = ctxt->src.val;
   3444
   3445	ctxt->src.val = (unsigned long)ctxt->_eip;
   3446	rc = jmp_rel(ctxt, rel);
   3447	if (rc != X86EMUL_CONTINUE)
   3448		return rc;
   3449	return em_push(ctxt);
   3450}
   3451
   3452static int em_call_far(struct x86_emulate_ctxt *ctxt)
   3453{
   3454	u16 sel, old_cs;
   3455	ulong old_eip;
   3456	int rc;
   3457	struct desc_struct old_desc, new_desc;
   3458	const struct x86_emulate_ops *ops = ctxt->ops;
   3459	int cpl = ctxt->ops->cpl(ctxt);
   3460	enum x86emul_mode prev_mode = ctxt->mode;
   3461
   3462	old_eip = ctxt->_eip;
   3463	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
   3464
   3465	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
   3466	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
   3467				       X86_TRANSFER_CALL_JMP, &new_desc);
   3468	if (rc != X86EMUL_CONTINUE)
   3469		return rc;
   3470
   3471	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
   3472	if (rc != X86EMUL_CONTINUE)
   3473		goto fail;
   3474
   3475	ctxt->src.val = old_cs;
   3476	rc = em_push(ctxt);
   3477	if (rc != X86EMUL_CONTINUE)
   3478		goto fail;
   3479
   3480	ctxt->src.val = old_eip;
   3481	rc = em_push(ctxt);
    3482	/* If we failed, we tainted the memory, but at the very least we
    3483	   should restore cs. */
   3484	if (rc != X86EMUL_CONTINUE) {
   3485		pr_warn_once("faulting far call emulation tainted memory\n");
   3486		goto fail;
   3487	}
   3488	return rc;
   3489fail:
   3490	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
   3491	ctxt->mode = prev_mode;
   3492	return rc;
   3493
   3494}
   3495
   3496static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
   3497{
   3498	int rc;
   3499	unsigned long eip;
   3500
   3501	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
   3502	if (rc != X86EMUL_CONTINUE)
   3503		return rc;
   3504	rc = assign_eip_near(ctxt, eip);
   3505	if (rc != X86EMUL_CONTINUE)
   3506		return rc;
   3507	rsp_increment(ctxt, ctxt->src.val);
   3508	return X86EMUL_CONTINUE;
   3509}
   3510
   3511static int em_xchg(struct x86_emulate_ctxt *ctxt)
   3512{
   3513	/* Write back the register source. */
   3514	ctxt->src.val = ctxt->dst.val;
   3515	write_register_operand(&ctxt->src);
   3516
   3517	/* Write back the memory destination with implicit LOCK prefix. */
   3518	ctxt->dst.val = ctxt->src.orig_val;
   3519	ctxt->lock_prefix = 1;
   3520	return X86EMUL_CONTINUE;
   3521}
   3522
   3523static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
   3524{
   3525	ctxt->dst.val = ctxt->src2.val;
   3526	return fastop(ctxt, em_imul);
   3527}
   3528
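/*
 * CWD/CDQ/CQO replicate the accumulator's sign bit into rDX.  The expression
 * below isolates that bit: if it is set, (1 - 1) is 0 and the bitwise NOT
 * gives all ones; if it is clear, (0 - 1) wraps to all ones and the NOT
 * gives 0.  rDX therefore becomes either 0 or ~0.
 */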
   3529static int em_cwd(struct x86_emulate_ctxt *ctxt)
   3530{
   3531	ctxt->dst.type = OP_REG;
   3532	ctxt->dst.bytes = ctxt->src.bytes;
   3533	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
   3534	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
   3535
   3536	return X86EMUL_CONTINUE;
   3537}
   3538
   3539static int em_rdpid(struct x86_emulate_ctxt *ctxt)
   3540{
   3541	u64 tsc_aux = 0;
   3542
   3543	if (!ctxt->ops->guest_has_rdpid(ctxt))
   3544		return emulate_ud(ctxt);
   3545
   3546	ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
   3547	ctxt->dst.val = tsc_aux;
   3548	return X86EMUL_CONTINUE;
   3549}
   3550
   3551static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
   3552{
   3553	u64 tsc = 0;
   3554
   3555	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
   3556	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
   3557	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
   3558	return X86EMUL_CONTINUE;
   3559}
   3560
   3561static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
   3562{
   3563	u64 pmc;
   3564
   3565	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
   3566		return emulate_gp(ctxt, 0);
   3567	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
   3568	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
   3569	return X86EMUL_CONTINUE;
   3570}
   3571
   3572static int em_mov(struct x86_emulate_ctxt *ctxt)
   3573{
   3574	memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
   3575	return X86EMUL_CONTINUE;
   3576}
   3577
   3578static int em_movbe(struct x86_emulate_ctxt *ctxt)
   3579{
   3580	u16 tmp;
   3581
   3582	if (!ctxt->ops->guest_has_movbe(ctxt))
   3583		return emulate_ud(ctxt);
   3584
   3585	switch (ctxt->op_bytes) {
   3586	case 2:
   3587		/*
   3588		 * From MOVBE definition: "...When the operand size is 16 bits,
   3589		 * the upper word of the destination register remains unchanged
   3590		 * ..."
   3591		 *
   3592		 * Both casting ->valptr and ->val to u16 breaks strict aliasing
    3593		 * rules, so we have to do the operation almost by hand.
   3594		 */
   3595		tmp = (u16)ctxt->src.val;
   3596		ctxt->dst.val &= ~0xffffUL;
   3597		ctxt->dst.val |= (unsigned long)swab16(tmp);
   3598		break;
   3599	case 4:
   3600		ctxt->dst.val = swab32((u32)ctxt->src.val);
   3601		break;
   3602	case 8:
   3603		ctxt->dst.val = swab64(ctxt->src.val);
   3604		break;
   3605	default:
   3606		BUG();
   3607	}
   3608	return X86EMUL_CONTINUE;
   3609}
   3610
   3611static int em_cr_write(struct x86_emulate_ctxt *ctxt)
   3612{
   3613	if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
   3614		return emulate_gp(ctxt, 0);
   3615
   3616	/* Disable writeback. */
   3617	ctxt->dst.type = OP_NONE;
   3618	return X86EMUL_CONTINUE;
   3619}
   3620
   3621static int em_dr_write(struct x86_emulate_ctxt *ctxt)
   3622{
   3623	unsigned long val;
   3624
   3625	if (ctxt->mode == X86EMUL_MODE_PROT64)
   3626		val = ctxt->src.val & ~0ULL;
   3627	else
   3628		val = ctxt->src.val & ~0U;
   3629
   3630	/* #UD condition is already handled. */
   3631	if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
   3632		return emulate_gp(ctxt, 0);
   3633
   3634	/* Disable writeback. */
   3635	ctxt->dst.type = OP_NONE;
   3636	return X86EMUL_CONTINUE;
   3637}
   3638
   3639static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
   3640{
   3641	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
   3642	u64 msr_data;
   3643	int r;
   3644
   3645	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
   3646		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
   3647	r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
   3648
   3649	if (r == X86EMUL_IO_NEEDED)
   3650		return r;
   3651
   3652	if (r > 0)
   3653		return emulate_gp(ctxt, 0);
   3654
   3655	return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
   3656}
   3657
   3658static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
   3659{
   3660	u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
   3661	u64 msr_data;
   3662	int r;
   3663
   3664	r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
   3665
   3666	if (r == X86EMUL_IO_NEEDED)
   3667		return r;
   3668
   3669	if (r)
   3670		return emulate_gp(ctxt, 0);
   3671
   3672	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
   3673	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
   3674	return X86EMUL_CONTINUE;
   3675}
   3676
   3677static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
   3678{
   3679	if (segment > VCPU_SREG_GS &&
   3680	    (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
   3681	    ctxt->ops->cpl(ctxt) > 0)
   3682		return emulate_gp(ctxt, 0);
   3683
   3684	ctxt->dst.val = get_segment_selector(ctxt, segment);
   3685	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
   3686		ctxt->dst.bytes = 2;
   3687	return X86EMUL_CONTINUE;
   3688}
   3689
   3690static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
   3691{
   3692	if (ctxt->modrm_reg > VCPU_SREG_GS)
   3693		return emulate_ud(ctxt);
   3694
   3695	return em_store_sreg(ctxt, ctxt->modrm_reg);
   3696}
   3697
   3698static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
   3699{
   3700	u16 sel = ctxt->src.val;
   3701
   3702	if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
   3703		return emulate_ud(ctxt);
   3704
   3705	if (ctxt->modrm_reg == VCPU_SREG_SS)
   3706		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
   3707
   3708	/* Disable writeback. */
   3709	ctxt->dst.type = OP_NONE;
   3710	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
   3711}
   3712
   3713static int em_sldt(struct x86_emulate_ctxt *ctxt)
   3714{
   3715	return em_store_sreg(ctxt, VCPU_SREG_LDTR);
   3716}
   3717
   3718static int em_lldt(struct x86_emulate_ctxt *ctxt)
   3719{
   3720	u16 sel = ctxt->src.val;
   3721
   3722	/* Disable writeback. */
   3723	ctxt->dst.type = OP_NONE;
   3724	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
   3725}
   3726
   3727static int em_str(struct x86_emulate_ctxt *ctxt)
   3728{
   3729	return em_store_sreg(ctxt, VCPU_SREG_TR);
   3730}
   3731
   3732static int em_ltr(struct x86_emulate_ctxt *ctxt)
   3733{
   3734	u16 sel = ctxt->src.val;
   3735
   3736	/* Disable writeback. */
   3737	ctxt->dst.type = OP_NONE;
   3738	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
   3739}
   3740
   3741static int em_invlpg(struct x86_emulate_ctxt *ctxt)
   3742{
   3743	int rc;
   3744	ulong linear;
   3745
   3746	rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
   3747	if (rc == X86EMUL_CONTINUE)
   3748		ctxt->ops->invlpg(ctxt, linear);
   3749	/* Disable writeback. */
   3750	ctxt->dst.type = OP_NONE;
   3751	return X86EMUL_CONTINUE;
   3752}
   3753
   3754static int em_clts(struct x86_emulate_ctxt *ctxt)
   3755{
   3756	ulong cr0;
   3757
   3758	cr0 = ctxt->ops->get_cr(ctxt, 0);
   3759	cr0 &= ~X86_CR0_TS;
   3760	ctxt->ops->set_cr(ctxt, 0, cr0);
   3761	return X86EMUL_CONTINUE;
   3762}
   3763
   3764static int em_hypercall(struct x86_emulate_ctxt *ctxt)
   3765{
   3766	int rc = ctxt->ops->fix_hypercall(ctxt);
   3767
   3768	if (rc != X86EMUL_CONTINUE)
   3769		return rc;
   3770
   3771	/* Let the processor re-execute the fixed hypercall */
   3772	ctxt->_eip = ctxt->eip;
   3773	/* Disable writeback. */
   3774	ctxt->dst.type = OP_NONE;
   3775	return X86EMUL_CONTINUE;
   3776}
   3777
   3778static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
   3779				  void (*get)(struct x86_emulate_ctxt *ctxt,
   3780					      struct desc_ptr *ptr))
   3781{
   3782	struct desc_ptr desc_ptr;
   3783
   3784	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
   3785	    ctxt->ops->cpl(ctxt) > 0)
   3786		return emulate_gp(ctxt, 0);
   3787
   3788	if (ctxt->mode == X86EMUL_MODE_PROT64)
   3789		ctxt->op_bytes = 8;
   3790	get(ctxt, &desc_ptr);
   3791	if (ctxt->op_bytes == 2) {
   3792		ctxt->op_bytes = 4;
   3793		desc_ptr.address &= 0x00ffffff;
   3794	}
   3795	/* Disable writeback. */
   3796	ctxt->dst.type = OP_NONE;
   3797	return segmented_write_std(ctxt, ctxt->dst.addr.mem,
   3798				   &desc_ptr, 2 + ctxt->op_bytes);
   3799}
   3800
   3801static int em_sgdt(struct x86_emulate_ctxt *ctxt)
   3802{
   3803	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
   3804}
   3805
   3806static int em_sidt(struct x86_emulate_ctxt *ctxt)
   3807{
   3808	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
   3809}
   3810
   3811static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
   3812{
   3813	struct desc_ptr desc_ptr;
   3814	int rc;
   3815
   3816	if (ctxt->mode == X86EMUL_MODE_PROT64)
   3817		ctxt->op_bytes = 8;
   3818	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
   3819			     &desc_ptr.size, &desc_ptr.address,
   3820			     ctxt->op_bytes);
   3821	if (rc != X86EMUL_CONTINUE)
   3822		return rc;
   3823	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
   3824	    emul_is_noncanonical_address(desc_ptr.address, ctxt))
   3825		return emulate_gp(ctxt, 0);
   3826	if (lgdt)
   3827		ctxt->ops->set_gdt(ctxt, &desc_ptr);
   3828	else
   3829		ctxt->ops->set_idt(ctxt, &desc_ptr);
   3830	/* Disable writeback. */
   3831	ctxt->dst.type = OP_NONE;
   3832	return X86EMUL_CONTINUE;
   3833}
   3834
   3835static int em_lgdt(struct x86_emulate_ctxt *ctxt)
   3836{
   3837	return em_lgdt_lidt(ctxt, true);
   3838}
   3839
   3840static int em_lidt(struct x86_emulate_ctxt *ctxt)
   3841{
   3842	return em_lgdt_lidt(ctxt, false);
   3843}
   3844
   3845static int em_smsw(struct x86_emulate_ctxt *ctxt)
   3846{
   3847	if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
   3848	    ctxt->ops->cpl(ctxt) > 0)
   3849		return emulate_gp(ctxt, 0);
   3850
   3851	if (ctxt->dst.type == OP_MEM)
   3852		ctxt->dst.bytes = 2;
   3853	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
   3854	return X86EMUL_CONTINUE;
   3855}
   3856
   3857static int em_lmsw(struct x86_emulate_ctxt *ctxt)
   3858{
   3859	ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
   3860			  | (ctxt->src.val & 0x0f));
   3861	ctxt->dst.type = OP_NONE;
   3862	return X86EMUL_CONTINUE;
   3863}
   3864
   3865static int em_loop(struct x86_emulate_ctxt *ctxt)
   3866{
   3867	int rc = X86EMUL_CONTINUE;
   3868
   3869	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
   3870	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
   3871	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
   3872		rc = jmp_rel(ctxt, ctxt->src.val);
   3873
   3874	return rc;
   3875}
   3876
   3877static int em_jcxz(struct x86_emulate_ctxt *ctxt)
   3878{
   3879	int rc = X86EMUL_CONTINUE;
   3880
   3881	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
   3882		rc = jmp_rel(ctxt, ctxt->src.val);
   3883
   3884	return rc;
   3885}
   3886
   3887static int em_in(struct x86_emulate_ctxt *ctxt)
   3888{
   3889	if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
   3890			     &ctxt->dst.val))
   3891		return X86EMUL_IO_NEEDED;
   3892
   3893	return X86EMUL_CONTINUE;
   3894}
   3895
   3896static int em_out(struct x86_emulate_ctxt *ctxt)
   3897{
   3898	ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
   3899				    &ctxt->src.val, 1);
   3900	/* Disable writeback. */
   3901	ctxt->dst.type = OP_NONE;
   3902	return X86EMUL_CONTINUE;
   3903}
   3904
   3905static int em_cli(struct x86_emulate_ctxt *ctxt)
   3906{
   3907	if (emulator_bad_iopl(ctxt))
   3908		return emulate_gp(ctxt, 0);
   3909
   3910	ctxt->eflags &= ~X86_EFLAGS_IF;
   3911	return X86EMUL_CONTINUE;
   3912}
   3913
   3914static int em_sti(struct x86_emulate_ctxt *ctxt)
   3915{
   3916	if (emulator_bad_iopl(ctxt))
   3917		return emulate_gp(ctxt, 0);
   3918
   3919	ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
   3920	ctxt->eflags |= X86_EFLAGS_IF;
   3921	return X86EMUL_CONTINUE;
   3922}
   3923
   3924static int em_cpuid(struct x86_emulate_ctxt *ctxt)
   3925{
   3926	u32 eax, ebx, ecx, edx;
   3927	u64 msr = 0;
   3928
   3929	ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
   3930	if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
   3931	    ctxt->ops->cpl(ctxt)) {
   3932		return emulate_gp(ctxt, 0);
   3933	}
   3934
   3935	eax = reg_read(ctxt, VCPU_REGS_RAX);
   3936	ecx = reg_read(ctxt, VCPU_REGS_RCX);
   3937	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
   3938	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
   3939	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
   3940	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
   3941	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
   3942	return X86EMUL_CONTINUE;
   3943}
   3944
   3945static int em_sahf(struct x86_emulate_ctxt *ctxt)
   3946{
   3947	u32 flags;
   3948
   3949	flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
   3950		X86_EFLAGS_SF;
   3951	flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
   3952
   3953	ctxt->eflags &= ~0xffUL;
   3954	ctxt->eflags |= flags | X86_EFLAGS_FIXED;
   3955	return X86EMUL_CONTINUE;
   3956}
   3957
   3958static int em_lahf(struct x86_emulate_ctxt *ctxt)
   3959{
   3960	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
   3961	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
   3962	return X86EMUL_CONTINUE;
   3963}
   3964
   3965static int em_bswap(struct x86_emulate_ctxt *ctxt)
   3966{
   3967	switch (ctxt->op_bytes) {
   3968#ifdef CONFIG_X86_64
   3969	case 8:
   3970		asm("bswap %0" : "+r"(ctxt->dst.val));
   3971		break;
   3972#endif
   3973	default:
   3974		asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
   3975		break;
   3976	}
   3977	return X86EMUL_CONTINUE;
   3978}
   3979
   3980static int em_clflush(struct x86_emulate_ctxt *ctxt)
   3981{
   3982	/* emulating clflush regardless of cpuid */
   3983	return X86EMUL_CONTINUE;
   3984}
   3985
   3986static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
   3987{
   3988	/* emulating clflushopt regardless of cpuid */
   3989	return X86EMUL_CONTINUE;
   3990}
   3991
   3992static int em_movsxd(struct x86_emulate_ctxt *ctxt)
   3993{
   3994	ctxt->dst.val = (s32) ctxt->src.val;
   3995	return X86EMUL_CONTINUE;
   3996}
   3997
   3998static int check_fxsr(struct x86_emulate_ctxt *ctxt)
   3999{
   4000	if (!ctxt->ops->guest_has_fxsr(ctxt))
   4001		return emulate_ud(ctxt);
   4002
   4003	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
   4004		return emulate_nm(ctxt);
   4005
   4006	/*
   4007	 * Don't emulate a case that should never be hit, instead of working
   4008	 * around a lack of fxsave64/fxrstor64 on old compilers.
   4009	 */
   4010	if (ctxt->mode >= X86EMUL_MODE_PROT64)
   4011		return X86EMUL_UNHANDLEABLE;
   4012
   4013	return X86EMUL_CONTINUE;
   4014}
   4015
   4016/*
   4017 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
   4018 * and restore MXCSR.
   4019 */
   4020static size_t __fxstate_size(int nregs)
   4021{
   4022	return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
   4023}
   4024
   4025static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
   4026{
   4027	bool cr4_osfxsr;
   4028	if (ctxt->mode == X86EMUL_MODE_PROT64)
   4029		return __fxstate_size(16);
   4030
   4031	cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
   4032	return __fxstate_size(cr4_osfxsr ? 8 : 0);
   4033}
   4034
   4035/*
   4036 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
   4037 *  1) 16 bit mode
   4038 *  2) 32 bit mode
   4039 *     - like (1), but FIP and FDP (foo) are only 16 bit.  At least Intel CPUs
   4040 *       preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
   4041 *       save and restore
   4042 *  3) 64-bit mode with REX.W prefix
   4043 *     - like (2), but XMM 8-15 are being saved and restored
   4044 *  4) 64-bit mode without REX.W prefix
   4045 *     - like (3), but FIP and FDP are 64 bit
   4046 *
   4047 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
   4048 * desired result.  (4) is not emulated.
   4049 *
   4050 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
   4051 * and FPU DS) should match.
   4052 */
   4053static int em_fxsave(struct x86_emulate_ctxt *ctxt)
   4054{
   4055	struct fxregs_state fx_state;
   4056	int rc;
   4057
   4058	rc = check_fxsr(ctxt);
   4059	if (rc != X86EMUL_CONTINUE)
   4060		return rc;
   4061
   4062	kvm_fpu_get();
   4063
   4064	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
   4065
   4066	kvm_fpu_put();
   4067
   4068	if (rc != X86EMUL_CONTINUE)
   4069		return rc;
   4070
   4071	return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
   4072		                   fxstate_size(ctxt));
   4073}
   4074
   4075/*
   4076 * FXRSTOR might restore XMM registers not provided by the guest. Fill
   4077 * in the host registers (via FXSAVE) instead, so they won't be modified.
   4078 * (preemption has to stay disabled until FXRSTOR).
   4079 *
   4080 * Use noinline to keep the stack for other functions called by callers small.
   4081 */
   4082static noinline int fxregs_fixup(struct fxregs_state *fx_state,
   4083				 const size_t used_size)
   4084{
   4085	struct fxregs_state fx_tmp;
   4086	int rc;
   4087
   4088	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
   4089	memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
   4090	       __fxstate_size(16) - used_size);
   4091
   4092	return rc;
   4093}
   4094
   4095static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
   4096{
   4097	struct fxregs_state fx_state;
   4098	int rc;
   4099	size_t size;
   4100
   4101	rc = check_fxsr(ctxt);
   4102	if (rc != X86EMUL_CONTINUE)
   4103		return rc;
   4104
   4105	size = fxstate_size(ctxt);
   4106	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
   4107	if (rc != X86EMUL_CONTINUE)
   4108		return rc;
   4109
   4110	kvm_fpu_get();
   4111
   4112	if (size < __fxstate_size(16)) {
   4113		rc = fxregs_fixup(&fx_state, size);
   4114		if (rc != X86EMUL_CONTINUE)
   4115			goto out;
   4116	}
   4117
   4118	if (fx_state.mxcsr >> 16) {
   4119		rc = emulate_gp(ctxt, 0);
   4120		goto out;
   4121	}
   4122
   4123	if (rc == X86EMUL_CONTINUE)
   4124		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
   4125
   4126out:
   4127	kvm_fpu_put();
   4128
   4129	return rc;
   4130}
   4131
   4132static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
   4133{
   4134	u32 eax, ecx, edx;
   4135
   4136	eax = reg_read(ctxt, VCPU_REGS_RAX);
   4137	edx = reg_read(ctxt, VCPU_REGS_RDX);
   4138	ecx = reg_read(ctxt, VCPU_REGS_RCX);
   4139
   4140	if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
   4141		return emulate_gp(ctxt, 0);
   4142
   4143	return X86EMUL_CONTINUE;
   4144}
   4145
   4146static bool valid_cr(int nr)
   4147{
   4148	switch (nr) {
   4149	case 0:
   4150	case 2 ... 4:
   4151	case 8:
   4152		return true;
   4153	default:
   4154		return false;
   4155	}
   4156}
   4157
   4158static int check_cr_access(struct x86_emulate_ctxt *ctxt)
   4159{
   4160	if (!valid_cr(ctxt->modrm_reg))
   4161		return emulate_ud(ctxt);
   4162
   4163	return X86EMUL_CONTINUE;
   4164}
   4165
   4166static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
   4167{
   4168	unsigned long dr7;
   4169
   4170	ctxt->ops->get_dr(ctxt, 7, &dr7);
   4171
    4172	/* Check if DR7.GD (general detect enable, bit 13) is set */
   4173	return dr7 & (1 << 13);
   4174}
   4175
   4176static int check_dr_read(struct x86_emulate_ctxt *ctxt)
   4177{
   4178	int dr = ctxt->modrm_reg;
   4179	u64 cr4;
   4180
   4181	if (dr > 7)
   4182		return emulate_ud(ctxt);
   4183
   4184	cr4 = ctxt->ops->get_cr(ctxt, 4);
   4185	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
   4186		return emulate_ud(ctxt);
   4187
   4188	if (check_dr7_gd(ctxt)) {
   4189		ulong dr6;
   4190
   4191		ctxt->ops->get_dr(ctxt, 6, &dr6);
   4192		dr6 &= ~DR_TRAP_BITS;
   4193		dr6 |= DR6_BD | DR6_ACTIVE_LOW;
   4194		ctxt->ops->set_dr(ctxt, 6, dr6);
   4195		return emulate_db(ctxt);
   4196	}
   4197
   4198	return X86EMUL_CONTINUE;
   4199}
   4200
   4201static int check_dr_write(struct x86_emulate_ctxt *ctxt)
   4202{
   4203	u64 new_val = ctxt->src.val64;
   4204	int dr = ctxt->modrm_reg;
   4205
   4206	if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
   4207		return emulate_gp(ctxt, 0);
   4208
   4209	return check_dr_read(ctxt);
   4210}
   4211
   4212static int check_svme(struct x86_emulate_ctxt *ctxt)
   4213{
   4214	u64 efer = 0;
   4215
   4216	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
   4217
   4218	if (!(efer & EFER_SVME))
   4219		return emulate_ud(ctxt);
   4220
   4221	return X86EMUL_CONTINUE;
   4222}
   4223
   4224static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
   4225{
   4226	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
   4227
   4228	/* Valid physical address? */
   4229	if (rax & 0xffff000000000000ULL)
   4230		return emulate_gp(ctxt, 0);
   4231
   4232	return check_svme(ctxt);
   4233}
   4234
   4235static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
   4236{
   4237	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
   4238
   4239	if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
   4240		return emulate_gp(ctxt, 0);
   4241
   4242	return X86EMUL_CONTINUE;
   4243}
   4244
   4245static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
   4246{
   4247	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
   4248	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
   4249
   4250	/*
    4251	 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
   4252	 * in Ring3 when CR4.PCE=0.
   4253	 */
   4254	if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
   4255		return X86EMUL_CONTINUE;
   4256
   4257	/*
    4258	 * If CR4.PCE is clear, the SDM requires CPL=0 or CR0.PE=0.  The CR0.PE
   4259	 * check however is unnecessary because CPL is always 0 outside
   4260	 * protected mode.
   4261	 */
   4262	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
   4263	    ctxt->ops->check_pmc(ctxt, rcx))
   4264		return emulate_gp(ctxt, 0);
   4265
   4266	return X86EMUL_CONTINUE;
   4267}
   4268
   4269static int check_perm_in(struct x86_emulate_ctxt *ctxt)
   4270{
   4271	ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
   4272	if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
   4273		return emulate_gp(ctxt, 0);
   4274
   4275	return X86EMUL_CONTINUE;
   4276}
   4277
   4278static int check_perm_out(struct x86_emulate_ctxt *ctxt)
   4279{
   4280	ctxt->src.bytes = min(ctxt->src.bytes, 4u);
   4281	if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
   4282		return emulate_gp(ctxt, 0);
   4283
   4284	return X86EMUL_CONTINUE;
   4285}
   4286
   4287#define D(_y) { .flags = (_y) }
   4288#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
   4289#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
   4290		      .intercept = x86_intercept_##_i, .check_perm = (_p) }
   4291#define N    D(NotImpl)
   4292#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
   4293#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
   4294#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
   4295#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
   4296#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
   4297#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
   4298#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
   4299#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
   4300#define II(_f, _e, _i) \
   4301	{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
   4302#define IIP(_f, _e, _i, _p) \
   4303	{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
   4304	  .intercept = x86_intercept_##_i, .check_perm = (_p) }
   4305#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
   4306
   4307#define D2bv(_f)      D((_f) | ByteOp), D(_f)
   4308#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
   4309#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
   4310#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
   4311#define I2bvIP(_f, _e, _i, _p) \
   4312	IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
   4313
   4314#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e),		\
   4315		F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
   4316		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
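/*
 * Each of the macros above builds one struct opcode initializer.  For
 * example, F(Lock, em_add) expands to { .flags = Lock | Fastop,
 * .u.fastop = em_add }, and F6ALU() emits the six entries of a classic ALU
 * opcode row: the mem <- reg, reg <- mem and acc <- imm forms, each in byte
 * and word/dword width.
 */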
   4317
   4318static const struct opcode group7_rm0[] = {
   4319	N,
   4320	I(SrcNone | Priv | EmulateOnUD,	em_hypercall),
   4321	N, N, N, N, N, N,
   4322};
   4323
   4324static const struct opcode group7_rm1[] = {
   4325	DI(SrcNone | Priv, monitor),
   4326	DI(SrcNone | Priv, mwait),
   4327	N, N, N, N, N, N,
   4328};
   4329
   4330static const struct opcode group7_rm2[] = {
   4331	N,
   4332	II(ImplicitOps | Priv,			em_xsetbv,	xsetbv),
   4333	N, N, N, N, N, N,
   4334};
   4335
   4336static const struct opcode group7_rm3[] = {
   4337	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
   4338	II(SrcNone  | Prot | EmulateOnUD,	em_hypercall,	vmmcall),
   4339	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
   4340	DIP(SrcNone | Prot | Priv,		vmsave,		check_svme_pa),
   4341	DIP(SrcNone | Prot | Priv,		stgi,		check_svme),
   4342	DIP(SrcNone | Prot | Priv,		clgi,		check_svme),
   4343	DIP(SrcNone | Prot | Priv,		skinit,		check_svme),
   4344	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
   4345};
   4346
   4347static const struct opcode group7_rm7[] = {
   4348	N,
   4349	DIP(SrcNone, rdtscp, check_rdtsc),
   4350	N, N, N, N, N, N,
   4351};
   4352
   4353static const struct opcode group1[] = {
   4354	F(Lock, em_add),
   4355	F(Lock | PageTable, em_or),
   4356	F(Lock, em_adc),
   4357	F(Lock, em_sbb),
   4358	F(Lock | PageTable, em_and),
   4359	F(Lock, em_sub),
   4360	F(Lock, em_xor),
   4361	F(NoWrite, em_cmp),
   4362};
   4363
   4364static const struct opcode group1A[] = {
   4365	I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
   4366};
   4367
   4368static const struct opcode group2[] = {
   4369	F(DstMem | ModRM, em_rol),
   4370	F(DstMem | ModRM, em_ror),
   4371	F(DstMem | ModRM, em_rcl),
   4372	F(DstMem | ModRM, em_rcr),
   4373	F(DstMem | ModRM, em_shl),
   4374	F(DstMem | ModRM, em_shr),
   4375	F(DstMem | ModRM, em_shl),
   4376	F(DstMem | ModRM, em_sar),
   4377};
   4378
   4379static const struct opcode group3[] = {
   4380	F(DstMem | SrcImm | NoWrite, em_test),
   4381	F(DstMem | SrcImm | NoWrite, em_test),
   4382	F(DstMem | SrcNone | Lock, em_not),
   4383	F(DstMem | SrcNone | Lock, em_neg),
   4384	F(DstXacc | Src2Mem, em_mul_ex),
   4385	F(DstXacc | Src2Mem, em_imul_ex),
   4386	F(DstXacc | Src2Mem, em_div_ex),
   4387	F(DstXacc | Src2Mem, em_idiv_ex),
   4388};
   4389
   4390static const struct opcode group4[] = {
   4391	F(ByteOp | DstMem | SrcNone | Lock, em_inc),
   4392	F(ByteOp | DstMem | SrcNone | Lock, em_dec),
   4393	N, N, N, N, N, N,
   4394};
   4395
   4396static const struct opcode group5[] = {
   4397	F(DstMem | SrcNone | Lock,		em_inc),
   4398	F(DstMem | SrcNone | Lock,		em_dec),
   4399	I(SrcMem | NearBranch | IsBranch,       em_call_near_abs),
   4400	I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
   4401	I(SrcMem | NearBranch | IsBranch,       em_jmp_abs),
   4402	I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
   4403	I(SrcMem | Stack | TwoMemOp,		em_push), D(Undefined),
   4404};
   4405
   4406static const struct opcode group6[] = {
   4407	II(Prot | DstMem,	   em_sldt, sldt),
   4408	II(Prot | DstMem,	   em_str, str),
   4409	II(Prot | Priv | SrcMem16, em_lldt, lldt),
   4410	II(Prot | Priv | SrcMem16, em_ltr, ltr),
   4411	N, N, N, N,
   4412};
   4413
   4414static const struct group_dual group7 = { {
   4415	II(Mov | DstMem,			em_sgdt, sgdt),
   4416	II(Mov | DstMem,			em_sidt, sidt),
   4417	II(SrcMem | Priv,			em_lgdt, lgdt),
   4418	II(SrcMem | Priv,			em_lidt, lidt),
   4419	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
   4420	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
   4421	II(SrcMem | ByteOp | Priv | NoAccess,	em_invlpg, invlpg),
   4422}, {
   4423	EXT(0, group7_rm0),
   4424	EXT(0, group7_rm1),
   4425	EXT(0, group7_rm2),
   4426	EXT(0, group7_rm3),
   4427	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
   4428	II(SrcMem16 | Mov | Priv,		em_lmsw, lmsw),
   4429	EXT(0, group7_rm7),
   4430} };
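/*
 * A group_dual table has two halves: mod012 for the memory forms of the
 * ModRM byte and mod3 for the register forms.  For 0f 01 (group7), LGDT with
 * a memory operand decodes through the first half, while mod == 3 encodings
 * such as XSETBV (0f 01 d1) are reached via group7_rm2 in the second half.
 */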
   4431
   4432static const struct opcode group8[] = {
   4433	N, N, N, N,
   4434	F(DstMem | SrcImmByte | NoWrite,		em_bt),
   4435	F(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
   4436	F(DstMem | SrcImmByte | Lock,			em_btr),
   4437	F(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
   4438};
   4439
   4440/*
   4441 * The "memory" destination is actually always a register, since we come
   4442 * from the register case of group9.
   4443 */
   4444static const struct gprefix pfx_0f_c7_7 = {
   4445	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
   4446};
   4447
   4448
   4449static const struct group_dual group9 = { {
   4450	N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
   4451}, {
   4452	N, N, N, N, N, N, N,
   4453	GP(0, &pfx_0f_c7_7),
   4454} };
   4455
   4456static const struct opcode group11[] = {
   4457	I(DstMem | SrcImm | Mov | PageTable, em_mov),
   4458	X7(D(Undefined)),
   4459};
   4460
   4461static const struct gprefix pfx_0f_ae_7 = {
   4462	I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
   4463};
   4464
   4465static const struct group_dual group15 = { {
   4466	I(ModRM | Aligned16, em_fxsave),
   4467	I(ModRM | Aligned16, em_fxrstor),
   4468	N, N, N, N, N, GP(0, &pfx_0f_ae_7),
   4469}, {
   4470	N, N, N, N, N, N, N, N,
   4471} };
   4472
   4473static const struct gprefix pfx_0f_6f_0f_7f = {
   4474	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
   4475};
   4476
   4477static const struct instr_dual instr_dual_0f_2b = {
   4478	I(0, em_mov), N
   4479};
   4480
   4481static const struct gprefix pfx_0f_2b = {
   4482	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
   4483};
   4484
   4485static const struct gprefix pfx_0f_10_0f_11 = {
   4486	I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
   4487};
   4488
   4489static const struct gprefix pfx_0f_28_0f_29 = {
   4490	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
   4491};
   4492
   4493static const struct gprefix pfx_0f_e7 = {
   4494	N, I(Sse, em_mov), N, N,
   4495};
   4496
   4497static const struct escape escape_d9 = { {
   4498	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
   4499}, {
   4500	/* 0xC0 - 0xC7 */
   4501	N, N, N, N, N, N, N, N,
   4502	/* 0xC8 - 0xCF */
   4503	N, N, N, N, N, N, N, N,
    4504	/* 0xD0 - 0xD7 */
   4505	N, N, N, N, N, N, N, N,
   4506	/* 0xD8 - 0xDF */
   4507	N, N, N, N, N, N, N, N,
   4508	/* 0xE0 - 0xE7 */
   4509	N, N, N, N, N, N, N, N,
   4510	/* 0xE8 - 0xEF */
   4511	N, N, N, N, N, N, N, N,
   4512	/* 0xF0 - 0xF7 */
   4513	N, N, N, N, N, N, N, N,
   4514	/* 0xF8 - 0xFF */
   4515	N, N, N, N, N, N, N, N,
   4516} };
   4517
   4518static const struct escape escape_db = { {
   4519	N, N, N, N, N, N, N, N,
   4520}, {
   4521	/* 0xC0 - 0xC7 */
   4522	N, N, N, N, N, N, N, N,
   4523	/* 0xC8 - 0xCF */
   4524	N, N, N, N, N, N, N, N,
   4525	/* 0xD0 - 0xC7 */
   4526	N, N, N, N, N, N, N, N,
   4527	/* 0xD8 - 0xDF */
   4528	N, N, N, N, N, N, N, N,
   4529	/* 0xE0 - 0xE7 */
   4530	N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
   4531	/* 0xE8 - 0xEF */
   4532	N, N, N, N, N, N, N, N,
   4533	/* 0xF0 - 0xF7 */
   4534	N, N, N, N, N, N, N, N,
   4535	/* 0xF8 - 0xFF */
   4536	N, N, N, N, N, N, N, N,
   4537} };
   4538
   4539static const struct escape escape_dd = { {
   4540	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
   4541}, {
   4542	/* 0xC0 - 0xC7 */
   4543	N, N, N, N, N, N, N, N,
   4544	/* 0xC8 - 0xCF */
   4545	N, N, N, N, N, N, N, N,
   4546	/* 0xD0 - 0xC7 */
   4547	N, N, N, N, N, N, N, N,
   4548	/* 0xD8 - 0xDF */
   4549	N, N, N, N, N, N, N, N,
   4550	/* 0xE0 - 0xE7 */
   4551	N, N, N, N, N, N, N, N,
   4552	/* 0xE8 - 0xEF */
   4553	N, N, N, N, N, N, N, N,
   4554	/* 0xF0 - 0xF7 */
   4555	N, N, N, N, N, N, N, N,
   4556	/* 0xF8 - 0xFF */
   4557	N, N, N, N, N, N, N, N,
   4558} };
   4559
   4560static const struct instr_dual instr_dual_0f_c3 = {
   4561	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
   4562};
   4563
   4564static const struct mode_dual mode_dual_63 = {
   4565	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
   4566};
   4567
   4568static const struct opcode opcode_table[256] = {
   4569	/* 0x00 - 0x07 */
   4570	F6ALU(Lock, em_add),
   4571	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
   4572	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
   4573	/* 0x08 - 0x0F */
   4574	F6ALU(Lock | PageTable, em_or),
   4575	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
   4576	N,
   4577	/* 0x10 - 0x17 */
   4578	F6ALU(Lock, em_adc),
   4579	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
   4580	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
   4581	/* 0x18 - 0x1F */
   4582	F6ALU(Lock, em_sbb),
   4583	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
   4584	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
   4585	/* 0x20 - 0x27 */
   4586	F6ALU(Lock | PageTable, em_and), N, N,
   4587	/* 0x28 - 0x2F */
   4588	F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
   4589	/* 0x30 - 0x37 */
   4590	F6ALU(Lock, em_xor), N, N,
   4591	/* 0x38 - 0x3F */
   4592	F6ALU(NoWrite, em_cmp), N, N,
   4593	/* 0x40 - 0x4F */
   4594	X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
   4595	/* 0x50 - 0x57 */
   4596	X8(I(SrcReg | Stack, em_push)),
   4597	/* 0x58 - 0x5F */
   4598	X8(I(DstReg | Stack, em_pop)),
   4599	/* 0x60 - 0x67 */
   4600	I(ImplicitOps | Stack | No64, em_pusha),
   4601	I(ImplicitOps | Stack | No64, em_popa),
   4602	N, MD(ModRM, &mode_dual_63),
   4603	N, N, N, N,
   4604	/* 0x68 - 0x6F */
   4605	I(SrcImm | Mov | Stack, em_push),
   4606	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
   4607	I(SrcImmByte | Mov | Stack, em_push),
   4608	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
   4609	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
   4610	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
   4611	/* 0x70 - 0x7F */
   4612	X16(D(SrcImmByte | NearBranch | IsBranch)),
   4613	/* 0x80 - 0x87 */
   4614	G(ByteOp | DstMem | SrcImm, group1),
   4615	G(DstMem | SrcImm, group1),
   4616	G(ByteOp | DstMem | SrcImm | No64, group1),
   4617	G(DstMem | SrcImmByte, group1),
   4618	F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
   4619	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
   4620	/* 0x88 - 0x8F */
   4621	I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
   4622	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
   4623	I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
   4624	D(ModRM | SrcMem | NoAccess | DstReg),
   4625	I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
   4626	G(0, group1A),
   4627	/* 0x90 - 0x97 */
   4628	DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
   4629	/* 0x98 - 0x9F */
   4630	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
   4631	I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
   4632	II(ImplicitOps | Stack, em_pushf, pushf),
   4633	II(ImplicitOps | Stack, em_popf, popf),
   4634	I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
   4635	/* 0xA0 - 0xA7 */
   4636	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
   4637	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
   4638	I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
   4639	F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
   4640	/* 0xA8 - 0xAF */
   4641	F2bv(DstAcc | SrcImm | NoWrite, em_test),
   4642	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
   4643	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
   4644	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
   4645	/* 0xB0 - 0xB7 */
   4646	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
   4647	/* 0xB8 - 0xBF */
   4648	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
   4649	/* 0xC0 - 0xC7 */
   4650	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
   4651	I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
   4652	I(ImplicitOps | NearBranch | IsBranch, em_ret),
   4653	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
   4654	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
   4655	G(ByteOp, group11), G(0, group11),
   4656	/* 0xC8 - 0xCF */
   4657	I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
   4658	I(Stack | IsBranch, em_leave),
   4659	I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
   4660	I(ImplicitOps | IsBranch, em_ret_far),
   4661	D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
   4662	D(ImplicitOps | No64 | IsBranch),
   4663	II(ImplicitOps | IsBranch, em_iret, iret),
   4664	/* 0xD0 - 0xD7 */
   4665	G(Src2One | ByteOp, group2), G(Src2One, group2),
   4666	G(Src2CL | ByteOp, group2), G(Src2CL, group2),
   4667	I(DstAcc | SrcImmUByte | No64, em_aam),
   4668	I(DstAcc | SrcImmUByte | No64, em_aad),
   4669	F(DstAcc | ByteOp | No64, em_salc),
   4670	I(DstAcc | SrcXLat | ByteOp, em_mov),
   4671	/* 0xD8 - 0xDF */
   4672	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
   4673	/* 0xE0 - 0xE7 */
   4674	X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
   4675	I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
   4676	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
   4677	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
   4678	/* 0xE8 - 0xEF */
   4679	I(SrcImm | NearBranch | IsBranch, em_call),
   4680	D(SrcImm | ImplicitOps | NearBranch | IsBranch),
   4681	I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
   4682	D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
   4683	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
   4684	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
   4685	/* 0xF0 - 0xF7 */
   4686	N, DI(ImplicitOps, icebp), N, N,
   4687	DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
   4688	G(ByteOp, group3), G(0, group3),
   4689	/* 0xF8 - 0xFF */
   4690	D(ImplicitOps), D(ImplicitOps),
   4691	I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
   4692	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
   4693};
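/*
 * The X4()/X8()/X16() helpers (defined earlier in this file) simply repeat an
 * entry, so for instance the 0x50 - 0x57 row above maps all eight PUSH reg
 * opcodes to em_push with identical flags.
 */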
   4694
   4695static const struct opcode twobyte_table[256] = {
   4696	/* 0x00 - 0x0F */
   4697	G(0, group6), GD(0, &group7), N, N,
   4698	N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
   4699	II(ImplicitOps | Priv, em_clts, clts), N,
   4700	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
   4701	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
   4702	/* 0x10 - 0x1F */
   4703	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
   4704	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
   4705	N, N, N, N, N, N,
   4706	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
   4707	D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
   4708	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
   4709	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
   4710	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
   4711	D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
   4712	/* 0x20 - 0x2F */
   4713	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
   4714	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
   4715	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
   4716						check_cr_access),
   4717	IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
   4718						check_dr_write),
   4719	N, N, N, N,
   4720	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
   4721	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
   4722	N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
   4723	N, N, N, N,
   4724	/* 0x30 - 0x3F */
   4725	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
   4726	IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
   4727	II(ImplicitOps | Priv, em_rdmsr, rdmsr),
   4728	IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
   4729	I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
   4730	I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
   4731	N, N,
   4732	N, N, N, N, N, N, N, N,
   4733	/* 0x40 - 0x4F */
   4734	X16(D(DstReg | SrcMem | ModRM)),
   4735	/* 0x50 - 0x5F */
   4736	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
   4737	/* 0x60 - 0x6F */
   4738	N, N, N, N,
   4739	N, N, N, N,
   4740	N, N, N, N,
   4741	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
   4742	/* 0x70 - 0x7F */
   4743	N, N, N, N,
   4744	N, N, N, N,
   4745	N, N, N, N,
   4746	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
   4747	/* 0x80 - 0x8F */
   4748	X16(D(SrcImm | NearBranch | IsBranch)),
   4749	/* 0x90 - 0x9F */
    4750	X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
   4751	/* 0xA0 - 0xA7 */
   4752	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
   4753	II(ImplicitOps, em_cpuid, cpuid),
   4754	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
   4755	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
   4756	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
   4757	/* 0xA8 - 0xAF */
   4758	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
   4759	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
   4760	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
   4761	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
   4762	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
   4763	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
   4764	/* 0xB0 - 0xB7 */
   4765	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
   4766	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
   4767	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
   4768	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
   4769	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
   4770	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
   4771	/* 0xB8 - 0xBF */
   4772	N, N,
   4773	G(BitOp, group8),
   4774	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
   4775	I(DstReg | SrcMem | ModRM, em_bsf_c),
   4776	I(DstReg | SrcMem | ModRM, em_bsr_c),
   4777	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
   4778	/* 0xC0 - 0xC7 */
   4779	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
   4780	N, ID(0, &instr_dual_0f_c3),
   4781	N, N, N, GD(0, &group9),
   4782	/* 0xC8 - 0xCF */
   4783	X8(I(DstReg, em_bswap)),
   4784	/* 0xD0 - 0xDF */
   4785	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
   4786	/* 0xE0 - 0xEF */
   4787	N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
   4788	N, N, N, N, N, N, N, N,
   4789	/* 0xF0 - 0xFF */
   4790	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
   4791};
   4792
   4793static const struct instr_dual instr_dual_0f_38_f0 = {
   4794	I(DstReg | SrcMem | Mov, em_movbe), N
   4795};
   4796
   4797static const struct instr_dual instr_dual_0f_38_f1 = {
   4798	I(DstMem | SrcReg | Mov, em_movbe), N
   4799};
   4800
   4801static const struct gprefix three_byte_0f_38_f0 = {
   4802	ID(0, &instr_dual_0f_38_f0), N, N, N
   4803};
   4804
   4805static const struct gprefix three_byte_0f_38_f1 = {
   4806	ID(0, &instr_dual_0f_38_f1), N, N, N
   4807};
   4808
   4809/*
    4810 * The instructions below are selected by the prefix; the table itself is
    4811 * indexed by the third opcode byte.
   4812 */
   4813static const struct opcode opcode_map_0f_38[256] = {
   4814	/* 0x00 - 0x7f */
   4815	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
   4816	/* 0x80 - 0xef */
   4817	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
   4818	/* 0xf0 - 0xf1 */
   4819	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
   4820	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
   4821	/* 0xf2 - 0xff */
   4822	N, N, X4(N), X8(N)
   4823};
   4824
   4825#undef D
   4826#undef N
   4827#undef G
   4828#undef GD
   4829#undef I
   4830#undef GP
   4831#undef EXT
   4832#undef MD
   4833#undef ID
   4834
   4835#undef D2bv
   4836#undef D2bvIP
   4837#undef I2bv
   4838#undef I2bvIP
   4839#undef I6ALU
   4840
   4841static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
   4842{
   4843	unsigned size;
   4844
   4845	size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   4846	if (size == 8)
   4847		size = 4;
   4848	return size;
   4849}
   4850
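/*
 * Immediates are at most four bytes except for the OpImm64 case (the
 * 0xb8..0xbf MOV reg, imm64 forms), so imm_size() caps the fetch at four
 * bytes.  decode_imm() then sign- or zero-extends as requested: a one-byte
 * immediate of 0xff becomes -1 with sign_extension set and 255 without it.
 */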
   4851static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
   4852		      unsigned size, bool sign_extension)
   4853{
   4854	int rc = X86EMUL_CONTINUE;
   4855
   4856	op->type = OP_IMM;
   4857	op->bytes = size;
   4858	op->addr.mem.ea = ctxt->_eip;
   4859	/* NB. Immediates are sign-extended as necessary. */
   4860	switch (op->bytes) {
   4861	case 1:
   4862		op->val = insn_fetch(s8, ctxt);
   4863		break;
   4864	case 2:
   4865		op->val = insn_fetch(s16, ctxt);
   4866		break;
   4867	case 4:
   4868		op->val = insn_fetch(s32, ctxt);
   4869		break;
   4870	case 8:
   4871		op->val = insn_fetch(s64, ctxt);
   4872		break;
   4873	}
   4874	if (!sign_extension) {
   4875		switch (op->bytes) {
   4876		case 1:
   4877			op->val &= 0xff;
   4878			break;
   4879		case 2:
   4880			op->val &= 0xffff;
   4881			break;
   4882		case 4:
   4883			op->val &= 0xffffffff;
   4884			break;
   4885		}
   4886	}
   4887done:
   4888	return rc;
   4889}
   4890
   4891static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
   4892			  unsigned d)
   4893{
   4894	int rc = X86EMUL_CONTINUE;
   4895
   4896	switch (d) {
   4897	case OpReg:
   4898		decode_register_operand(ctxt, op);
   4899		break;
   4900	case OpImmUByte:
   4901		rc = decode_imm(ctxt, op, 1, false);
   4902		break;
   4903	case OpMem:
   4904		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   4905	mem_common:
   4906		*op = ctxt->memop;
   4907		ctxt->memopp = op;
   4908		if (ctxt->d & BitOp)
   4909			fetch_bit_operand(ctxt);
   4910		op->orig_val = op->val;
   4911		break;
   4912	case OpMem64:
   4913		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
   4914		goto mem_common;
   4915	case OpAcc:
   4916		op->type = OP_REG;
   4917		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   4918		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
   4919		fetch_register_operand(op);
   4920		op->orig_val = op->val;
   4921		break;
   4922	case OpAccLo:
   4923		op->type = OP_REG;
   4924		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
   4925		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
   4926		fetch_register_operand(op);
   4927		op->orig_val = op->val;
   4928		break;
   4929	case OpAccHi:
   4930		if (ctxt->d & ByteOp) {
   4931			op->type = OP_NONE;
   4932			break;
   4933		}
   4934		op->type = OP_REG;
   4935		op->bytes = ctxt->op_bytes;
   4936		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
   4937		fetch_register_operand(op);
   4938		op->orig_val = op->val;
   4939		break;
   4940	case OpDI:
   4941		op->type = OP_MEM;
   4942		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   4943		op->addr.mem.ea =
   4944			register_address(ctxt, VCPU_REGS_RDI);
   4945		op->addr.mem.seg = VCPU_SREG_ES;
   4946		op->val = 0;
   4947		op->count = 1;
   4948		break;
   4949	case OpDX:
   4950		op->type = OP_REG;
   4951		op->bytes = 2;
   4952		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
   4953		fetch_register_operand(op);
   4954		break;
   4955	case OpCL:
   4956		op->type = OP_IMM;
   4957		op->bytes = 1;
   4958		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
   4959		break;
   4960	case OpImmByte:
   4961		rc = decode_imm(ctxt, op, 1, true);
   4962		break;
   4963	case OpOne:
   4964		op->type = OP_IMM;
   4965		op->bytes = 1;
   4966		op->val = 1;
   4967		break;
   4968	case OpImm:
   4969		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
   4970		break;
   4971	case OpImm64:
   4972		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
   4973		break;
   4974	case OpMem8:
   4975		ctxt->memop.bytes = 1;
   4976		if (ctxt->memop.type == OP_REG) {
   4977			ctxt->memop.addr.reg = decode_register(ctxt,
   4978					ctxt->modrm_rm, true);
   4979			fetch_register_operand(&ctxt->memop);
   4980		}
   4981		goto mem_common;
   4982	case OpMem16:
   4983		ctxt->memop.bytes = 2;
   4984		goto mem_common;
   4985	case OpMem32:
   4986		ctxt->memop.bytes = 4;
   4987		goto mem_common;
   4988	case OpImmU16:
   4989		rc = decode_imm(ctxt, op, 2, false);
   4990		break;
   4991	case OpImmU:
   4992		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
   4993		break;
   4994	case OpSI:
   4995		op->type = OP_MEM;
   4996		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   4997		op->addr.mem.ea =
   4998			register_address(ctxt, VCPU_REGS_RSI);
   4999		op->addr.mem.seg = ctxt->seg_override;
   5000		op->val = 0;
   5001		op->count = 1;
   5002		break;
   5003	case OpXLat:
   5004		op->type = OP_MEM;
   5005		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
   5006		op->addr.mem.ea =
   5007			address_mask(ctxt,
   5008				reg_read(ctxt, VCPU_REGS_RBX) +
   5009				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
   5010		op->addr.mem.seg = ctxt->seg_override;
   5011		op->val = 0;
   5012		break;
   5013	case OpImmFAddr:
   5014		op->type = OP_IMM;
   5015		op->addr.mem.ea = ctxt->_eip;
   5016		op->bytes = ctxt->op_bytes + 2;
   5017		insn_fetch_arr(op->valptr, op->bytes, ctxt);
   5018		break;
   5019	case OpMemFAddr:
   5020		ctxt->memop.bytes = ctxt->op_bytes + 2;
   5021		goto mem_common;
   5022	case OpES:
   5023		op->type = OP_IMM;
   5024		op->val = VCPU_SREG_ES;
   5025		break;
   5026	case OpCS:
   5027		op->type = OP_IMM;
   5028		op->val = VCPU_SREG_CS;
   5029		break;
   5030	case OpSS:
   5031		op->type = OP_IMM;
   5032		op->val = VCPU_SREG_SS;
   5033		break;
   5034	case OpDS:
   5035		op->type = OP_IMM;
   5036		op->val = VCPU_SREG_DS;
   5037		break;
   5038	case OpFS:
   5039		op->type = OP_IMM;
   5040		op->val = VCPU_SREG_FS;
   5041		break;
   5042	case OpGS:
   5043		op->type = OP_IMM;
   5044		op->val = VCPU_SREG_GS;
   5045		break;
   5046	case OpImplicit:
   5047		/* Special instructions do their own operand decoding. */
   5048	default:
   5049		op->type = OP_NONE; /* Disable writeback. */
   5050		break;
   5051	}
   5052
   5053done:
   5054	return rc;
   5055}
   5056
   5057int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
   5058{
   5059	int rc = X86EMUL_CONTINUE;
   5060	int mode = ctxt->mode;
   5061	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
   5062	bool op_prefix = false;
   5063	bool has_seg_override = false;
   5064	struct opcode opcode;
   5065	u16 dummy;
   5066	struct desc_struct desc;
   5067
   5068	ctxt->memop.type = OP_NONE;
   5069	ctxt->memopp = NULL;
   5070	ctxt->_eip = ctxt->eip;
   5071	ctxt->fetch.ptr = ctxt->fetch.data;
   5072	ctxt->fetch.end = ctxt->fetch.data + insn_len;
   5073	ctxt->opcode_len = 1;
   5074	ctxt->intercept = x86_intercept_none;
   5075	if (insn_len > 0)
   5076		memcpy(ctxt->fetch.data, insn, insn_len);
   5077	else {
   5078		rc = __do_insn_fetch_bytes(ctxt, 1);
   5079		if (rc != X86EMUL_CONTINUE)
   5080			goto done;
   5081	}
   5082
   5083	switch (mode) {
   5084	case X86EMUL_MODE_REAL:
   5085	case X86EMUL_MODE_VM86:
   5086		def_op_bytes = def_ad_bytes = 2;
   5087		ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
   5088		if (desc.d)
   5089			def_op_bytes = def_ad_bytes = 4;
   5090		break;
   5091	case X86EMUL_MODE_PROT16:
   5092		def_op_bytes = def_ad_bytes = 2;
   5093		break;
   5094	case X86EMUL_MODE_PROT32:
   5095		def_op_bytes = def_ad_bytes = 4;
   5096		break;
   5097#ifdef CONFIG_X86_64
   5098	case X86EMUL_MODE_PROT64:
   5099		def_op_bytes = 4;
   5100		def_ad_bytes = 8;
   5101		break;
   5102#endif
   5103	default:
   5104		return EMULATION_FAILED;
   5105	}
   5106
   5107	ctxt->op_bytes = def_op_bytes;
   5108	ctxt->ad_bytes = def_ad_bytes;
   5109
   5110	/* Legacy prefixes. */
   5111	for (;;) {
   5112		switch (ctxt->b = insn_fetch(u8, ctxt)) {
   5113		case 0x66:	/* operand-size override */
   5114			op_prefix = true;
   5115			/* switch between 2/4 bytes */
   5116			ctxt->op_bytes = def_op_bytes ^ 6;
   5117			break;
   5118		case 0x67:	/* address-size override */
   5119			if (mode == X86EMUL_MODE_PROT64)
   5120				/* switch between 4/8 bytes */
   5121				ctxt->ad_bytes = def_ad_bytes ^ 12;
   5122			else
   5123				/* switch between 2/4 bytes */
   5124				ctxt->ad_bytes = def_ad_bytes ^ 6;
   5125			break;
   5126		case 0x26:	/* ES override */
   5127			has_seg_override = true;
   5128			ctxt->seg_override = VCPU_SREG_ES;
   5129			break;
   5130		case 0x2e:	/* CS override */
   5131			has_seg_override = true;
   5132			ctxt->seg_override = VCPU_SREG_CS;
   5133			break;
   5134		case 0x36:	/* SS override */
   5135			has_seg_override = true;
   5136			ctxt->seg_override = VCPU_SREG_SS;
   5137			break;
   5138		case 0x3e:	/* DS override */
   5139			has_seg_override = true;
   5140			ctxt->seg_override = VCPU_SREG_DS;
   5141			break;
   5142		case 0x64:	/* FS override */
   5143			has_seg_override = true;
   5144			ctxt->seg_override = VCPU_SREG_FS;
   5145			break;
   5146		case 0x65:	/* GS override */
   5147			has_seg_override = true;
   5148			ctxt->seg_override = VCPU_SREG_GS;
   5149			break;
   5150		case 0x40 ... 0x4f: /* REX */
   5151			if (mode != X86EMUL_MODE_PROT64)
   5152				goto done_prefixes;
   5153			ctxt->rex_prefix = ctxt->b;
   5154			continue;
   5155		case 0xf0:	/* LOCK */
   5156			ctxt->lock_prefix = 1;
   5157			break;
   5158		case 0xf2:	/* REPNE/REPNZ */
   5159		case 0xf3:	/* REP/REPE/REPZ */
   5160			ctxt->rep_prefix = ctxt->b;
   5161			break;
   5162		default:
   5163			goto done_prefixes;
   5164		}
   5165
   5166		/* Any legacy prefix after a REX prefix nullifies its effect. */
   5167
   5168		ctxt->rex_prefix = 0;
   5169	}
   5170
   5171done_prefixes:
   5172
   5173	/* REX prefix. */
   5174	if (ctxt->rex_prefix & 8)
   5175		ctxt->op_bytes = 8;	/* REX.W */
   5176
   5177	/* Opcode byte(s). */
   5178	opcode = opcode_table[ctxt->b];
   5179	/* Two-byte opcode? */
   5180	if (ctxt->b == 0x0f) {
   5181		ctxt->opcode_len = 2;
   5182		ctxt->b = insn_fetch(u8, ctxt);
   5183		opcode = twobyte_table[ctxt->b];
   5184
   5185		/* 0F_38 opcode map */
   5186		if (ctxt->b == 0x38) {
   5187			ctxt->opcode_len = 3;
   5188			ctxt->b = insn_fetch(u8, ctxt);
   5189			opcode = opcode_map_0f_38[ctxt->b];
   5190		}
   5191	}
   5192	ctxt->d = opcode.flags;
   5193
   5194	if (ctxt->d & ModRM)
   5195		ctxt->modrm = insn_fetch(u8, ctxt);
   5196
   5197	/* vex-prefix instructions are not implemented */
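        	/* Outside 64-bit mode, 0xc4/0xc5 are VEX prefixes only when the
        	 * would-be ModRM byte has mod == 3; otherwise they decode as LES/LDS. */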
   5198	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
   5199	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
   5200		ctxt->d = NotImpl;
   5201	}
   5202
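        	/*
        	 * Resolve multi-level encodings: group (ModRM.reg), RM extension
        	 * (ModRM.rm), mandatory SIMD prefix, x87 escape, ModRM.mod duals
        	 * and mode duals.  Each step selects a new opcode entry and merges
        	 * its flags into ctxt->d.
        	 */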
   5203	while (ctxt->d & GroupMask) {
   5204		switch (ctxt->d & GroupMask) {
   5205		case Group:
   5206			goffset = (ctxt->modrm >> 3) & 7;
   5207			opcode = opcode.u.group[goffset];
   5208			break;
   5209		case GroupDual:
   5210			goffset = (ctxt->modrm >> 3) & 7;
   5211			if ((ctxt->modrm >> 6) == 3)
   5212				opcode = opcode.u.gdual->mod3[goffset];
   5213			else
   5214				opcode = opcode.u.gdual->mod012[goffset];
   5215			break;
   5216		case RMExt:
   5217			goffset = ctxt->modrm & 7;
   5218			opcode = opcode.u.group[goffset];
   5219			break;
   5220		case Prefix:
   5221			if (ctxt->rep_prefix && op_prefix)
   5222				return EMULATION_FAILED;
   5223			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
   5224			switch (simd_prefix) {
   5225			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
   5226			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
   5227			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
   5228			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
   5229			}
   5230			break;
   5231		case Escape:
   5232			if (ctxt->modrm > 0xbf) {
   5233				size_t size = ARRAY_SIZE(opcode.u.esc->high);
   5234				u32 index = array_index_nospec(
   5235					ctxt->modrm - 0xc0, size);
   5236
   5237				opcode = opcode.u.esc->high[index];
   5238			} else {
   5239				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
   5240			}
   5241			break;
   5242		case InstrDual:
   5243			if ((ctxt->modrm >> 6) == 3)
   5244				opcode = opcode.u.idual->mod3;
   5245			else
   5246				opcode = opcode.u.idual->mod012;
   5247			break;
   5248		case ModeDual:
   5249			if (ctxt->mode == X86EMUL_MODE_PROT64)
   5250				opcode = opcode.u.mdual->mode64;
   5251			else
   5252				opcode = opcode.u.mdual->mode32;
   5253			break;
   5254		default:
   5255			return EMULATION_FAILED;
   5256		}
   5257
   5258		ctxt->d &= ~(u64)GroupMask;
   5259		ctxt->d |= opcode.flags;
   5260	}
   5261
   5262	ctxt->is_branch = opcode.flags & IsBranch;
   5263
   5264	/* Unrecognised? */
   5265	if (ctxt->d == 0)
   5266		return EMULATION_FAILED;
   5267
   5268	ctxt->execute = opcode.u.execute;
   5269
   5270	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
   5271	    likely(!(ctxt->d & EmulateOnUD)))
   5272		return EMULATION_FAILED;
   5273
   5274	if (unlikely(ctxt->d &
   5275	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
   5276	     No16))) {
   5277		/*
   5278		 * These are copied unconditionally here, and checked unconditionally
   5279		 * in x86_emulate_insn.
   5280		 */
   5281		ctxt->check_perm = opcode.check_perm;
   5282		ctxt->intercept = opcode.intercept;
   5283
   5284		if (ctxt->d & NotImpl)
   5285			return EMULATION_FAILED;
   5286
   5287		if (mode == X86EMUL_MODE_PROT64) {
   5288			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
   5289				ctxt->op_bytes = 8;
   5290			else if (ctxt->d & NearBranch)
   5291				ctxt->op_bytes = 8;
   5292		}
   5293
   5294		if (ctxt->d & Op3264) {
   5295			if (mode == X86EMUL_MODE_PROT64)
   5296				ctxt->op_bytes = 8;
   5297			else
   5298				ctxt->op_bytes = 4;
   5299		}
   5300
   5301		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
   5302			ctxt->op_bytes = 4;
   5303
   5304		if (ctxt->d & Sse)
   5305			ctxt->op_bytes = 16;
   5306		else if (ctxt->d & Mmx)
   5307			ctxt->op_bytes = 8;
   5308	}
   5309
   5310	/* ModRM and SIB bytes. */
   5311	if (ctxt->d & ModRM) {
   5312		rc = decode_modrm(ctxt, &ctxt->memop);
   5313		if (!has_seg_override) {
   5314			has_seg_override = true;
   5315			ctxt->seg_override = ctxt->modrm_seg;
   5316		}
   5317	} else if (ctxt->d & MemAbs)
   5318		rc = decode_abs(ctxt, &ctxt->memop);
   5319	if (rc != X86EMUL_CONTINUE)
   5320		goto done;
   5321
   5322	if (!has_seg_override)
   5323		ctxt->seg_override = VCPU_SREG_DS;
   5324
   5325	ctxt->memop.addr.mem.seg = ctxt->seg_override;
   5326
   5327	/*
   5328	 * Decode and fetch the source operand: register, memory
   5329	 * or immediate.
   5330	 */
   5331	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
   5332	if (rc != X86EMUL_CONTINUE)
   5333		goto done;
   5334
   5335	/*
   5336	 * Decode and fetch the second source operand: register, memory
   5337	 * or immediate.
   5338	 */
   5339	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
   5340	if (rc != X86EMUL_CONTINUE)
   5341		goto done;
   5342
   5343	/* Decode and fetch the destination operand: register or memory. */
   5344	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
   5345
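        	/*
        	 * RIP-relative operands are relative to the end of the instruction;
        	 * _eip has now been advanced past all fetched bytes, so the fixup
        	 * can be applied.
        	 */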
   5346	if (ctxt->rip_relative && likely(ctxt->memopp))
   5347		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
   5348					ctxt->memopp->addr.mem.ea + ctxt->_eip);
   5349
   5350done:
   5351	if (rc == X86EMUL_PROPAGATE_FAULT)
   5352		ctxt->have_exception = true;
   5353	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
   5354}
   5355
   5356bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
   5357{
   5358	return ctxt->d & PageTable;
   5359}
   5360
   5361static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
   5362{
    5363	/* The second termination condition applies only to REPE
    5364	 * and REPNE. If the repeat string operation prefix is
    5365	 * REPE/REPZ or REPNE/REPNZ, check the corresponding
    5366	 * termination condition:
    5367	 * 	- if REPE/REPZ and ZF = 0 then done
    5368	 * 	- if REPNE/REPNZ and ZF = 1 then done
    5369	 */
   5370	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
   5371	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
   5372	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
   5373		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
   5374		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
   5375		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
   5376		return true;
   5377
   5378	return false;
   5379}
   5380
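        /*
         * FWAIT forces delivery of any pending x87 exception; asm_safe() catches
         * the resulting fault so it can be reflected into the guest as #MF.
         */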
   5381static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
   5382{
   5383	int rc;
   5384
   5385	kvm_fpu_get();
   5386	rc = asm_safe("fwait");
   5387	kvm_fpu_put();
   5388
   5389	if (unlikely(rc != X86EMUL_CONTINUE))
   5390		return emulate_exception(ctxt, MF_VECTOR, 0, false);
   5391
   5392	return X86EMUL_CONTINUE;
   5393}
   5394
   5395static void fetch_possible_mmx_operand(struct operand *op)
   5396{
   5397	if (op->type == OP_MM)
   5398		kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
   5399}
   5400
   5401static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
   5402{
   5403	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
   5404
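        	/*
        	 * Each fastop packs its byte/word/long/quad handlers FASTOP_SIZE
        	 * bytes apart, so step to the matching size variant using log2 of
        	 * the destination width.
        	 */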
   5405	if (!(ctxt->d & ByteOp))
   5406		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
   5407
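        	/*
        	 * Load the guest's arithmetic flags (IF kept set so host interrupts
        	 * stay enabled), call the fastop stub through the speculation-safe
        	 * CALL_NOSPEC thunk, then capture the resulting flags.
        	 */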
   5408	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
   5409	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
   5410	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
   5411	    : "c"(ctxt->src2.val));
   5412
   5413	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
   5414	if (!fop) /* exception is returned in fop variable */
   5415		return emulate_de(ctxt);
   5416	return X86EMUL_CONTINUE;
   5417}
   5418
   5419void init_decode_cache(struct x86_emulate_ctxt *ctxt)
   5420{
   5421	/* Clear fields that are set conditionally but read without a guard. */
   5422	ctxt->rip_relative = false;
   5423	ctxt->rex_prefix = 0;
   5424	ctxt->lock_prefix = 0;
   5425	ctxt->rep_prefix = 0;
   5426	ctxt->regs_valid = 0;
   5427	ctxt->regs_dirty = 0;
   5428
   5429	ctxt->io_read.pos = 0;
   5430	ctxt->io_read.end = 0;
   5431	ctxt->mem_read.end = 0;
   5432}
   5433
   5434int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
   5435{
   5436	const struct x86_emulate_ops *ops = ctxt->ops;
   5437	int rc = X86EMUL_CONTINUE;
   5438	int saved_dst_type = ctxt->dst.type;
   5439	unsigned emul_flags;
   5440
   5441	ctxt->mem_read.pos = 0;
   5442
   5443	/* LOCK prefix is allowed only with some instructions */
   5444	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
   5445		rc = emulate_ud(ctxt);
   5446		goto done;
   5447	}
   5448
   5449	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
   5450		rc = emulate_ud(ctxt);
   5451		goto done;
   5452	}
   5453
   5454	emul_flags = ctxt->ops->get_hflags(ctxt);
   5455	if (unlikely(ctxt->d &
   5456		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
   5457		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
   5458				(ctxt->d & Undefined)) {
   5459			rc = emulate_ud(ctxt);
   5460			goto done;
   5461		}
   5462
   5463		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
   5464		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
   5465			rc = emulate_ud(ctxt);
   5466			goto done;
   5467		}
   5468
   5469		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
   5470			rc = emulate_nm(ctxt);
   5471			goto done;
   5472		}
   5473
   5474		if (ctxt->d & Mmx) {
   5475			rc = flush_pending_x87_faults(ctxt);
   5476			if (rc != X86EMUL_CONTINUE)
   5477				goto done;
   5478			/*
   5479			 * Now that we know the fpu is exception safe, we can fetch
   5480			 * operands from it.
   5481			 */
   5482			fetch_possible_mmx_operand(&ctxt->src);
   5483			fetch_possible_mmx_operand(&ctxt->src2);
   5484			if (!(ctxt->d & Mov))
   5485				fetch_possible_mmx_operand(&ctxt->dst);
   5486		}
   5487
   5488		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
   5489			rc = emulator_check_intercept(ctxt, ctxt->intercept,
   5490						      X86_ICPT_PRE_EXCEPT);
   5491			if (rc != X86EMUL_CONTINUE)
   5492				goto done;
   5493		}
   5494
   5495		/* Instruction can only be executed in protected mode */
   5496		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
   5497			rc = emulate_ud(ctxt);
   5498			goto done;
   5499		}
   5500
    5501		/* Privileged instructions can be executed only at CPL 0 */
   5502		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
   5503			if (ctxt->d & PrivUD)
   5504				rc = emulate_ud(ctxt);
   5505			else
   5506				rc = emulate_gp(ctxt, 0);
   5507			goto done;
   5508		}
   5509
   5510		/* Do instruction specific permission checks */
   5511		if (ctxt->d & CheckPerm) {
   5512			rc = ctxt->check_perm(ctxt);
   5513			if (rc != X86EMUL_CONTINUE)
   5514				goto done;
   5515		}
   5516
   5517		if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
   5518			rc = emulator_check_intercept(ctxt, ctxt->intercept,
   5519						      X86_ICPT_POST_EXCEPT);
   5520			if (rc != X86EMUL_CONTINUE)
   5521				goto done;
   5522		}
   5523
   5524		if (ctxt->rep_prefix && (ctxt->d & String)) {
   5525			/* All REP prefixes have the same first termination condition */
   5526			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
   5527				string_registers_quirk(ctxt);
   5528				ctxt->eip = ctxt->_eip;
   5529				ctxt->eflags &= ~X86_EFLAGS_RF;
   5530				goto done;
   5531			}
   5532		}
   5533	}
   5534
   5535	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
   5536		rc = segmented_read(ctxt, ctxt->src.addr.mem,
   5537				    ctxt->src.valptr, ctxt->src.bytes);
   5538		if (rc != X86EMUL_CONTINUE)
   5539			goto done;
   5540		ctxt->src.orig_val64 = ctxt->src.val64;
   5541	}
   5542
   5543	if (ctxt->src2.type == OP_MEM) {
   5544		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
   5545				    &ctxt->src2.val, ctxt->src2.bytes);
   5546		if (rc != X86EMUL_CONTINUE)
   5547			goto done;
   5548	}
   5549
   5550	if ((ctxt->d & DstMask) == ImplicitOps)
   5551		goto special_insn;
   5552
   5553
   5554	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
   5555		/* optimisation - avoid slow emulated read if Mov */
   5556		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
   5557				   &ctxt->dst.val, ctxt->dst.bytes);
   5558		if (rc != X86EMUL_CONTINUE) {
   5559			if (!(ctxt->d & NoWrite) &&
   5560			    rc == X86EMUL_PROPAGATE_FAULT &&
   5561			    ctxt->exception.vector == PF_VECTOR)
   5562				ctxt->exception.error_code |= PFERR_WRITE_MASK;
   5563			goto done;
   5564		}
   5565	}
   5566	/* Copy full 64-bit value for CMPXCHG8B.  */
   5567	ctxt->dst.orig_val64 = ctxt->dst.val64;
   5568
   5569special_insn:
   5570
   5571	if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
   5572		rc = emulator_check_intercept(ctxt, ctxt->intercept,
   5573					      X86_ICPT_POST_MEMACCESS);
   5574		if (rc != X86EMUL_CONTINUE)
   5575			goto done;
   5576	}
   5577
   5578	if (ctxt->rep_prefix && (ctxt->d & String))
   5579		ctxt->eflags |= X86_EFLAGS_RF;
   5580	else
   5581		ctxt->eflags &= ~X86_EFLAGS_RF;
   5582
   5583	if (ctxt->execute) {
   5584		if (ctxt->d & Fastop)
   5585			rc = fastop(ctxt, ctxt->fop);
   5586		else
   5587			rc = ctxt->execute(ctxt);
   5588		if (rc != X86EMUL_CONTINUE)
   5589			goto done;
   5590		goto writeback;
   5591	}
   5592
   5593	if (ctxt->opcode_len == 2)
   5594		goto twobyte_insn;
   5595	else if (ctxt->opcode_len == 3)
   5596		goto threebyte_insn;
   5597
   5598	switch (ctxt->b) {
   5599	case 0x70 ... 0x7f: /* jcc (short) */
   5600		if (test_cc(ctxt->b, ctxt->eflags))
   5601			rc = jmp_rel(ctxt, ctxt->src.val);
   5602		break;
   5603	case 0x8d: /* lea r16/r32, m */
   5604		ctxt->dst.val = ctxt->src.addr.mem.ea;
   5605		break;
   5606	case 0x90 ... 0x97: /* nop / xchg reg, rax */
   5607		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
   5608			ctxt->dst.type = OP_NONE;
   5609		else
   5610			rc = em_xchg(ctxt);
   5611		break;
   5612	case 0x98: /* cbw/cwde/cdqe */
   5613		switch (ctxt->op_bytes) {
   5614		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
   5615		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
   5616		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
   5617		}
   5618		break;
   5619	case 0xcc:		/* int3 */
   5620		rc = emulate_int(ctxt, 3);
   5621		break;
   5622	case 0xcd:		/* int n */
   5623		rc = emulate_int(ctxt, ctxt->src.val);
   5624		break;
   5625	case 0xce:		/* into */
   5626		if (ctxt->eflags & X86_EFLAGS_OF)
   5627			rc = emulate_int(ctxt, 4);
   5628		break;
   5629	case 0xe9: /* jmp rel */
   5630	case 0xeb: /* jmp rel short */
   5631		rc = jmp_rel(ctxt, ctxt->src.val);
   5632		ctxt->dst.type = OP_NONE; /* Disable writeback. */
   5633		break;
   5634	case 0xf4:              /* hlt */
   5635		ctxt->ops->halt(ctxt);
   5636		break;
   5637	case 0xf5:	/* cmc */
   5638		/* complement carry flag from eflags reg */
   5639		ctxt->eflags ^= X86_EFLAGS_CF;
   5640		break;
   5641	case 0xf8: /* clc */
   5642		ctxt->eflags &= ~X86_EFLAGS_CF;
   5643		break;
   5644	case 0xf9: /* stc */
   5645		ctxt->eflags |= X86_EFLAGS_CF;
   5646		break;
   5647	case 0xfc: /* cld */
   5648		ctxt->eflags &= ~X86_EFLAGS_DF;
   5649		break;
   5650	case 0xfd: /* std */
   5651		ctxt->eflags |= X86_EFLAGS_DF;
   5652		break;
   5653	default:
   5654		goto cannot_emulate;
   5655	}
   5656
   5657	if (rc != X86EMUL_CONTINUE)
   5658		goto done;
   5659
   5660writeback:
   5661	if (ctxt->d & SrcWrite) {
   5662		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
   5663		rc = writeback(ctxt, &ctxt->src);
   5664		if (rc != X86EMUL_CONTINUE)
   5665			goto done;
   5666	}
   5667	if (!(ctxt->d & NoWrite)) {
   5668		rc = writeback(ctxt, &ctxt->dst);
   5669		if (rc != X86EMUL_CONTINUE)
   5670			goto done;
   5671	}
   5672
   5673	/*
   5674	 * restore dst type in case the decoding will be reused
    5675	 * (happens for string instructions)
   5676	 */
   5677	ctxt->dst.type = saved_dst_type;
   5678
   5679	if ((ctxt->d & SrcMask) == SrcSI)
   5680		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
   5681
   5682	if ((ctxt->d & DstMask) == DstDI)
   5683		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
   5684
   5685	if (ctxt->rep_prefix && (ctxt->d & String)) {
   5686		unsigned int count;
   5687		struct read_cache *r = &ctxt->io_read;
   5688		if ((ctxt->d & SrcMask) == SrcSI)
   5689			count = ctxt->src.count;
   5690		else
   5691			count = ctxt->dst.count;
   5692		register_address_increment(ctxt, VCPU_REGS_RCX, -count);
   5693
   5694		if (!string_insn_completed(ctxt)) {
   5695			/*
   5696			 * Re-enter guest when pio read ahead buffer is empty
    5697			 * or, if it is not used, after every 1024 iterations.
   5698			 */
   5699			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
   5700			    (r->end == 0 || r->end != r->pos)) {
   5701				/*
   5702				 * Reset read cache. Usually happens before
    5703				 * decode, but since the instruction is restarted
   5704				 * we have to do it here.
   5705				 */
   5706				ctxt->mem_read.end = 0;
   5707				writeback_registers(ctxt);
   5708				return EMULATION_RESTART;
   5709			}
   5710			goto done; /* skip rip writeback */
   5711		}
   5712		ctxt->eflags &= ~X86_EFLAGS_RF;
   5713	}
   5714
   5715	ctxt->eip = ctxt->_eip;
   5716	if (ctxt->mode != X86EMUL_MODE_PROT64)
   5717		ctxt->eip = (u32)ctxt->_eip;
   5718
   5719done:
   5720	if (rc == X86EMUL_PROPAGATE_FAULT) {
   5721		WARN_ON(ctxt->exception.vector > 0x1f);
   5722		ctxt->have_exception = true;
   5723	}
   5724	if (rc == X86EMUL_INTERCEPTED)
   5725		return EMULATION_INTERCEPTED;
   5726
   5727	if (rc == X86EMUL_CONTINUE)
   5728		writeback_registers(ctxt);
   5729
   5730	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
   5731
   5732twobyte_insn:
   5733	switch (ctxt->b) {
   5734	case 0x09:		/* wbinvd */
   5735		(ctxt->ops->wbinvd)(ctxt);
   5736		break;
   5737	case 0x08:		/* invd */
   5738	case 0x0d:		/* GrpP (prefetch) */
   5739	case 0x18:		/* Grp16 (prefetch/nop) */
   5740	case 0x1f:		/* nop */
   5741		break;
   5742	case 0x20: /* mov cr, reg */
   5743		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
   5744		break;
   5745	case 0x21: /* mov from dr to reg */
   5746		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
   5747		break;
   5748	case 0x40 ... 0x4f:	/* cmov */
   5749		if (test_cc(ctxt->b, ctxt->eflags))
   5750			ctxt->dst.val = ctxt->src.val;
   5751		else if (ctxt->op_bytes != 4)
   5752			ctxt->dst.type = OP_NONE; /* no writeback */
   5753		break;
    5754	case 0x80 ... 0x8f: /* jcc rel (near) */
   5755		if (test_cc(ctxt->b, ctxt->eflags))
   5756			rc = jmp_rel(ctxt, ctxt->src.val);
   5757		break;
   5758	case 0x90 ... 0x9f:     /* setcc r/m8 */
   5759		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
   5760		break;
   5761	case 0xb6 ... 0xb7:	/* movzx */
   5762		ctxt->dst.bytes = ctxt->op_bytes;
   5763		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
   5764						       : (u16) ctxt->src.val;
   5765		break;
   5766	case 0xbe ... 0xbf:	/* movsx */
   5767		ctxt->dst.bytes = ctxt->op_bytes;
   5768		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
   5769							(s16) ctxt->src.val;
   5770		break;
   5771	default:
   5772		goto cannot_emulate;
   5773	}
   5774
   5775threebyte_insn:
   5776
   5777	if (rc != X86EMUL_CONTINUE)
   5778		goto done;
   5779
   5780	goto writeback;
   5781
   5782cannot_emulate:
   5783	return EMULATION_FAILED;
   5784}
   5785
   5786void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
   5787{
   5788	invalidate_registers(ctxt);
   5789}
   5790
   5791void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
   5792{
   5793	writeback_registers(ctxt);
   5794}
   5795
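        /*
         * A single pre-translated GPA can be reused only if the instruction
         * accesses exactly one memory location; REP string and two-memory-operand
         * instructions do not.
         */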
   5796bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
   5797{
   5798	if (ctxt->rep_prefix && (ctxt->d & String))
   5799		return false;
   5800
   5801	if (ctxt->d & TwoMemOp)
   5802		return false;
   5803
   5804	return true;
   5805}