cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

bpf_jit_comp32.c (68697B)


// SPDX-License-Identifier: GPL-2.0
/*
 * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
 *
 * Author: Wang YanQing (udknight@gmail.com)
 * The code is based on code and ideas from:
 * Eric Dumazet (eric.dumazet@gmail.com)
 * and from:
 * Shubham Bansal <illusionist.neo@gmail.com>
 */

#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/asm-prototypes.h>
#include <linux/bpf.h>

/*
 * eBPF prog stack layout:
 *
 *                         high
 * original ESP =>        +-----+
 *                        |     | callee saved registers
 *                        +-----+
 *                        | ... | eBPF JIT scratch space
 * BPF_FP,IA32_EBP  =>    +-----+
 *                        | ... | eBPF prog stack
 *                        +-----+
 *                        |RSVD | JIT scratchpad
 * current ESP =>         +-----+
 *                        |     |
 *                        | ... | Function call stack
 *                        |     |
 *                        +-----+
 *                          low
 *
 * The callee saved registers:
 *
 *                                high
 * original ESP =>        +------------------+ \
 *                        |        ebp       | |
 * current EBP =>         +------------------+ } callee saved registers
 *                        |    ebx,esi,edi   | |
 *                        +------------------+ /
 *                                low
 */

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	if (len == 1)
		*ptr = bytes;
	else if (len == 2)
		*(u16 *)ptr = bytes;
	else {
		*(u32 *)ptr = bytes;
		barrier();
	}
	return ptr + len;
}

#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   \
	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

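/*
 * Illustration only: with the local 'u8 *prog' and 'int cnt' that every
 * emitter below declares,
 *
 *	EMIT2(0x89, 0xD8);	// mov eax,ebx (0x89 = MOV r/m32,r32)
 *	EMIT1(0xC3);		// ret
 *
 * appends those bytes to the image and advances both 'prog' and 'cnt',
 * so jump displacements can later be computed from 'cnt' alone.  The
 * *_off32 variants append a little-endian 32 bit immediate or
 * displacement after the opcode bytes, as the instruction stream
 * expects.
 */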
#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)

static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
	return value == (s64) (s32) value;
}

#define STACK_OFFSET(k)	(k)
#define TCALL_CNT	(MAX_BPF_JIT_REG + 0)	/* Tail Call Count */

#define IA32_EAX	(0x0)
#define IA32_EBX	(0x3)
#define IA32_ECX	(0x1)
#define IA32_EDX	(0x2)
#define IA32_ESI	(0x6)
#define IA32_EDI	(0x7)
#define IA32_EBP	(0x5)
#define IA32_ESP	(0x4)

/*
 * List of x86 conditional jump opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define IA32_JB  0x72
#define IA32_JAE 0x73
#define IA32_JE  0x74
#define IA32_JNE 0x75
#define IA32_JBE 0x76
#define IA32_JA  0x77
#define IA32_JL  0x7C
#define IA32_JGE 0x7D
#define IA32_JLE 0x7E
#define IA32_JG  0x7F

#define COND_JMP_OPCODE_INVALID	(0xFF)

/*
 * Map eBPF registers to IA32 32bit registers or stack scratch space.
 *
 * 1. All the registers, R0-R10, are mapped to scratch space on the stack.
 * 2. We need two 64 bit temp registers to do complex operations on eBPF
 *    registers.
 * 3. For performance reasons, BPF_REG_AX, used for blinding constants,
 *    is mapped to a real hardware register pair, IA32_ESI and IA32_EDI.
 *
 * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
 * registers, we have to map each eBPF register to two IA32 32 bit regs
 * or scratch memory space and build each eBPF 64 bit register from those.
 *
 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
 */
static const u8 bpf2ia32[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},

	/* The arguments from eBPF program to in-kernel function */
	/* Stored on stack scratch space */
	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},

	/* Callee saved registers that in-kernel function will preserve */
	/* Stored on stack scratch space */
	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},

	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},

	/* Temporary register for blinding constants. */
	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},

	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
};
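
/*
 * For example, once emit_prologue() below has rewound EBP, the 64 bit
 * eBPF R1 lives in two dwords on the stack: its lo32 half at [ebp+8]
 * and its hi32 half at [ebp+12], so every access to it is an
 * [ebp+disp8] memory operand.
 */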

#define dst_lo	dst[0]
#define dst_hi	dst[1]
#define src_lo	src[0]
#define src_hi	src[1]

#define STACK_ALIGNMENT	8
/*
 * Stack space for BPF_REG_0 through BPF_REG_9, BPF_REG_FP and the
 * tail call count: twelve 64 bit slots.  (BPF_REG_AX lives in the
 * ESI/EDI register pair and needs no slot.)
 */
#define SCRATCH_SIZE 96

/* Total stack size used in JITed code */
#define _STACK_SIZE	(stack_depth + SCRATCH_SIZE)

#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
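
/*
 * For example, with stack_depth == 100, _STACK_SIZE is 100 + 96 = 196
 * and STACK_SIZE rounds that up to 200, keeping ESP 8 byte aligned.
 */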

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (off)

/* Encode 'dst_reg' register into IA32 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
	return byte + dst_reg;
}

/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
	return byte + dst_reg + (src_reg << 3);
}
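
/*
 * 'dst_reg' lands in the ModRM r/m field and 'src_reg' in the reg
 * field.  For example, add_2reg(0x40, IA32_EBP, IA32_EAX) yields 0x45
 * (mod=01, reg=EAX, r/m=EBP), i.e. an [ebp+disp8] operand, which is
 * why stack accesses below take the shape
 * EMIT3(opcode, add_2reg(0x40, IA32_EBP, reg), STACK_VAR(off)).
 */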

static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with int3 instructions */
	memset(area, 0xcc, size);
}

static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk) {
		if (val == 0) {
			/* xor eax,eax */
			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		} else {
			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
				    STACK_VAR(dst), val);
		}
	} else {
		if (val == 0)
			EMIT2(0x33, add_2reg(0xC0, dst, dst));
		else
			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
				    val);
	}
	*pprog = prog;
}

/* dst = src (4 bytes) */
static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
	else
		/* mov dst,sreg */
		EMIT2(0x89, add_2reg(0xC0, dst, sreg));

	*pprog = prog;
}

/* dst = src */
static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
				     const u8 src[], bool dstk,
				     bool sstk, u8 **pprog,
				     const struct bpf_prog_aux *aux)
{
	emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
	if (is64)
		/* complete 8 byte move */
		emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
	else if (!aux->verifier_zext)
		/* zero out high 4 bytes */
		emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
}

/* Sign extended move */
static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
				     const u32 val, bool dstk, u8 **pprog)
{
	u32 hi = 0;

	if (is64 && (val & (1<<31)))
		hi = (u32)~0;
	emit_ia32_mov_i(dst_lo, val, dstk, pprog);
	emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
}
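
/*
 * For example, moving the 32 bit immediate -2 (0xFFFFFFFE) into a 64
 * bit destination sets bit 31, so hi becomes ~0 and the register ends
 * up holding 0xFFFFFFFFFFFFFFFE, matching BPF's sign-extending
 * BPF_ALU64 | BPF_MOV | BPF_K semantics.
 */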

/*
 * ALU operation (32 bit)
 * dst = dst * src
 */
static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_ECX : src;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* mul sreg */
	EMIT2(0xF7, add_1reg(0xE0, sreg));

	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov dst,eax */
		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));

	*pprog = prog;
}

static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
					 bool dstk, u8 **pprog,
					 const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk && val != 64) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/*
		 * Emit 'movzwl eax,ax' to zero extend 16-bit
		 * into 64 bit
		 */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* nop */
		break;
	}

	if (dstk && val != 64) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
				       bool dstk, u8 **pprog,
				       const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
		EMIT1(0x66);
		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);

		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));

		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* Emit 'bswap edx' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_hi));

		/* mov ecx,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* mov dreg_lo,ecx */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));

		break;
	}
	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

/*
 * ALU operation (32 bit)
 * dst = dst (div|mod) src
 */
static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
				       bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* xor edx,edx */
	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* div ecx */
	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));

	if (op == BPF_MOD) {
		if (dstk)
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(dst));
		else
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
	} else {
		if (dstk)
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		else
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
	}
	*pprog = prog;
}

/*
 * ALU operation (32 bit)
 * dst = dst (shift) src
 */
static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 b2;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	switch (op) {
	case BPF_LSH:
		b2 = 0xE0; break;
	case BPF_RSH:
		b2 = 0xE8; break;
	case BPF_ARSH:
		b2 = 0xF8; break;
	default:
		return;
	}
	EMIT2(0xD3, add_1reg(b2, dreg));

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
	*pprog = prog;
}

/*
 * ALU operation (32 bit)
 * dst = dst (op) src
 */
static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;
	u8 dreg = dstk ? IA32_EDX : dst;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));

	if (dstk)
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));

	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (hi && is64)
			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		else
			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst - src */
	case BPF_SUB:
		if (hi && is64)
			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		else
			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst | src */
	case BPF_OR:
		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & src */
	case BPF_AND:
		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}

/* ALU operation (64 bit) */
static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
				     const u8 dst[], const u8 src[],
				     bool dstk,  bool sstk,
				     u8 **pprog, const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;

	emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
	if (is64)
		emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
				&prog);
	else if (!aux->verifier_zext)
		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
	*pprog = prog;
}

/*
 * ALU operation (32 bit)
 * dst = dst (op) val
 */
static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const s32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 sreg = IA32_EDX;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (!is_imm8(val))
		/* mov edx,imm32*/
		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);

	switch (op) {
	/* dst = dst + val */
	case BPF_ADD:
		if (hi && is64) {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xD0, dreg), val);
			else
				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xC0, dreg), val);
			else
				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst - val */
	case BPF_SUB:
		if (hi && is64) {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xD8, dreg), val);
			else
				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xE8, dreg), val);
			else
				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst | val */
	case BPF_OR:
		if (is_imm8(val))
			EMIT3(0x83, add_1reg(0xC8, dreg), val);
		else
			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & val */
	case BPF_AND:
		if (is_imm8(val))
			EMIT3(0x83, add_1reg(0xE0, dreg), val);
		else
			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ val */
	case BPF_XOR:
		if (is_imm8(val))
			EMIT3(0x83, add_1reg(0xF0, dreg), val);
		else
			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	case BPF_NEG:
		EMIT2(0xF7, add_1reg(0xD8, dreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}

/* ALU operation (64 bit) */
static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
				     const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog,
				     const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	u32 hi = 0;

	if (is64 && (val & (1<<31)))
		hi = (u32)~0;

	emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
	if (is64)
		emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
	else if (!aux->verifier_zext)
		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);

	*pprog = prog;
}

/* dst = -dst (64 bit) */
static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	/* neg dreg_lo */
	EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
	/* adc dreg_hi,0x0 */
	EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
	/* neg dreg_hi */
	EMIT2(0xF7, add_1reg(0xD8, dreg_hi));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
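
/*
 * The neg/adc/neg sequence above is a 64 bit two's complement negate:
 * 'neg dreg_lo' computes -lo and sets CF when lo != 0, so
 * 'adc dreg_hi,0' followed by 'neg dreg_hi' yields -(hi + carry),
 * which is the correct high dword of -dst.
 */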

/* dst = dst << src */
static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shld dreg_hi,dreg_lo,cl */
	EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo));
	/* shl dreg_lo,cl */
	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));

	/* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (4 bytes) when < 32 */
	EMIT2(IA32_JB, 4);

	/* mov dreg_hi,dreg_lo */
	EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
	/* xor dreg_lo,dreg_lo */
	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}

/* dst = dst >> src (signed) */
static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
				      bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shrd dreg_lo,dreg_hi,cl */
	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* sar dreg_hi,cl */
	EMIT2(0xD3, add_1reg(0xF8, dreg_hi));

	/* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (5 bytes) when < 32 */
	EMIT2(IA32_JB, 5);

	/* mov dreg_lo,dreg_hi */
	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* sar dreg_hi,31 */
	EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}

/* dst = dst >> src */
static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shrd dreg_lo,dreg_hi,cl */
	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* shr dreg_hi,cl */
	EMIT2(0xD3, add_1reg(0xE8, dreg_hi));

	/* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (4 bytes) when < 32 */
	EMIT2(IA32_JB, 4);

	/* mov dreg_lo,dreg_hi */
	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* xor dreg_hi,dreg_hi */
	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}

/* dst = dst << val */
static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	/* Do LSH operation */
	if (val < 32) {
		/* shld dreg_hi,dreg_lo,imm8 */
		EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val);
		/* shl dreg_lo,imm8 */
		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* shl dreg_lo,imm8 */
		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
	} else {
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

/* dst = dst >> val */
static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	/* Do RSH operation */
	if (val < 32) {
		/* shrd dreg_lo,dreg_hi,imm8 */
		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
		/* shr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* shr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	} else {
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

/* dst = dst >> val (signed) */
static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
				      bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	/* Do RSH operation */
	if (val < 32) {
		/* shrd dreg_lo,dreg_hi,imm8 */
		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
		/* ashr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* ashr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));

		/* ashr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
	} else {
		/* ashr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_hi));
	else
		/* mov eax,dst_hi */
		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* mov ecx,eax */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
	else
		/* mul src_hi */
		EMIT2(0xF7, add_1reg(0xE0, src_hi));

	/* add ecx,eax */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* add ecx,edx */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}
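
/*
 * The three 'mul' sequence above builds the low 64 bits of the product
 * from 32x32->64 partial products:
 *
 *   dst * src mod 2^64 = dst_lo*src_lo
 *                        + ((dst_hi*src_lo + dst_lo*src_hi) << 32)
 *
 * EDX:EAX ends up holding dst_lo*src_lo, while ECX accumulates the two
 * cross terms plus that product's high dword to form the final dst_hi.
 */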

static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u32 hi;

	hi = val & (1<<31) ? (u32)~0 : 0;
	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
	else
		/* mul dst_hi */
		EMIT2(0xF7, add_1reg(0xE0, dst_hi));

	/* mov ecx,eax */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
	/* add ecx,eax */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));

	/* add ecx,edx */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}

static int bpf_size_to_x86_bytes(int bpf_size)
{
	if (bpf_size == BPF_W)
		return 4;
	else if (bpf_size == BPF_H)
		return 2;
	else if (bpf_size == BPF_B)
		return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
	else
		return 0;
}

struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

#define PROLOGUE_SIZE 35

/*
 * Emit prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program.
 */
static void emit_prologue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	int cnt = 0;
	const u8 *r1 = bpf2ia32[BPF_REG_1];
	const u8 fplo = bpf2ia32[BPF_REG_FP][0];
	const u8 fphi = bpf2ia32[BPF_REG_FP][1];
	const u8 *tcc = bpf2ia32[TCALL_CNT];

	/* push ebp */
	EMIT1(0x55);
	/* mov ebp,esp */
	EMIT2(0x89, 0xE5);
	/* push edi */
	EMIT1(0x57);
	/* push esi */
	EMIT1(0x56);
	/* push ebx */
	EMIT1(0x53);

	/* sub esp,STACK_SIZE */
	EMIT2_off32(0x81, 0xEC, STACK_SIZE);
	/* sub ebp,SCRATCH_SIZE+12 */
	EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
	/* xor ebx,ebx */
	EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));

	/* Set up BPF prog stack base register */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));

	/* Move BPF_CTX (EAX) to BPF_REG_R1 */
	/* mov dword ptr [ebp+off],eax */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));

	/* Initialize tail call count */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	*pprog = prog;
}
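
/*
 * After the two 'sub' instructions above, the 96 scratch bytes sit at
 * [ebp+0]..[ebp+95] and the saved ebx/esi/edi at
 * [ebp+96]/[ebp+100]/[ebp+104]; emit_epilogue() adds SCRATCH_SIZE+12
 * back to EBP before restoring them from [ebp-12]..[ebp-4].
 */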

/* Emit epilogue code for BPF program */
static void emit_epilogue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	const u8 *r0 = bpf2ia32[BPF_REG_0];
	int cnt = 0;

	/* mov eax,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
	/* mov edx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));

	/* add ebp,SCRATCH_SIZE+12 */
	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);

	/* mov ebx,dword ptr [ebp-12] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
	/* mov esi,dword ptr [ebp-8] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
	/* mov edi,dword ptr [ebp-4] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);

	EMIT1(0xC9); /* leave */
	EMIT1(0xC3); /* ret */
	*pprog = prog;
}

static int emit_jmp_edx(u8 **pprog, u8 *ip)
{
	u8 *prog = *pprog;
	int cnt = 0;

#ifdef CONFIG_RETPOLINE
	EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
#else
	EMIT2(0xFF, 0xE2);
#endif
	*pprog = prog;

	return cnt;
}

/*
 * Generate the following code:
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
	u8 *prog = *pprog;
	int cnt = 0;
	const u8 *r1 = bpf2ia32[BPF_REG_1];
	const u8 *r2 = bpf2ia32[BPF_REG_2];
	const u8 *r3 = bpf2ia32[BPF_REG_3];
	const u8 *tcc = bpf2ia32[TCALL_CNT];
	u32 lo, hi;
	static int jmp_label1 = -1;

	/*
	 * if (index >= array->map.max_entries)
	 *     goto out;
	 */
	/* mov eax,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
	/* mov edx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));

	/* cmp dword ptr [eax+off],edx */
	EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
	      offsetof(struct bpf_array, map.max_entries));
	/* jbe out */
	EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));

	/*
	 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
	 *     goto out;
	 */
	lo = (u32)MAX_TAIL_CALL_CNT;
	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
	/* mov ebx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	/* cmp ebx,hi */
	EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
	/* jne 3: when the hi words differ, their flags decide the jae */
	EMIT2(IA32_JNE, 3);
	/* cmp ecx,lo */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);

	/* jae out */
	EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));

	/* add ecx,0x1 */
	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
	/* adc ebx,0x0 */
	EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);

	/* mov dword ptr [ebp+off],ecx */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
	/* mov dword ptr [ebp+off],ebx */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	/* prog = array->ptrs[index]; */
	/* mov edx, [eax + edx * 4 + offsetof(...)] */
	EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *     goto out;
	 */
	/* test edx,edx */
	EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* je out */
	EMIT2(IA32_JE, jmp_label(jmp_label1, 2));

	/* goto *(prog->bpf_func + prologue_size); */
	/* mov edx, dword ptr [edx + 32] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
	      offsetof(struct bpf_prog, bpf_func));
	/* add edx,prologue_size */
	EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);

	/* mov eax,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));

	/*
	 * Now we're ready to jump into next BPF program:
	 * eax == ctx (1st arg)
	 * edx == prog->bpf_func + prologue_size
	 */
	cnt += emit_jmp_edx(&prog, ip + cnt);

	if (jmp_label1 == -1)
		jmp_label1 = cnt;

	/* out: */
	*pprog = prog;
}
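
/*
 * Note the jmp_label() trick above: the static jmp_label1 records 'cnt'
 * at the out: label the first time this helper runs, and every
 * conditional jump then encodes its displacement as
 * label - cnt - jmp_insn_len, i.e. relative to the byte after the jump
 * as x86 short jumps require; the JIT's repeated converging passes make
 * the recorded offset consistent.
 */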

/* Push the 64 bit scratch stack register pair onto the stack. */
static inline void emit_push_r64(const u8 src[], u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
	/* push ecx */
	EMIT1(0x51);

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
	/* push ecx */
	EMIT1(0x51);

	*pprog = prog;
}

static void emit_push_r32(const u8 src[], u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
	/* push ecx */
	EMIT1(0x51);

	*pprog = prog;
}

static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
{
	u8 jmp_cond;

	/* Convert BPF opcode to x86 */
	switch (op) {
	case BPF_JEQ:
		jmp_cond = IA32_JE;
		break;
	case BPF_JSET:
	case BPF_JNE:
		jmp_cond = IA32_JNE;
		break;
	case BPF_JGT:
		/* GT is unsigned '>', JA in x86 */
		jmp_cond = IA32_JA;
		break;
	case BPF_JLT:
		/* LT is unsigned '<', JB in x86 */
		jmp_cond = IA32_JB;
		break;
	case BPF_JGE:
		/* GE is unsigned '>=', JAE in x86 */
		jmp_cond = IA32_JAE;
		break;
	case BPF_JLE:
		/* LE is unsigned '<=', JBE in x86 */
		jmp_cond = IA32_JBE;
		break;
	case BPF_JSGT:
		if (!is_cmp_lo)
			/* Signed '>', GT in x86 */
			jmp_cond = IA32_JG;
		else
			/* GT is unsigned '>', JA in x86 */
			jmp_cond = IA32_JA;
		break;
	case BPF_JSLT:
		if (!is_cmp_lo)
			/* Signed '<', LT in x86 */
			jmp_cond = IA32_JL;
		else
			/* LT is unsigned '<', JB in x86 */
			jmp_cond = IA32_JB;
		break;
	case BPF_JSGE:
		if (!is_cmp_lo)
			/* Signed '>=', GE in x86 */
			jmp_cond = IA32_JGE;
		else
			/* GE is unsigned '>=', JAE in x86 */
			jmp_cond = IA32_JAE;
		break;
	case BPF_JSLE:
		if (!is_cmp_lo)
			/* Signed '<=', LE in x86 */
			jmp_cond = IA32_JLE;
		else
			/* LE is unsigned '<=', JBE in x86 */
			jmp_cond = IA32_JBE;
		break;
	default: /* to silence GCC warning */
		jmp_cond = COND_JMP_OPCODE_INVALID;
		break;
	}

	return jmp_cond;
}
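
/*
 * For a 64 bit conditional jump the JIT compares the high dwords first
 * with the signed variant (is_cmp_lo == false) and, when they are
 * equal, falls through to a compare of the low dwords with
 * is_cmp_lo == true: the low halves are an unsigned magnitude even for
 * signed operands, hence the unsigned JA/JB/JAE/JBE variants there.
 */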

/* The i386 kernel is compiled with "-mregparm=3".  From the GCC
 * documentation:
 *
 * ==== snippet ====
 * regparm (number)
 *	On x86-32 targets, the regparm attribute causes the compiler
 *	to pass arguments number one to (number) if they are of integral
 *	type in registers EAX, EDX, and ECX instead of on the stack.
 *	Functions that take a variable number of arguments continue
 *	to be passed all of their arguments on the stack.
 * ==== snippet ====
 *
 * The first three args of a function will be considered for
 * putting into the 32bit registers EAX, EDX, and ECX.
 *
 * Two 32bit registers are used to pass a 64bit arg.
 *
 * For example,
 * void foo(u32 a, u32 b, u32 c, u32 d):
 *	u32 a: EAX
 *	u32 b: EDX
 *	u32 c: ECX
 *	u32 d: stack
 *
 * void foo(u64 a, u32 b, u32 c):
 *	u64 a: EAX (lo32) EDX (hi32)
 *	u32 b: ECX
 *	u32 c: stack
 *
 * void foo(u32 a, u64 b, u32 c):
 *	u32 a: EAX
 *	u64 b: EDX (lo32) ECX (hi32)
 *	u32 c: stack
 *
 * void foo(u32 a, u32 b, u64 c):
 *	u32 a: EAX
 *	u32 b: EDX
 *	u64 c: stack
 *
 * The return value will be stored in EAX (and EDX for a 64bit value).
 *
 * For example,
 * u32 foo(u32 a, u32 b, u32 c):
 *	return value: EAX
 *
 * u64 foo(u32 a, u32 b, u32 c):
 *	return value: EAX (lo32) EDX (hi32)
 *
 * Notes:
 *	The verifier only accepts functions having integers and pointers
 *	as their args and return value, so there is no
 *	struct-by-value.
 *
 * emit_kfunc_call() finds out the btf_func_model by calling
 * bpf_jit_find_kfunc_model().  A btf_func_model
 * has the details about the number of args, size of each arg,
 * and the size of the return value.
 *
 * It first decides how many args can be passed by EAX, EDX, and ECX.
 * That will decide what args should be pushed to the stack:
 * [first_stack_regno, last_stack_regno] are the bpf regnos
 * that should be pushed to the stack.
 *
 * It will first push all args to the stack because the push
 * will need to use ECX.  Then, it moves
 * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX.
 *
 * When emitting a call (0xE8), it needs to figure out
 * the jmp_offset relative to the jit-insn address immediately
 * following the call (0xE8) instruction.  At this point, it knows
 * the end of the jit-insn address after completely translating the
 * current (BPF_JMP | BPF_CALL) bpf-insn.  It is passed as "end_addr"
 * to emit_kfunc_call().  Thus, it can learn the "immediately follows
 * the call" address by figuring out how many jit-insns are generated
 * between the call (0xE8) and the end_addr:
 *	- 0-1 jit-insn (3 bytes each) to restore the esp pointer if there
 *	  are args pushed to the stack.
 *	- 0-2 jit-insns (3 bytes each) to handle the return value.
 */
static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr,
			   const struct bpf_insn *insn, u8 **pprog)
{
	const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX };
	int i, cnt = 0, first_stack_regno, last_stack_regno;
	int free_arg_regs = ARRAY_SIZE(arg_regs);
	const struct btf_func_model *fm;
	int bytes_in_stack = 0;
	const u8 *cur_arg_reg;
	u8 *prog = *pprog;
	s64 jmp_offset;

	fm = bpf_jit_find_kfunc_model(bpf_prog, insn);
	if (!fm)
		return -EINVAL;

	first_stack_regno = BPF_REG_1;
	for (i = 0; i < fm->nr_args; i++) {
		int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1;

		if (regs_needed > free_arg_regs)
			break;

		free_arg_regs -= regs_needed;
		first_stack_regno++;
	}

	/* Push the args to the stack */
	last_stack_regno = BPF_REG_0 + fm->nr_args;
	for (i = last_stack_regno; i >= first_stack_regno; i--) {
		if (fm->arg_size[i - 1] > sizeof(u32)) {
			emit_push_r64(bpf2ia32[i], &prog);
			bytes_in_stack += 8;
		} else {
			emit_push_r32(bpf2ia32[i], &prog);
			bytes_in_stack += 4;
		}
	}

	cur_arg_reg = &arg_regs[0];
	for (i = BPF_REG_1; i < first_stack_regno; i++) {
		/* mov e[adc]x,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
		      STACK_VAR(bpf2ia32[i][0]));
		if (fm->arg_size[i - 1] > sizeof(u32))
			/* mov e[adc]x,dword ptr [ebp+off] */
			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
			      STACK_VAR(bpf2ia32[i][1]));
	}

	if (bytes_in_stack)
		/* add esp,"bytes_in_stack" */
		end_addr -= 3;

	/* mov dword ptr [ebp+off],edx */
	if (fm->ret_size > sizeof(u32))
		end_addr -= 3;

	/* mov dword ptr [ebp+off],eax */
	if (fm->ret_size)
		end_addr -= 3;

	jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr;
	if (!is_simm32(jmp_offset)) {
		pr_err("unsupported BPF kernel function jmp_offset:%lld\n",
		       jmp_offset);
		return -EINVAL;
	}

	EMIT1_off32(0xE8, jmp_offset);

	if (fm->ret_size)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(bpf2ia32[BPF_REG_0][0]));

	if (fm->ret_size > sizeof(u32))
		/* mov dword ptr [ebp+off],edx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(bpf2ia32[BPF_REG_0][1]));

	if (bytes_in_stack)
		/* add esp,"bytes_in_stack" */
		EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack);

	*pprog = prog;

	return 0;
}
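
/*
 * For example, for a kfunc that returns u64 and needed args pushed,
 * end_addr is rewound by 3+3+3 bytes above (the 'add esp' and the two
 * return-value stores emitted after the call), so the 0xE8 displacement
 * is computed from the byte immediately after its imm32, exactly as the
 * CPU does.
 */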
   1658
   1659static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
   1660		  int oldproglen, struct jit_context *ctx)
   1661{
   1662	struct bpf_insn *insn = bpf_prog->insnsi;
   1663	int insn_cnt = bpf_prog->len;
   1664	bool seen_exit = false;
   1665	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
   1666	int i, cnt = 0;
   1667	int proglen = 0;
   1668	u8 *prog = temp;
   1669
   1670	emit_prologue(&prog, bpf_prog->aux->stack_depth);
   1671
   1672	for (i = 0; i < insn_cnt; i++, insn++) {
   1673		const s32 imm32 = insn->imm;
   1674		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
   1675		const bool dstk = insn->dst_reg != BPF_REG_AX;
   1676		const bool sstk = insn->src_reg != BPF_REG_AX;
   1677		const u8 code = insn->code;
   1678		const u8 *dst = bpf2ia32[insn->dst_reg];
   1679		const u8 *src = bpf2ia32[insn->src_reg];
   1680		const u8 *r0 = bpf2ia32[BPF_REG_0];
   1681		s64 jmp_offset;
   1682		u8 jmp_cond;
   1683		int ilen;
   1684		u8 *func;
   1685
   1686		switch (code) {
   1687		/* ALU operations */
   1688		/* dst = src */
   1689		case BPF_ALU | BPF_MOV | BPF_K:
   1690		case BPF_ALU | BPF_MOV | BPF_X:
   1691		case BPF_ALU64 | BPF_MOV | BPF_K:
   1692		case BPF_ALU64 | BPF_MOV | BPF_X:
   1693			switch (BPF_SRC(code)) {
   1694			case BPF_X:
   1695				if (imm32 == 1) {
   1696					/* Special mov32 for zext. */
   1697					emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
   1698					break;
   1699				}
   1700				emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
   1701						  &prog, bpf_prog->aux);
   1702				break;
   1703			case BPF_K:
   1704				/* Sign-extend immediate value to dst reg */
   1705				emit_ia32_mov_i64(is64, dst, imm32,
   1706						  dstk, &prog);
   1707				break;
   1708			}
   1709			break;
   1710		/* dst = dst + src/imm */
   1711		/* dst = dst - src/imm */
   1712		/* dst = dst | src/imm */
   1713		/* dst = dst & src/imm */
   1714		/* dst = dst ^ src/imm */
   1715		/* dst = dst * src/imm */
   1716		/* dst = dst << src */
   1717		/* dst = dst >> src */
   1718		case BPF_ALU | BPF_ADD | BPF_K:
   1719		case BPF_ALU | BPF_ADD | BPF_X:
   1720		case BPF_ALU | BPF_SUB | BPF_K:
   1721		case BPF_ALU | BPF_SUB | BPF_X:
   1722		case BPF_ALU | BPF_OR | BPF_K:
   1723		case BPF_ALU | BPF_OR | BPF_X:
   1724		case BPF_ALU | BPF_AND | BPF_K:
   1725		case BPF_ALU | BPF_AND | BPF_X:
   1726		case BPF_ALU | BPF_XOR | BPF_K:
   1727		case BPF_ALU | BPF_XOR | BPF_X:
   1728		case BPF_ALU64 | BPF_ADD | BPF_K:
   1729		case BPF_ALU64 | BPF_ADD | BPF_X:
   1730		case BPF_ALU64 | BPF_SUB | BPF_K:
   1731		case BPF_ALU64 | BPF_SUB | BPF_X:
   1732		case BPF_ALU64 | BPF_OR | BPF_K:
   1733		case BPF_ALU64 | BPF_OR | BPF_X:
   1734		case BPF_ALU64 | BPF_AND | BPF_K:
   1735		case BPF_ALU64 | BPF_AND | BPF_X:
   1736		case BPF_ALU64 | BPF_XOR | BPF_K:
   1737		case BPF_ALU64 | BPF_XOR | BPF_X:
   1738			switch (BPF_SRC(code)) {
   1739			case BPF_X:
   1740				emit_ia32_alu_r64(is64, BPF_OP(code), dst,
   1741						  src, dstk, sstk, &prog,
   1742						  bpf_prog->aux);
   1743				break;
   1744			case BPF_K:
   1745				emit_ia32_alu_i64(is64, BPF_OP(code), dst,
   1746						  imm32, dstk, &prog,
   1747						  bpf_prog->aux);
   1748				break;
   1749			}
   1750			break;
   1751		case BPF_ALU | BPF_MUL | BPF_K:
   1752		case BPF_ALU | BPF_MUL | BPF_X:
   1753			switch (BPF_SRC(code)) {
   1754			case BPF_X:
   1755				emit_ia32_mul_r(dst_lo, src_lo, dstk,
   1756						sstk, &prog);
   1757				break;
   1758			case BPF_K:
    1759				/* mov ecx,imm32 */
   1760				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
   1761					    imm32);
   1762				emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
   1763						false, &prog);
   1764				break;
   1765			}
   1766			if (!bpf_prog->aux->verifier_zext)
   1767				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
   1768			break;
   1769		case BPF_ALU | BPF_LSH | BPF_X:
   1770		case BPF_ALU | BPF_RSH | BPF_X:
   1771		case BPF_ALU | BPF_ARSH | BPF_K:
   1772		case BPF_ALU | BPF_ARSH | BPF_X:
   1773			switch (BPF_SRC(code)) {
   1774			case BPF_X:
   1775				emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
   1776						  dstk, sstk, &prog);
   1777				break;
   1778			case BPF_K:
    1779				/* mov ecx,imm32 */
   1780				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
   1781					    imm32);
   1782				emit_ia32_shift_r(BPF_OP(code), dst_lo,
   1783						  IA32_ECX, dstk, false,
   1784						  &prog);
   1785				break;
   1786			}
   1787			if (!bpf_prog->aux->verifier_zext)
   1788				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
   1789			break;
   1790		/* dst = dst / src(imm) */
   1791		/* dst = dst % src(imm) */
   1792		case BPF_ALU | BPF_DIV | BPF_K:
   1793		case BPF_ALU | BPF_DIV | BPF_X:
   1794		case BPF_ALU | BPF_MOD | BPF_K:
   1795		case BPF_ALU | BPF_MOD | BPF_X:
   1796			switch (BPF_SRC(code)) {
   1797			case BPF_X:
   1798				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
   1799						    src_lo, dstk, sstk, &prog);
   1800				break;
   1801			case BPF_K:
    1802				/* mov ecx,imm32 */
   1803				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
   1804					    imm32);
   1805				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
   1806						    IA32_ECX, dstk, false,
   1807						    &prog);
   1808				break;
   1809			}
   1810			if (!bpf_prog->aux->verifier_zext)
   1811				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
   1812			break;
   1813		case BPF_ALU64 | BPF_DIV | BPF_K:
   1814		case BPF_ALU64 | BPF_DIV | BPF_X:
   1815		case BPF_ALU64 | BPF_MOD | BPF_K:
   1816		case BPF_ALU64 | BPF_MOD | BPF_X:
   1817			goto notyet;
   1818		/* dst = dst >> imm */
   1819		/* dst = dst << imm */
   1820		case BPF_ALU | BPF_RSH | BPF_K:
   1821		case BPF_ALU | BPF_LSH | BPF_K:
   1822			if (unlikely(imm32 > 31))
   1823				return -EINVAL;
    1824			/* mov ecx,imm32 */
   1825			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
   1826			emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
   1827					  false, &prog);
   1828			if (!bpf_prog->aux->verifier_zext)
   1829				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
   1830			break;
   1831		/* dst = dst << imm */
   1832		case BPF_ALU64 | BPF_LSH | BPF_K:
   1833			if (unlikely(imm32 > 63))
   1834				return -EINVAL;
   1835			emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
   1836			break;
   1837		/* dst = dst >> imm */
   1838		case BPF_ALU64 | BPF_RSH | BPF_K:
   1839			if (unlikely(imm32 > 63))
   1840				return -EINVAL;
   1841			emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
   1842			break;
   1843		/* dst = dst << src */
   1844		case BPF_ALU64 | BPF_LSH | BPF_X:
   1845			emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
   1846			break;
   1847		/* dst = dst >> src */
   1848		case BPF_ALU64 | BPF_RSH | BPF_X:
   1849			emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
   1850			break;
   1851		/* dst = dst >> src (signed) */
   1852		case BPF_ALU64 | BPF_ARSH | BPF_X:
   1853			emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
   1854			break;
   1855		/* dst = dst >> imm (signed) */
   1856		case BPF_ALU64 | BPF_ARSH | BPF_K:
   1857			if (unlikely(imm32 > 63))
   1858				return -EINVAL;
   1859			emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
   1860			break;
    1861		/* dst = -dst */
   1862		case BPF_ALU | BPF_NEG:
   1863			emit_ia32_alu_i(is64, false, BPF_OP(code),
   1864					dst_lo, 0, dstk, &prog);
   1865			if (!bpf_prog->aux->verifier_zext)
   1866				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
   1867			break;
    1868		/* dst = -dst (64 bit) */
   1869		case BPF_ALU64 | BPF_NEG:
   1870			emit_ia32_neg64(dst, dstk, &prog);
   1871			break;
   1872		/* dst = dst * src/imm */
   1873		case BPF_ALU64 | BPF_MUL | BPF_X:
   1874		case BPF_ALU64 | BPF_MUL | BPF_K:
   1875			switch (BPF_SRC(code)) {
   1876			case BPF_X:
   1877				emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
   1878				break;
   1879			case BPF_K:
   1880				emit_ia32_mul_i64(dst, imm32, dstk, &prog);
   1881				break;
   1882			}
   1883			break;
   1884		/* dst = htole(dst) */
   1885		case BPF_ALU | BPF_END | BPF_FROM_LE:
   1886			emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
   1887					    bpf_prog->aux);
   1888			break;
   1889		/* dst = htobe(dst) */
   1890		case BPF_ALU | BPF_END | BPF_FROM_BE:
   1891			emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
   1892					    bpf_prog->aux);
   1893			break;
   1894		/* dst = imm64 */
   1895		case BPF_LD | BPF_IMM | BPF_DW: {
   1896			s32 hi, lo = imm32;
   1897
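        			/*
        			 * ldimm64 occupies two BPF insns; the second one
        			 * carries the upper 32 bits of the immediate.
        			 */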
   1898			hi = insn[1].imm;
   1899			emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
   1900			emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
   1901			insn++;
   1902			i++;
   1903			break;
   1904		}
   1905		/* speculation barrier */
   1906		case BPF_ST | BPF_NOSPEC:
   1907			if (boot_cpu_has(X86_FEATURE_XMM2))
   1908				/* Emit 'lfence' */
   1909				EMIT3(0x0F, 0xAE, 0xE8);
   1910			break;
   1911		/* ST: *(u8*)(dst_reg + off) = imm */
   1912		case BPF_ST | BPF_MEM | BPF_H:
   1913		case BPF_ST | BPF_MEM | BPF_B:
   1914		case BPF_ST | BPF_MEM | BPF_W:
   1915		case BPF_ST | BPF_MEM | BPF_DW:
   1916			if (dstk)
   1917				/* mov eax,dword ptr [ebp+off] */
   1918				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   1919				      STACK_VAR(dst_lo));
   1920			else
   1921				/* mov eax,dst_lo */
   1922				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
   1923
   1924			switch (BPF_SIZE(code)) {
   1925			case BPF_B:
   1926				EMIT(0xC6, 1); break;
   1927			case BPF_H:
   1928				EMIT2(0x66, 0xC7); break;
   1929			case BPF_W:
   1930			case BPF_DW:
   1931				EMIT(0xC7, 1); break;
   1932			}
   1933
   1934			if (is_imm8(insn->off))
   1935				EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
   1936			else
   1937				EMIT1_off32(add_1reg(0x80, IA32_EAX),
   1938					    insn->off);
   1939			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
   1940
   1941			if (BPF_SIZE(code) == BPF_DW) {
   1942				u32 hi;
   1943
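        				/* sign-extend imm32 into the upper half */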
    1944				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
   1945				EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
   1946					    insn->off + 4);
   1947				EMIT(hi, 4);
   1948			}
   1949			break;
   1950
   1951		/* STX: *(u8*)(dst_reg + off) = src_reg */
   1952		case BPF_STX | BPF_MEM | BPF_B:
   1953		case BPF_STX | BPF_MEM | BPF_H:
   1954		case BPF_STX | BPF_MEM | BPF_W:
   1955		case BPF_STX | BPF_MEM | BPF_DW:
   1956			if (dstk)
   1957				/* mov eax,dword ptr [ebp+off] */
   1958				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   1959				      STACK_VAR(dst_lo));
   1960			else
   1961				/* mov eax,dst_lo */
   1962				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
   1963
   1964			if (sstk)
   1965				/* mov edx,dword ptr [ebp+off] */
   1966				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
   1967				      STACK_VAR(src_lo));
   1968			else
   1969				/* mov edx,src_lo */
   1970				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
   1971
   1972			switch (BPF_SIZE(code)) {
   1973			case BPF_B:
   1974				EMIT(0x88, 1); break;
   1975			case BPF_H:
   1976				EMIT2(0x66, 0x89); break;
   1977			case BPF_W:
   1978			case BPF_DW:
   1979				EMIT(0x89, 1); break;
   1980			}
   1981
   1982			if (is_imm8(insn->off))
   1983				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
   1984				      insn->off);
   1985			else
   1986				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
   1987					    insn->off);
   1988
   1989			if (BPF_SIZE(code) == BPF_DW) {
   1990				if (sstk)
    1991					/* mov edx,dword ptr [ebp+off] */
   1992					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
   1993							     IA32_EDX),
   1994					      STACK_VAR(src_hi));
   1995				else
    1996					/* mov edx,src_hi */
   1997					EMIT2(0x8B, add_2reg(0xC0, src_hi,
   1998							     IA32_EDX));
   1999				EMIT1(0x89);
   2000				if (is_imm8(insn->off + 4)) {
   2001					EMIT2(add_2reg(0x40, IA32_EAX,
   2002						       IA32_EDX),
   2003					      insn->off + 4);
   2004				} else {
   2005					EMIT1(add_2reg(0x80, IA32_EAX,
   2006						       IA32_EDX));
   2007					EMIT(insn->off + 4, 4);
   2008				}
   2009			}
   2010			break;
   2011
   2012		/* LDX: dst_reg = *(u8*)(src_reg + off) */
   2013		case BPF_LDX | BPF_MEM | BPF_B:
   2014		case BPF_LDX | BPF_MEM | BPF_H:
   2015		case BPF_LDX | BPF_MEM | BPF_W:
   2016		case BPF_LDX | BPF_MEM | BPF_DW:
   2017			if (sstk)
   2018				/* mov eax,dword ptr [ebp+off] */
   2019				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2020				      STACK_VAR(src_lo));
   2021			else
    2022				/* mov eax,src_lo */
   2023				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
   2024
   2025			switch (BPF_SIZE(code)) {
   2026			case BPF_B:
   2027				EMIT2(0x0F, 0xB6); break;
   2028			case BPF_H:
   2029				EMIT2(0x0F, 0xB7); break;
   2030			case BPF_W:
   2031			case BPF_DW:
   2032				EMIT(0x8B, 1); break;
   2033			}
   2034
   2035			if (is_imm8(insn->off))
   2036				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
   2037				      insn->off);
   2038			else
   2039				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
   2040					    insn->off);
   2041
   2042			if (dstk)
   2043				/* mov dword ptr [ebp+off],edx */
   2044				EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
   2045				      STACK_VAR(dst_lo));
   2046			else
   2047				/* mov dst_lo,edx */
   2048				EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
   2049			switch (BPF_SIZE(code)) {
   2050			case BPF_B:
   2051			case BPF_H:
   2052			case BPF_W:
   2053				if (bpf_prog->aux->verifier_zext)
   2054					break;
   2055				if (dstk) {
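        					/* mov dword ptr [ebp+off],0 */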
   2056					EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
   2057					      STACK_VAR(dst_hi));
   2058					EMIT(0x0, 4);
   2059				} else {
   2060					/* xor dst_hi,dst_hi */
   2061					EMIT2(0x33,
   2062					      add_2reg(0xC0, dst_hi, dst_hi));
   2063				}
   2064				break;
   2065			case BPF_DW:
   2066				EMIT2_off32(0x8B,
   2067					    add_2reg(0x80, IA32_EAX, IA32_EDX),
   2068					    insn->off + 4);
   2069				if (dstk)
   2070					EMIT3(0x89,
   2071					      add_2reg(0x40, IA32_EBP,
   2072						       IA32_EDX),
   2073					      STACK_VAR(dst_hi));
   2074				else
   2075					EMIT2(0x89,
   2076					      add_2reg(0xC0, dst_hi, IA32_EDX));
   2077				break;
   2078			default:
   2079				break;
   2080			}
   2081			break;
   2082		/* call */
   2083		case BPF_JMP | BPF_CALL:
   2084		{
   2085			const u8 *r1 = bpf2ia32[BPF_REG_1];
   2086			const u8 *r2 = bpf2ia32[BPF_REG_2];
   2087			const u8 *r3 = bpf2ia32[BPF_REG_3];
   2088			const u8 *r4 = bpf2ia32[BPF_REG_4];
   2089			const u8 *r5 = bpf2ia32[BPF_REG_5];
   2090
   2091			if (insn->src_reg == BPF_PSEUDO_CALL)
   2092				goto notyet;
   2093
   2094			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
   2095				int err;
   2096
   2097				err = emit_kfunc_call(bpf_prog,
   2098						      image + addrs[i],
   2099						      insn, &prog);
   2100
   2101				if (err)
   2102					return err;
   2103				break;
   2104			}
   2105
   2106			func = (u8 *) __bpf_call_base + imm32;
   2107			jmp_offset = func - (image + addrs[i]);
   2108
   2109			if (!imm32 || !is_simm32(jmp_offset)) {
   2110				pr_err("unsupported BPF func %d addr %p image %p\n",
   2111				       imm32, func, image);
   2112				return -EINVAL;
   2113			}
   2114
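        			/*
        			 * Helper calling convention on ia32 (assuming the
        			 * usual -mregparm=3 kernel build): the u64 in R1
        			 * travels in eax:edx, while R2..R5 are pushed as
        			 * four u64s, i.e. the 32 bytes popped back below.
        			 */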
   2115			/* mov eax,dword ptr [ebp+off] */
   2116			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2117			      STACK_VAR(r1[0]));
   2118			/* mov edx,dword ptr [ebp+off] */
   2119			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
   2120			      STACK_VAR(r1[1]));
   2121
   2122			emit_push_r64(r5, &prog);
   2123			emit_push_r64(r4, &prog);
   2124			emit_push_r64(r3, &prog);
   2125			emit_push_r64(r2, &prog);
   2126
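        			/*
        			 * jmp_offset was computed against image + addrs[i],
        			 * but the call ends 9 bytes earlier: two 3-byte movs
        			 * storing the return value plus the 3-byte stack
        			 * fixup still follow it.
        			 */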
   2127			EMIT1_off32(0xE8, jmp_offset + 9);
   2128
   2129			/* mov dword ptr [ebp+off],eax */
   2130			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2131			      STACK_VAR(r0[0]));
   2132			/* mov dword ptr [ebp+off],edx */
   2133			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
   2134			      STACK_VAR(r0[1]));
   2135
   2136			/* add esp,32 */
   2137			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
   2138			break;
   2139		}
   2140		case BPF_JMP | BPF_TAIL_CALL:
   2141			emit_bpf_tail_call(&prog, image + addrs[i - 1]);
   2142			break;
   2143
   2144		/* cond jump */
   2145		case BPF_JMP | BPF_JEQ | BPF_X:
   2146		case BPF_JMP | BPF_JNE | BPF_X:
   2147		case BPF_JMP | BPF_JGT | BPF_X:
   2148		case BPF_JMP | BPF_JLT | BPF_X:
   2149		case BPF_JMP | BPF_JGE | BPF_X:
   2150		case BPF_JMP | BPF_JLE | BPF_X:
   2151		case BPF_JMP32 | BPF_JEQ | BPF_X:
   2152		case BPF_JMP32 | BPF_JNE | BPF_X:
   2153		case BPF_JMP32 | BPF_JGT | BPF_X:
   2154		case BPF_JMP32 | BPF_JLT | BPF_X:
   2155		case BPF_JMP32 | BPF_JGE | BPF_X:
   2156		case BPF_JMP32 | BPF_JLE | BPF_X:
   2157		case BPF_JMP32 | BPF_JSGT | BPF_X:
   2158		case BPF_JMP32 | BPF_JSLE | BPF_X:
   2159		case BPF_JMP32 | BPF_JSLT | BPF_X:
   2160		case BPF_JMP32 | BPF_JSGE | BPF_X: {
   2161			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
   2162			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
   2163			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
   2164			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
   2165			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
   2166
   2167			if (dstk) {
   2168				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2169				      STACK_VAR(dst_lo));
   2170				if (is_jmp64)
   2171					EMIT3(0x8B,
   2172					      add_2reg(0x40, IA32_EBP,
   2173						       IA32_EDX),
   2174					      STACK_VAR(dst_hi));
   2175			}
   2176
   2177			if (sstk) {
   2178				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
   2179				      STACK_VAR(src_lo));
   2180				if (is_jmp64)
   2181					EMIT3(0x8B,
   2182					      add_2reg(0x40, IA32_EBP,
   2183						       IA32_EBX),
   2184					      STACK_VAR(src_hi));
   2185			}
   2186
   2187			if (is_jmp64) {
   2188				/* cmp dreg_hi,sreg_hi */
   2189				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
   2190				EMIT2(IA32_JNE, 2);
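        				/*
        				 * If the high words differ, skip the 2-byte
        				 * low-word cmp below; the single jcc emitted
        				 * at emit_cond_jmp then tests whichever cmp
        				 * ran last, e.g. for BPF_JGT:
        				 *   cmp dreg_hi,sreg_hi ; jne 1f
        				 *   cmp dreg_lo,sreg_lo
        				 * 1:	ja <target>
        				 */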
   2191			}
   2192			/* cmp dreg_lo,sreg_lo */
   2193			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
   2194			goto emit_cond_jmp;
   2195		}
   2196		case BPF_JMP | BPF_JSGT | BPF_X:
   2197		case BPF_JMP | BPF_JSLE | BPF_X:
   2198		case BPF_JMP | BPF_JSLT | BPF_X:
   2199		case BPF_JMP | BPF_JSGE | BPF_X: {
   2200			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
   2201			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
   2202			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
   2203			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
   2204
   2205			if (dstk) {
   2206				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2207				      STACK_VAR(dst_lo));
   2208				EMIT3(0x8B,
   2209				      add_2reg(0x40, IA32_EBP,
   2210					       IA32_EDX),
   2211				      STACK_VAR(dst_hi));
   2212			}
   2213
   2214			if (sstk) {
   2215				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
   2216				      STACK_VAR(src_lo));
   2217				EMIT3(0x8B,
   2218				      add_2reg(0x40, IA32_EBP,
   2219					       IA32_EBX),
   2220				      STACK_VAR(src_hi));
   2221			}
   2222
   2223			/* cmp dreg_hi,sreg_hi */
   2224			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
   2225			EMIT2(IA32_JNE, 10);
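        			/*
        			 * "jne .+10" skips the 2-byte low-word cmp plus
        			 * the 6-byte low jcc and 2-byte jmp of the fixed
        			 * block at emit_cond_jmp_signed, landing on the
        			 * high-word jcc.
        			 */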
   2226			/* cmp dreg_lo,sreg_lo */
   2227			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
   2228			goto emit_cond_jmp_signed;
   2229		}
   2230		case BPF_JMP | BPF_JSET | BPF_X:
   2231		case BPF_JMP32 | BPF_JSET | BPF_X: {
   2232			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
   2233			u8 dreg_lo = IA32_EAX;
   2234			u8 dreg_hi = IA32_EDX;
   2235			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
   2236			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
   2237
   2238			if (dstk) {
   2239				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2240				      STACK_VAR(dst_lo));
   2241				if (is_jmp64)
   2242					EMIT3(0x8B,
   2243					      add_2reg(0x40, IA32_EBP,
   2244						       IA32_EDX),
   2245					      STACK_VAR(dst_hi));
   2246			} else {
   2247				/* mov dreg_lo,dst_lo */
   2248				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
   2249				if (is_jmp64)
   2250					/* mov dreg_hi,dst_hi */
   2251					EMIT2(0x89,
   2252					      add_2reg(0xC0, dreg_hi, dst_hi));
   2253			}
   2254
   2255			if (sstk) {
   2256				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
   2257				      STACK_VAR(src_lo));
   2258				if (is_jmp64)
   2259					EMIT3(0x8B,
   2260					      add_2reg(0x40, IA32_EBP,
   2261						       IA32_EBX),
   2262					      STACK_VAR(src_hi));
   2263			}
   2264			/* and dreg_lo,sreg_lo */
   2265			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
   2266			if (is_jmp64) {
   2267				/* and dreg_hi,sreg_hi */
   2268				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
   2269				/* or dreg_lo,dreg_hi */
   2270				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
   2271			}
   2272			goto emit_cond_jmp;
   2273		}
   2274		case BPF_JMP | BPF_JSET | BPF_K:
   2275		case BPF_JMP32 | BPF_JSET | BPF_K: {
   2276			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
   2277			u8 dreg_lo = IA32_EAX;
   2278			u8 dreg_hi = IA32_EDX;
   2279			u8 sreg_lo = IA32_ECX;
   2280			u8 sreg_hi = IA32_EBX;
   2281			u32 hi;
   2282
   2283			if (dstk) {
   2284				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2285				      STACK_VAR(dst_lo));
   2286				if (is_jmp64)
   2287					EMIT3(0x8B,
   2288					      add_2reg(0x40, IA32_EBP,
   2289						       IA32_EDX),
   2290					      STACK_VAR(dst_hi));
   2291			} else {
   2292				/* mov dreg_lo,dst_lo */
   2293				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
   2294				if (is_jmp64)
   2295					/* mov dreg_hi,dst_hi */
   2296					EMIT2(0x89,
   2297					      add_2reg(0xC0, dreg_hi, dst_hi));
   2298			}
   2299
   2300			/* mov ecx,imm32 */
   2301			EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
   2302
   2303			/* and dreg_lo,sreg_lo */
   2304			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
   2305			if (is_jmp64) {
   2306				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
   2307				/* mov ebx,imm32 */
   2308				EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
   2309				/* and dreg_hi,sreg_hi */
   2310				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
   2311				/* or dreg_lo,dreg_hi */
   2312				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
   2313			}
   2314			goto emit_cond_jmp;
   2315		}
   2316		case BPF_JMP | BPF_JEQ | BPF_K:
   2317		case BPF_JMP | BPF_JNE | BPF_K:
   2318		case BPF_JMP | BPF_JGT | BPF_K:
   2319		case BPF_JMP | BPF_JLT | BPF_K:
   2320		case BPF_JMP | BPF_JGE | BPF_K:
   2321		case BPF_JMP | BPF_JLE | BPF_K:
   2322		case BPF_JMP32 | BPF_JEQ | BPF_K:
   2323		case BPF_JMP32 | BPF_JNE | BPF_K:
   2324		case BPF_JMP32 | BPF_JGT | BPF_K:
   2325		case BPF_JMP32 | BPF_JLT | BPF_K:
   2326		case BPF_JMP32 | BPF_JGE | BPF_K:
   2327		case BPF_JMP32 | BPF_JLE | BPF_K:
   2328		case BPF_JMP32 | BPF_JSGT | BPF_K:
   2329		case BPF_JMP32 | BPF_JSLE | BPF_K:
   2330		case BPF_JMP32 | BPF_JSLT | BPF_K:
   2331		case BPF_JMP32 | BPF_JSGE | BPF_K: {
   2332			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
   2333			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
   2334			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
   2335			u8 sreg_lo = IA32_ECX;
   2336			u8 sreg_hi = IA32_EBX;
   2337			u32 hi;
   2338
   2339			if (dstk) {
   2340				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2341				      STACK_VAR(dst_lo));
   2342				if (is_jmp64)
   2343					EMIT3(0x8B,
   2344					      add_2reg(0x40, IA32_EBP,
   2345						       IA32_EDX),
   2346					      STACK_VAR(dst_hi));
   2347			}
   2348
   2349			/* mov ecx,imm32 */
   2350			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
   2351			if (is_jmp64) {
   2352				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
   2353				/* mov ebx,imm32 */
   2354				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
   2355				/* cmp dreg_hi,sreg_hi */
   2356				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
   2357				EMIT2(IA32_JNE, 2);
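        				/* as above: skip the low-word cmp if the high words differ */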
   2358			}
   2359			/* cmp dreg_lo,sreg_lo */
   2360			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
   2361
   2362emit_cond_jmp:		jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
   2363			if (jmp_cond == COND_JMP_OPCODE_INVALID)
   2364				return -EFAULT;
   2365			jmp_offset = addrs[i + insn->off] - addrs[i];
   2366			if (is_imm8(jmp_offset)) {
   2367				EMIT2(jmp_cond, jmp_offset);
   2368			} else if (is_simm32(jmp_offset)) {
   2369				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
   2370			} else {
   2371				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
   2372				return -EFAULT;
   2373			}
   2374			break;
   2375		}
   2376		case BPF_JMP | BPF_JSGT | BPF_K:
   2377		case BPF_JMP | BPF_JSLE | BPF_K:
   2378		case BPF_JMP | BPF_JSLT | BPF_K:
   2379		case BPF_JMP | BPF_JSGE | BPF_K: {
   2380			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
   2381			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
   2382			u8 sreg_lo = IA32_ECX;
   2383			u8 sreg_hi = IA32_EBX;
   2384			u32 hi;
   2385
   2386			if (dstk) {
   2387				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
   2388				      STACK_VAR(dst_lo));
   2389				EMIT3(0x8B,
   2390				      add_2reg(0x40, IA32_EBP,
   2391					       IA32_EDX),
   2392				      STACK_VAR(dst_hi));
   2393			}
   2394
   2395			/* mov ecx,imm32 */
   2396			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
   2397			hi = imm32 & (1 << 31) ? (u32)~0 : 0;
   2398			/* mov ebx,imm32 */
   2399			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
   2400			/* cmp dreg_hi,sreg_hi */
   2401			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
   2402			EMIT2(IA32_JNE, 10);
   2403			/* cmp dreg_lo,sreg_lo */
   2404			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
   2405
   2406			/*
   2407			 * For simplicity of branch offset computation,
   2408			 * let's use fixed jump coding here.
   2409			 */
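        			/*
        			 * Fixed block layout (byte counts match the EMITs
        			 * below):
        			 *   jcc rel32 - low-word cond, unsigned  (6 bytes)
        			 *   jmp .+6   - skip the high-word jcc   (2 bytes)
        			 *   jcc rel32 - high-word cond, signed   (6 bytes)
        			 * The low-word jcc thus ends 8 bytes before the end
        			 * of this insn's code, hence the "+ 8" below.
        			 */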
   2410emit_cond_jmp_signed:	/* Check the condition for low 32-bit comparison */
   2411			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
   2412			if (jmp_cond == COND_JMP_OPCODE_INVALID)
   2413				return -EFAULT;
   2414			jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
   2415			if (is_simm32(jmp_offset)) {
   2416				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
   2417			} else {
   2418				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
   2419				return -EFAULT;
   2420			}
   2421			EMIT2(0xEB, 6);
   2422
   2423			/* Check the condition for high 32-bit comparison */
   2424			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
   2425			if (jmp_cond == COND_JMP_OPCODE_INVALID)
   2426				return -EFAULT;
   2427			jmp_offset = addrs[i + insn->off] - addrs[i];
   2428			if (is_simm32(jmp_offset)) {
   2429				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
   2430			} else {
   2431				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
   2432				return -EFAULT;
   2433			}
   2434			break;
   2435		}
   2436		case BPF_JMP | BPF_JA:
   2437			if (insn->off == -1)
   2438				/* -1 jmp instructions will always jump
   2439				 * backwards two bytes. Explicitly handling
   2440				 * this case avoids wasting too many passes
   2441				 * when there are long sequences of replaced
   2442				 * dead code.
   2443				 */
   2444				jmp_offset = -2;
   2445			else
   2446				jmp_offset = addrs[i + insn->off] - addrs[i];
   2447
   2448			if (!jmp_offset)
   2449				/* Optimize out nop jumps */
   2450				break;
   2451emit_jmp:
   2452			if (is_imm8(jmp_offset)) {
   2453				EMIT2(0xEB, jmp_offset);
   2454			} else if (is_simm32(jmp_offset)) {
   2455				EMIT1_off32(0xE9, jmp_offset);
   2456			} else {
   2457				pr_err("jmp gen bug %llx\n", jmp_offset);
   2458				return -EFAULT;
   2459			}
   2460			break;
   2461		case BPF_STX | BPF_ATOMIC | BPF_W:
   2462		case BPF_STX | BPF_ATOMIC | BPF_DW:
   2463			goto notyet;
   2464		case BPF_JMP | BPF_EXIT:
   2465			if (seen_exit) {
   2466				jmp_offset = ctx->cleanup_addr - addrs[i];
   2467				goto emit_jmp;
   2468			}
   2469			seen_exit = true;
   2470			/* Update cleanup_addr */
   2471			ctx->cleanup_addr = proglen;
   2472			emit_epilogue(&prog, bpf_prog->aux->stack_depth);
   2473			break;
   2474notyet:
   2475			pr_info_once("*** NOT YET: opcode %02x ***\n", code);
   2476			return -EFAULT;
   2477		default:
   2478			/*
   2479			 * This error will be seen if new instruction was added
    2480			 * This error will be seen if a new instruction was added
    2481			 * to the interpreter but not to the JIT, or if there is
    2482			 * junk in bpf_prog.
   2483			pr_err("bpf_jit: unknown opcode %02x\n", code);
   2484			return -EINVAL;
   2485		}
   2486
   2487		ilen = prog - temp;
   2488		if (ilen > BPF_MAX_INSN_SIZE) {
   2489			pr_err("bpf_jit: fatal insn size error\n");
   2490			return -EFAULT;
   2491		}
   2492
   2493		if (image) {
   2494			/*
   2495			 * When populating the image, assert that:
   2496			 *
   2497			 *  i) We do not write beyond the allocated space, and
   2498			 * ii) addrs[i] did not change from the prior run, in order
   2499			 *     to validate assumptions made for computing branch
   2500			 *     displacements.
   2501			 */
   2502			if (unlikely(proglen + ilen > oldproglen ||
   2503				     proglen + ilen != addrs[i])) {
   2504				pr_err("bpf_jit: fatal error\n");
   2505				return -EFAULT;
   2506			}
   2507			memcpy(image + proglen, temp, ilen);
   2508		}
   2509		proglen += ilen;
   2510		addrs[i] = proglen;
   2511		prog = temp;
   2512	}
   2513	return proglen;
   2514}
   2515
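        /*
         * Returning true here makes the verifier insert explicit
         * zero-extension insns, which is why the cases in do_jit()
         * skip clearing dst_hi by hand when verifier_zext is set.
         */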
   2516bool bpf_jit_needs_zext(void)
   2517{
   2518	return true;
   2519}
   2520
   2521struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
   2522{
   2523	struct bpf_binary_header *header = NULL;
   2524	struct bpf_prog *tmp, *orig_prog = prog;
   2525	int proglen, oldproglen = 0;
   2526	struct jit_context ctx = {};
   2527	bool tmp_blinded = false;
   2528	u8 *image = NULL;
   2529	int *addrs;
   2530	int pass;
   2531	int i;
   2532
   2533	if (!prog->jit_requested)
   2534		return orig_prog;
   2535
   2536	tmp = bpf_jit_blind_constants(prog);
   2537	/*
   2538	 * If blinding was requested and we failed during blinding,
   2539	 * we must fall back to the interpreter.
   2540	 */
   2541	if (IS_ERR(tmp))
   2542		return orig_prog;
   2543	if (tmp != prog) {
   2544		tmp_blinded = true;
   2545		prog = tmp;
   2546	}
   2547
   2548	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
   2549	if (!addrs) {
   2550		prog = orig_prog;
   2551		goto out;
   2552	}
   2553
   2554	/*
    2555	 * Before the first pass, make a rough estimate of addrs[]:
    2556	 * each BPF instruction is translated to less than 64 bytes.
   2557	 */
   2558	for (proglen = 0, i = 0; i < prog->len; i++) {
   2559		proglen += 64;
   2560		addrs[i] = proglen;
   2561	}
   2562	ctx.cleanup_addr = proglen;
   2563
   2564	/*
    2565	 * The JITed image shrinks with every pass and the loop iterates
    2566	 * until the image stops shrinking. Very large BPF programs may
    2567	 * converge only on the last pass; in such a case, do one more
    2568	 * pass to emit the final image. The image is allocated once two
        	 * consecutive passes agree on proglen; the next pass fills it in.
   2569	 */
   2570	for (pass = 0; pass < 20 || image; pass++) {
   2571		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
   2572		if (proglen <= 0) {
   2573out_image:
   2574			image = NULL;
   2575			if (header)
   2576				bpf_jit_binary_free(header);
   2577			prog = orig_prog;
   2578			goto out_addrs;
   2579		}
   2580		if (image) {
   2581			if (proglen != oldproglen) {
   2582				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
   2583				       proglen, oldproglen);
   2584				goto out_image;
   2585			}
   2586			break;
   2587		}
   2588		if (proglen == oldproglen) {
   2589			header = bpf_jit_binary_alloc(proglen, &image,
   2590						      1, jit_fill_hole);
   2591			if (!header) {
   2592				prog = orig_prog;
   2593				goto out_addrs;
   2594			}
   2595		}
   2596		oldproglen = proglen;
   2597		cond_resched();
   2598	}
   2599
   2600	if (bpf_jit_enable > 1)
   2601		bpf_jit_dump(prog->len, proglen, pass + 1, image);
   2602
   2603	if (image) {
   2604		bpf_jit_binary_lock_ro(header);
   2605		prog->bpf_func = (void *)image;
   2606		prog->jited = 1;
   2607		prog->jited_len = proglen;
   2608	} else {
   2609		prog = orig_prog;
   2610	}
   2611
   2612out_addrs:
   2613	kfree(addrs);
   2614out:
   2615	if (tmp_blinded)
   2616		bpf_jit_prog_release_other(prog, prog == orig_prog ?
   2617					   tmp : orig_prog);
   2618	return prog;
   2619}
   2620
   2621bool bpf_jit_supports_kfunc_call(void)
   2622{
   2623	return true;
   2624}