cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

bpf_jit_comp32.c (54702B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Just-In-Time compiler for eBPF bytecode on MIPS.
      4 * Implementation of JIT functions for 32-bit CPUs.
      5 *
      6 * Copyright (c) 2021 Anyfi Networks AB.
      7 * Author: Johan Almbladh <johan.almbladh@gmail.com>
      8 *
      9 * Based on code and ideas from
     10 * Copyright (c) 2017 Cavium, Inc.
     11 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
     12 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
     13 */
     14
     15#include <linux/math64.h>
     16#include <linux/errno.h>
     17#include <linux/filter.h>
     18#include <linux/bpf.h>
     19#include <asm/cpu-features.h>
     20#include <asm/isa-rev.h>
     21#include <asm/uasm.h>
     22
     23#include "bpf_jit_comp.h"
     24
     25/* MIPS a4-a7 are not available in the o32 ABI */
     26#undef MIPS_R_A4
     27#undef MIPS_R_A5
     28#undef MIPS_R_A6
     29#undef MIPS_R_A7
     30
     31/* Stack is 8-byte aligned in o32 ABI */
     32#define MIPS_STACK_ALIGNMENT 8
     33
     34/*
      35 * The top 16 bytes of a stack frame are reserved for the callee in the O32 ABI.
     36 * This corresponds to stack space for register arguments a0-a3.
     37 */
     38#define JIT_RESERVED_STACK 16
     39
     40/* Temporary 64-bit register used by JIT */
     41#define JIT_REG_TMP MAX_BPF_JIT_REG
     42
     43/*
     44 * Number of prologue bytes to skip when doing a tail call.
      45 * Tail call count (TCC) initialization (8 bytes) always, plus the
      46 * move of the context argument into R1 (4 bytes) if big endian.
     47 */
     48#ifdef __BIG_ENDIAN
     49#define JIT_TCALL_SKIP 12
     50#else
     51#define JIT_TCALL_SKIP 8
     52#endif
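
/*
 * Worked example: on little-endian targets the prologue opens with
 * "ori t9, zero, TCC" and "sw t9, 0(sp)" (4 bytes each), so a tail
 * call enters at bpf_func + 8, just past the TCC store, and the
 * callee inherits the caller's remaining tail call count. On
 * big-endian targets the 4-byte move of the context argument is
 * also skipped, giving an entry offset of 12.
 */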
     53
     54/* CPU registers holding the callee return value */
     55#define JIT_RETURN_REGS	  \
     56	(BIT(MIPS_R_V0) | \
     57	 BIT(MIPS_R_V1))
     58
     59/* CPU registers arguments passed to callee directly */
     60#define JIT_ARG_REGS      \
     61	(BIT(MIPS_R_A0) | \
     62	 BIT(MIPS_R_A1) | \
     63	 BIT(MIPS_R_A2) | \
     64	 BIT(MIPS_R_A3))
     65
     66/* CPU register arguments passed to callee on stack */
     67#define JIT_STACK_REGS    \
     68	(BIT(MIPS_R_T0) | \
     69	 BIT(MIPS_R_T1) | \
     70	 BIT(MIPS_R_T2) | \
     71	 BIT(MIPS_R_T3) | \
     72	 BIT(MIPS_R_T4) | \
     73	 BIT(MIPS_R_T5))
     74
     75/* Caller-saved CPU registers */
     76#define JIT_CALLER_REGS    \
     77	(JIT_RETURN_REGS | \
     78	 JIT_ARG_REGS    | \
     79	 JIT_STACK_REGS)
     80
     81/* Callee-saved CPU registers */
     82#define JIT_CALLEE_REGS   \
     83	(BIT(MIPS_R_S0) | \
     84	 BIT(MIPS_R_S1) | \
     85	 BIT(MIPS_R_S2) | \
     86	 BIT(MIPS_R_S3) | \
     87	 BIT(MIPS_R_S4) | \
     88	 BIT(MIPS_R_S5) | \
     89	 BIT(MIPS_R_S6) | \
     90	 BIT(MIPS_R_S7) | \
     91	 BIT(MIPS_R_GP) | \
     92	 BIT(MIPS_R_FP) | \
     93	 BIT(MIPS_R_RA))
     94
     95/*
     96 * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
     97 *
      98 * 1) Native register pairs are ordered according to CPU endianness, following
     99 *    the MIPS convention for passing 64-bit arguments and return values.
    100 * 2) The eBPF return value, arguments and callee-saved registers are mapped
    101 *    to their native MIPS equivalents.
    102 * 3) Since the 32 highest bits in the eBPF FP register are always zero,
    103 *    only one general-purpose register is actually needed for the mapping.
    104 *    We use the fp register for this purpose, and map the highest bits to
    105 *    the MIPS register r0 (zero).
    106 * 4) We use the MIPS gp and at registers as internal temporary registers
    107 *    for constant blinding. The gp register is callee-saved.
    108 * 5) One 64-bit temporary register is mapped for use when sign-extending
    109 *    immediate operands. MIPS registers t6-t9 are available to the JIT
     110 *    as temporaries when implementing complex 64-bit operations.
    111 *
    112 * With this scheme all eBPF registers are being mapped to native MIPS
    113 * registers without having to use any stack scratch space. The direct
    114 * register mapping (2) simplifies the handling of function calls.
    115 */
    116static const u8 bpf2mips32[][2] = {
    117	/* Return value from in-kernel function, and exit value from eBPF */
    118	[BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
    119	/* Arguments from eBPF program to in-kernel function */
    120	[BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
    121	[BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
    122	/* Remaining arguments, to be passed on the stack per O32 ABI */
    123	[BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
    124	[BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
    125	[BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
    126	/* Callee-saved registers that in-kernel function will preserve */
    127	[BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
    128	[BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
    129	[BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
    130	[BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
    131	/* Read-only frame pointer to access the eBPF stack */
    132#ifdef __BIG_ENDIAN
    133	[BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
    134#else
    135	[BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
    136#endif
    137	/* Temporary register for blinding constants */
    138	[BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
    139	/* Temporary register for internal JIT use */
    140	[JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
    141};
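
/*
 * Example: a 64-bit value 0x11223344_55667788 held in BPF_REG_0 has
 * its low word 0x55667788 in v0 and high word 0x11223344 in v1 on
 * little-endian, and the reverse on big-endian, matching how the
 * O32 ABI returns 64-bit values in the v0/v1 pair.
 */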
    142
    143/* Get low CPU register for a 64-bit eBPF register mapping */
    144static inline u8 lo(const u8 reg[])
    145{
    146#ifdef __BIG_ENDIAN
    147	return reg[0];
    148#else
    149	return reg[1];
    150#endif
    151}
    152
    153/* Get high CPU register for a 64-bit eBPF register mapping */
    154static inline u8 hi(const u8 reg[])
    155{
    156#ifdef __BIG_ENDIAN
    157	return reg[1];
    158#else
    159	return reg[0];
    160#endif
    161}
    162
    163/*
     164 * Mark a 64-bit CPU register pair as clobbered; it needs to be
    165 * saved/restored by the program if callee-saved.
    166 */
    167static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
    168{
    169	clobber_reg(ctx, reg[0]);
    170	clobber_reg(ctx, reg[1]);
    171}
    172
    173/* dst = imm (sign-extended) */
    174static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
    175{
    176	emit_mov_i(ctx, lo(dst), imm);
    177	if (imm < 0)
    178		emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
    179	else
    180		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    181	clobber_reg64(ctx, dst);
    182}
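
/*
 * Example: imm = -5 gives lo(dst) = 0xfffffffb and hi(dst) = -1
 * (0xffffffff); a non-negative imm such as 5 gives hi(dst) = 0.
 */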
    183
     184/* Zero extension, if the verifier does not do it for us */
    185static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
    186{
    187	if (!ctx->program->aux->verifier_zext) {
    188		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    189		clobber_reg(ctx, hi(dst));
    190	}
    191}
    192
    193/* Load delay slot, if ISA mandates it */
    194static void emit_load_delay(struct jit_context *ctx)
    195{
    196	if (!cpu_has_mips_2_3_4_5_r)
    197		emit(ctx, nop);
    198}
    199
    200/* ALU immediate operation (64-bit) */
    201static void emit_alu_i64(struct jit_context *ctx,
    202			 const u8 dst[], s32 imm, u8 op)
    203{
    204	u8 src = MIPS_R_T6;
    205
    206	/*
    207	 * ADD/SUB with all but the max negative imm can be handled by
    208	 * inverting the operation and the imm value, saving one insn.
    209	 */
    210	if (imm > S32_MIN && imm < 0)
    211		switch (op) {
    212		case BPF_ADD:
    213			op = BPF_SUB;
    214			imm = -imm;
    215			break;
    216		case BPF_SUB:
    217			op = BPF_ADD;
    218			imm = -imm;
    219			break;
    220		}
    221
    222	/* Move immediate to temporary register */
    223	emit_mov_i(ctx, src, imm);
    224
    225	switch (op) {
    226	/* dst = dst + imm */
    227	case BPF_ADD:
    228		emit(ctx, addu, lo(dst), lo(dst), src);
    229		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
    230		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
    231		if (imm < 0)
    232			emit(ctx, addiu, hi(dst), hi(dst), -1);
    233		break;
    234	/* dst = dst - imm */
    235	case BPF_SUB:
    236		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
    237		emit(ctx, subu, lo(dst), lo(dst), src);
    238		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
    239		if (imm < 0)
    240			emit(ctx, addiu, hi(dst), hi(dst), 1);
    241		break;
    242	/* dst = dst | imm */
    243	case BPF_OR:
    244		emit(ctx, or, lo(dst), lo(dst), src);
    245		if (imm < 0)
    246			emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
    247		break;
    248	/* dst = dst & imm */
    249	case BPF_AND:
    250		emit(ctx, and, lo(dst), lo(dst), src);
    251		if (imm >= 0)
    252			emit(ctx, move, hi(dst), MIPS_R_ZERO);
    253		break;
    254	/* dst = dst ^ imm */
    255	case BPF_XOR:
    256		emit(ctx, xor, lo(dst), lo(dst), src);
    257		if (imm < 0) {
    258			emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
    259			emit(ctx, addiu, hi(dst), hi(dst), -1);
    260		}
    261		break;
    262	}
    263	clobber_reg64(ctx, dst);
    264}
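
/*
 * Worked example of the BPF_ADD carry detection above: with
 * dst = 0x00000001_ffffffff and imm = 2, addu leaves
 * lo(dst) = 0x00000001, which is less than src (2), so sltu sets
 * t9 = 1 and the carry propagates into hi(dst), yielding
 * 0x00000002_00000001. The unsigned wrap of the low word occurs
 * exactly when lo(dst) ends up below src.
 */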
    265
    266/* ALU register operation (64-bit) */
    267static void emit_alu_r64(struct jit_context *ctx,
    268			 const u8 dst[], const u8 src[], u8 op)
    269{
    270	switch (BPF_OP(op)) {
    271	/* dst = dst + src */
    272	case BPF_ADD:
    273		if (src == dst) {
    274			emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
    275			emit(ctx, addu, lo(dst), lo(dst), lo(dst));
    276		} else {
    277			emit(ctx, addu, lo(dst), lo(dst), lo(src));
    278			emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
    279		}
    280		emit(ctx, addu, hi(dst), hi(dst), hi(src));
    281		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
    282		break;
    283	/* dst = dst - src */
    284	case BPF_SUB:
    285		emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
    286		emit(ctx, subu, lo(dst), lo(dst), lo(src));
    287		emit(ctx, subu, hi(dst), hi(dst), hi(src));
    288		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
    289		break;
    290	/* dst = dst | src */
    291	case BPF_OR:
    292		emit(ctx, or, lo(dst), lo(dst), lo(src));
    293		emit(ctx, or, hi(dst), hi(dst), hi(src));
    294		break;
    295	/* dst = dst & src */
    296	case BPF_AND:
    297		emit(ctx, and, lo(dst), lo(dst), lo(src));
    298		emit(ctx, and, hi(dst), hi(dst), hi(src));
    299		break;
    300	/* dst = dst ^ src */
    301	case BPF_XOR:
    302		emit(ctx, xor, lo(dst), lo(dst), lo(src));
    303		emit(ctx, xor, hi(dst), hi(dst), hi(src));
    304		break;
    305	}
    306	clobber_reg64(ctx, dst);
    307}
    308
     310/* ALU negation (64-bit) */
    310static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
    311{
    312	emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
    313	emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
    314	emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
    315	emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
    316
    317	clobber_reg64(ctx, dst);
    318}
    319
    320/* ALU shift immediate (64-bit) */
    321static void emit_shift_i64(struct jit_context *ctx,
    322			   const u8 dst[], u32 imm, u8 op)
    323{
    324	switch (BPF_OP(op)) {
    325	/* dst = dst << imm */
    326	case BPF_LSH:
    327		if (imm < 32) {
    328			emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
    329			emit(ctx, sll, lo(dst), lo(dst), imm);
    330			emit(ctx, sll, hi(dst), hi(dst), imm);
    331			emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
    332		} else {
    333			emit(ctx, sll, hi(dst), lo(dst), imm - 32);
    334			emit(ctx, move, lo(dst), MIPS_R_ZERO);
    335		}
    336		break;
    337	/* dst = dst >> imm */
    338	case BPF_RSH:
    339		if (imm < 32) {
    340			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
    341			emit(ctx, srl, lo(dst), lo(dst), imm);
    342			emit(ctx, srl, hi(dst), hi(dst), imm);
    343			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
    344		} else {
    345			emit(ctx, srl, lo(dst), hi(dst), imm - 32);
    346			emit(ctx, move, hi(dst), MIPS_R_ZERO);
    347		}
    348		break;
    349	/* dst = dst >> imm (arithmetic) */
    350	case BPF_ARSH:
    351		if (imm < 32) {
    352			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
    353			emit(ctx, srl, lo(dst), lo(dst), imm);
    354			emit(ctx, sra, hi(dst), hi(dst), imm);
    355			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
    356		} else {
    357			emit(ctx, sra, lo(dst), hi(dst), imm - 32);
    358			emit(ctx, sra, hi(dst), hi(dst), 31);
    359		}
    360		break;
    361	}
    362	clobber_reg64(ctx, dst);
    363}
    364
    365/* ALU shift register (64-bit) */
    366static void emit_shift_r64(struct jit_context *ctx,
    367			   const u8 dst[], u8 src, u8 op)
    368{
    369	u8 t1 = MIPS_R_T8;
    370	u8 t2 = MIPS_R_T9;
    371
    372	emit(ctx, andi, t1, src, 32);              /* t1 = src & 32          */
    373	emit(ctx, beqz, t1, 16);                   /* PC += 16 if t1 == 0    */
    374	emit(ctx, nor, t2, src, MIPS_R_ZERO);      /* t2 = ~src (delay slot) */
    375
    376	switch (BPF_OP(op)) {
    377	/* dst = dst << src */
    378	case BPF_LSH:
    379		/* Next: shift >= 32 */
    380		emit(ctx, sllv, hi(dst), lo(dst), src);    /* dh = dl << src */
    381		emit(ctx, move, lo(dst), MIPS_R_ZERO);     /* dl = 0         */
    382		emit(ctx, b, 20);                          /* PC += 20       */
    383		/* +16: shift < 32 */
    384		emit(ctx, srl, t1, lo(dst), 1);            /* t1 = dl >> 1   */
    385		emit(ctx, srlv, t1, t1, t2);               /* t1 = t1 >> t2  */
    386		emit(ctx, sllv, lo(dst), lo(dst), src);    /* dl = dl << src */
    387		emit(ctx, sllv, hi(dst), hi(dst), src);    /* dh = dh << src */
    388		emit(ctx, or, hi(dst), hi(dst), t1);       /* dh = dh | t1   */
    389		break;
    390	/* dst = dst >> src */
    391	case BPF_RSH:
    392		/* Next: shift >= 32 */
    393		emit(ctx, srlv, lo(dst), hi(dst), src);    /* dl = dh >> src */
    394		emit(ctx, move, hi(dst), MIPS_R_ZERO);     /* dh = 0         */
    395		emit(ctx, b, 20);                          /* PC += 20       */
    396		/* +16: shift < 32 */
     397		emit(ctx, sll, t1, hi(dst), 1);            /* t1 = dh << 1   */
    398		emit(ctx, sllv, t1, t1, t2);               /* t1 = t1 << t2  */
    399		emit(ctx, srlv, lo(dst), lo(dst), src);    /* dl = dl >> src */
    400		emit(ctx, srlv, hi(dst), hi(dst), src);    /* dh = dh >> src */
    401		emit(ctx, or, lo(dst), lo(dst), t1);       /* dl = dl | t1   */
    402		break;
    403	/* dst = dst >> src (arithmetic) */
    404	case BPF_ARSH:
    405		/* Next: shift >= 32 */
    406		emit(ctx, srav, lo(dst), hi(dst), src);   /* dl = dh >>a src */
    407		emit(ctx, sra, hi(dst), hi(dst), 31);     /* dh = dh >>a 31  */
    408		emit(ctx, b, 20);                         /* PC += 20        */
    409		/* +16: shift < 32 */
     410		emit(ctx, sll, t1, hi(dst), 1);           /* t1 = dh << 1    */
     411		emit(ctx, sllv, t1, t1, t2);              /* t1 = t1 << t2   */
     412		emit(ctx, srlv, lo(dst), lo(dst), src);   /* dl = dl >> src  */
     413		emit(ctx, srav, hi(dst), hi(dst), src);   /* dh = dh >>a src */
    414		emit(ctx, or, lo(dst), lo(dst), t1);      /* dl = dl | t1    */
    415		break;
    416	}
    417
    418	/* +20: Done */
    419	clobber_reg64(ctx, dst);
    420}
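
/*
 * Worked example of the two paths above, for BPF_LSH: with src = 8
 * the branch falls through to the "shift < 32" path, where
 * t2 = ~src, so t1 = (dl >> 1) >> (t2 & 31) = dl >> 24 and the
 * result is dh = (dh << 8) | (dl >> 24), dl = dl << 8. Splitting
 * the right shift into ">> 1" then ">> (~src & 31)" keeps each
 * shift amount below 32 even when src = 0, where a direct
 * "dl >> (32 - src)" would shift by 32 & 31 = 0 and leak dl into
 * dh. With src = 40, the "src & 32" test selects the first path,
 * and sllv shifts by src & 31 = 8: dh = dl << 8, dl = 0.
 */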
    421
    422/* ALU mul immediate (64x32-bit) */
    423static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
    424{
    425	u8 src = MIPS_R_T6;
    426	u8 tmp = MIPS_R_T9;
    427
    428	switch (imm) {
    429	/* dst = dst * 1 is a no-op */
    430	case 1:
    431		break;
    432	/* dst = dst * -1 */
    433	case -1:
    434		emit_neg_i64(ctx, dst);
    435		break;
    436	case 0:
    437		emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
    438		emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
    439		break;
    440	/* Full 64x32 multiply */
    441	default:
    442		/* hi(dst) = hi(dst) * src(imm) */
    443		emit_mov_i(ctx, src, imm);
    444		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
    445			emit(ctx, mul, hi(dst), hi(dst), src);
    446		} else {
    447			emit(ctx, multu, hi(dst), src);
    448			emit(ctx, mflo, hi(dst));
    449		}
    450
    451		/* hi(dst) = hi(dst) - lo(dst) */
    452		if (imm < 0)
    453			emit(ctx, subu, hi(dst), hi(dst), lo(dst));
    454
    455		/* tmp = lo(dst) * src(imm) >> 32 */
    456		/* lo(dst) = lo(dst) * src(imm) */
    457		if (cpu_has_mips32r6) {
    458			emit(ctx, muhu, tmp, lo(dst), src);
    459			emit(ctx, mulu, lo(dst), lo(dst), src);
    460		} else {
    461			emit(ctx, multu, lo(dst), src);
    462			emit(ctx, mflo, lo(dst));
    463			emit(ctx, mfhi, tmp);
    464		}
    465
    466		/* hi(dst) += tmp */
    467		emit(ctx, addu, hi(dst), hi(dst), tmp);
    468		clobber_reg64(ctx, dst);
    469		break;
    470	}
    471}
    472
    473/* ALU mul register (64x64-bit) */
    474static void emit_mul_r64(struct jit_context *ctx,
    475			 const u8 dst[], const u8 src[])
    476{
    477	u8 acc = MIPS_R_T8;
    478	u8 tmp = MIPS_R_T9;
    479
    480	/* acc = hi(dst) * lo(src) */
    481	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
    482		emit(ctx, mul, acc, hi(dst), lo(src));
    483	} else {
    484		emit(ctx, multu, hi(dst), lo(src));
    485		emit(ctx, mflo, acc);
    486	}
    487
    488	/* tmp = lo(dst) * hi(src) */
    489	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
    490		emit(ctx, mul, tmp, lo(dst), hi(src));
    491	} else {
    492		emit(ctx, multu, lo(dst), hi(src));
    493		emit(ctx, mflo, tmp);
    494	}
    495
    496	/* acc += tmp */
    497	emit(ctx, addu, acc, acc, tmp);
    498
    499	/* tmp = lo(dst) * lo(src) >> 32 */
    500	/* lo(dst) = lo(dst) * lo(src) */
    501	if (cpu_has_mips32r6) {
    502		emit(ctx, muhu, tmp, lo(dst), lo(src));
    503		emit(ctx, mulu, lo(dst), lo(dst), lo(src));
    504	} else {
    505		emit(ctx, multu, lo(dst), lo(src));
    506		emit(ctx, mflo, lo(dst));
    507		emit(ctx, mfhi, tmp);
    508	}
    509
    510	/* hi(dst) = acc + tmp */
    511	emit(ctx, addu, hi(dst), acc, tmp);
    512	clobber_reg64(ctx, dst);
    513}
    514
    515/* Helper function for 64-bit modulo */
    516static u64 jit_mod64(u64 a, u64 b)
    517{
    518	u64 rem;
    519
    520	div64_u64_rem(a, b, &rem);
    521	return rem;
    522}
    523
    524/* ALU div/mod register (64-bit) */
    525static void emit_divmod_r64(struct jit_context *ctx,
    526			    const u8 dst[], const u8 src[], u8 op)
    527{
    528	const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
    529	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
    530	const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */
    531	int exclude, k;
    532	u32 addr = 0;
    533
    534	/* Push caller-saved registers on stack */
    535	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    536		  0, JIT_RESERVED_STACK);
    537
    538	/* Put 64-bit arguments 1 and 2 in registers a0-a3 */
    539	for (k = 0; k < 2; k++) {
    540		emit(ctx, move, MIPS_R_T9, src[k]);
    541		emit(ctx, move, r1[k], dst[k]);
    542		emit(ctx, move, r2[k], MIPS_R_T9);
    543	}
    544
    545	/* Emit function call */
    546	switch (BPF_OP(op)) {
    547	/* dst = dst / src */
    548	case BPF_DIV:
    549		addr = (u32)&div64_u64;
    550		break;
    551	/* dst = dst % src */
    552	case BPF_MOD:
    553		addr = (u32)&jit_mod64;
    554		break;
    555	}
    556	emit_mov_i(ctx, MIPS_R_T9, addr);
    557	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    558	emit(ctx, nop); /* Delay slot */
    559
    560	/* Store the 64-bit result in dst */
    561	emit(ctx, move, dst[0], r0[0]);
    562	emit(ctx, move, dst[1], r0[1]);
    563
    564	/* Restore caller-saved registers, excluding the computed result */
    565	exclude = BIT(lo(dst)) | BIT(hi(dst));
    566	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    567		 exclude, JIT_RESERVED_STACK);
    568	emit_load_delay(ctx);
    569
    570	clobber_reg64(ctx, dst);
    571	clobber_reg(ctx, MIPS_R_V0);
    572	clobber_reg(ctx, MIPS_R_V1);
    573	clobber_reg(ctx, MIPS_R_RA);
    574}
    575
    576/* Swap bytes in a register word */
    577static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
    578{
    579	u8 tmp = MIPS_R_T9;
    580
    581	emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */
    582	emit(ctx, sll, tmp, tmp, 8);    /* tmp = tmp << 8         */
    583	emit(ctx, srl, dst, src, 8);    /* dst = src >> 8         */
    584	emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */
    585	emit(ctx, or,  dst, dst, tmp);  /* dst = dst | tmp        */
    586}
    587
    588/* Swap half words in a register word */
    589static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
    590{
    591	u8 tmp = MIPS_R_T9;
    592
    593	emit(ctx, sll, tmp, src, 16);  /* tmp = src << 16 */
    594	emit(ctx, srl, dst, src, 16);  /* dst = src >> 16 */
    595	emit(ctx, or,  dst, dst, tmp); /* dst = dst | tmp */
    596}
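
/*
 * Worked example: for src = 0xaabbccdd, emit_swap8_r with mask
 * 0x00ff00ff produces tmp = 0xbb00dd00 and dst = 0x00aa00cc, which
 * or to 0xbbaaddcc (bytes swapped within each half word, as wsbh
 * does). Applying emit_swap16_r to that gives 0xddccbbaa, the fully
 * byte-swapped word.
 */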
    597
    598/* Swap bytes and truncate a register double word, word or half word */
    599static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
    600{
    601	u8 tmp = MIPS_R_T8;
    602
    603	switch (width) {
    604	/* Swap bytes in a double word */
    605	case 64:
    606		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
    607			emit(ctx, rotr, tmp, hi(dst), 16);
    608			emit(ctx, rotr, hi(dst), lo(dst), 16);
    609			emit(ctx, wsbh, lo(dst), tmp);
    610			emit(ctx, wsbh, hi(dst), hi(dst));
    611		} else {
    612			emit_swap16_r(ctx, tmp, lo(dst));
    613			emit_swap16_r(ctx, lo(dst), hi(dst));
    614			emit(ctx, move, hi(dst), tmp);
    615
    616			emit(ctx, lui, tmp, 0xff);      /* tmp = 0x00ff0000 */
    617			emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */
    618			emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
    619			emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
    620		}
    621		break;
    622	/* Swap bytes in a word */
    623	/* Swap bytes in a half word */
    624	case 32:
    625	case 16:
    626		emit_bswap_r(ctx, lo(dst), width);
    627		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    628		break;
    629	}
    630	clobber_reg64(ctx, dst);
    631}
    632
    633/* Truncate a register double word, word or half word */
    634static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
    635{
    636	switch (width) {
    637	case 64:
    638		break;
    639	/* Zero-extend a word */
    640	case 32:
    641		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    642		clobber_reg(ctx, hi(dst));
    643		break;
    644	/* Zero-extend a half word */
    645	case 16:
    646		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    647		emit(ctx, andi, lo(dst), lo(dst), 0xffff);
    648		clobber_reg64(ctx, dst);
    649		break;
    650	}
    651}
    652
    653/* Load operation: dst = *(size*)(src + off) */
    654static void emit_ldx(struct jit_context *ctx,
    655		     const u8 dst[], u8 src, s16 off, u8 size)
    656{
    657	switch (size) {
    658	/* Load a byte */
    659	case BPF_B:
    660		emit(ctx, lbu, lo(dst), off, src);
    661		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    662		break;
    663	/* Load a half word */
    664	case BPF_H:
    665		emit(ctx, lhu, lo(dst), off, src);
    666		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    667		break;
    668	/* Load a word */
    669	case BPF_W:
    670		emit(ctx, lw, lo(dst), off, src);
    671		emit(ctx, move, hi(dst), MIPS_R_ZERO);
    672		break;
    673	/* Load a double word */
    674	case BPF_DW:
    675		if (dst[1] == src) {
    676			emit(ctx, lw, dst[0], off + 4, src);
    677			emit(ctx, lw, dst[1], off, src);
    678		} else {
    679			emit(ctx, lw, dst[1], off, src);
    680			emit(ctx, lw, dst[0], off + 4, src);
    681		}
    682		emit_load_delay(ctx);
    683		break;
    684	}
    685	clobber_reg64(ctx, dst);
    686}
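
/*
 * Note on the BPF_DW case above: the two word loads are ordered so
 * that the destination half which aliases the src base register is
 * written last; otherwise the first lw would overwrite the base
 * address before the second load used it.
 */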
    687
    688/* Store operation: *(size *)(dst + off) = src */
    689static void emit_stx(struct jit_context *ctx,
    690		     const u8 dst, const u8 src[], s16 off, u8 size)
    691{
    692	switch (size) {
    693	/* Store a byte */
    694	case BPF_B:
    695		emit(ctx, sb, lo(src), off, dst);
    696		break;
    697	/* Store a half word */
    698	case BPF_H:
    699		emit(ctx, sh, lo(src), off, dst);
    700		break;
    701	/* Store a word */
    702	case BPF_W:
    703		emit(ctx, sw, lo(src), off, dst);
    704		break;
    705	/* Store a double word */
    706	case BPF_DW:
    707		emit(ctx, sw, src[1], off, dst);
    708		emit(ctx, sw, src[0], off + 4, dst);
    709		break;
    710	}
    711}
    712
    713/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
    714static void emit_atomic_r32(struct jit_context *ctx,
    715			    u8 dst, u8 src, s16 off, u8 code)
    716{
    717	u32 exclude = 0;
    718	u32 addr = 0;
    719
    720	/* Push caller-saved registers on stack */
    721	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    722		  0, JIT_RESERVED_STACK);
    723	/*
    724	 * Argument 1: dst+off if xchg, otherwise src, passed in register a0
    725	 * Argument 2: src if xchg, otherwise dst+off, passed in register a1
    726	 */
    727	emit(ctx, move, MIPS_R_T9, dst);
    728	if (code == BPF_XCHG) {
    729		emit(ctx, move, MIPS_R_A1, src);
    730		emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off);
    731	} else {
    732		emit(ctx, move, MIPS_R_A0, src);
    733		emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
    734	}
    735
    736	/* Emit function call */
    737	switch (code) {
    738	case BPF_ADD:
    739		addr = (u32)&atomic_add;
    740		break;
    741	case BPF_ADD | BPF_FETCH:
    742		addr = (u32)&atomic_fetch_add;
    743		break;
    744	case BPF_SUB:
    745		addr = (u32)&atomic_sub;
    746		break;
    747	case BPF_SUB | BPF_FETCH:
    748		addr = (u32)&atomic_fetch_sub;
    749		break;
    750	case BPF_OR:
    751		addr = (u32)&atomic_or;
    752		break;
    753	case BPF_OR | BPF_FETCH:
    754		addr = (u32)&atomic_fetch_or;
    755		break;
    756	case BPF_AND:
    757		addr = (u32)&atomic_and;
    758		break;
    759	case BPF_AND | BPF_FETCH:
    760		addr = (u32)&atomic_fetch_and;
    761		break;
    762	case BPF_XOR:
    763		addr = (u32)&atomic_xor;
    764		break;
    765	case BPF_XOR | BPF_FETCH:
    766		addr = (u32)&atomic_fetch_xor;
    767		break;
    768	case BPF_XCHG:
    769		addr = (u32)&atomic_xchg;
    770		break;
    771	}
    772	emit_mov_i(ctx, MIPS_R_T9, addr);
    773	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    774	emit(ctx, nop); /* Delay slot */
    775
    776	/* Update src register with old value, if specified */
    777	if (code & BPF_FETCH) {
    778		emit(ctx, move, src, MIPS_R_V0);
    779		exclude = BIT(src);
    780		clobber_reg(ctx, src);
    781	}
    782
    783	/* Restore caller-saved registers, except any fetched value */
    784	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    785		 exclude, JIT_RESERVED_STACK);
    786	emit_load_delay(ctx);
    787	clobber_reg(ctx, MIPS_R_RA);
    788}
    789
    790/* Helper function for 64-bit atomic exchange */
    791static s64 jit_xchg64(s64 a, atomic64_t *v)
    792{
    793	return atomic64_xchg(v, a);
    794}
    795
    796/* Atomic read-modify-write (64-bit) */
    797static void emit_atomic_r64(struct jit_context *ctx,
    798			    u8 dst, const u8 src[], s16 off, u8 code)
    799{
    800	const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
    801	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
    802	u32 exclude = 0;
    803	u32 addr = 0;
    804
    805	/* Push caller-saved registers on stack */
    806	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    807		  0, JIT_RESERVED_STACK);
    808	/*
    809	 * Argument 1: 64-bit src, passed in registers a0-a1
    810	 * Argument 2: 32-bit dst+off, passed in register a2
    811	 */
    812	emit(ctx, move, MIPS_R_T9, dst);
    813	emit(ctx, move, r1[0], src[0]);
    814	emit(ctx, move, r1[1], src[1]);
    815	emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);
    816
    817	/* Emit function call */
    818	switch (code) {
    819	case BPF_ADD:
    820		addr = (u32)&atomic64_add;
    821		break;
    822	case BPF_ADD | BPF_FETCH:
    823		addr = (u32)&atomic64_fetch_add;
    824		break;
    825	case BPF_SUB:
    826		addr = (u32)&atomic64_sub;
    827		break;
    828	case BPF_SUB | BPF_FETCH:
    829		addr = (u32)&atomic64_fetch_sub;
    830		break;
    831	case BPF_OR:
    832		addr = (u32)&atomic64_or;
    833		break;
    834	case BPF_OR | BPF_FETCH:
    835		addr = (u32)&atomic64_fetch_or;
    836		break;
    837	case BPF_AND:
    838		addr = (u32)&atomic64_and;
    839		break;
    840	case BPF_AND | BPF_FETCH:
    841		addr = (u32)&atomic64_fetch_and;
    842		break;
    843	case BPF_XOR:
    844		addr = (u32)&atomic64_xor;
    845		break;
    846	case BPF_XOR | BPF_FETCH:
    847		addr = (u32)&atomic64_fetch_xor;
    848		break;
    849	case BPF_XCHG:
    850		addr = (u32)&jit_xchg64;
    851		break;
    852	}
    853	emit_mov_i(ctx, MIPS_R_T9, addr);
    854	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    855	emit(ctx, nop); /* Delay slot */
    856
    857	/* Update src register with old value, if specified */
    858	if (code & BPF_FETCH) {
    859		emit(ctx, move, lo(src), lo(r0));
    860		emit(ctx, move, hi(src), hi(r0));
    861		exclude = BIT(src[0]) | BIT(src[1]);
    862		clobber_reg64(ctx, src);
    863	}
    864
    865	/* Restore caller-saved registers, except any fetched value */
    866	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    867		 exclude, JIT_RESERVED_STACK);
    868	emit_load_delay(ctx);
    869	clobber_reg(ctx, MIPS_R_RA);
    870}
    871
    872/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */
    873static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off)
    874{
    875	const u8 *r0 = bpf2mips32[BPF_REG_0];
    876
    877	/* Push caller-saved registers on stack */
    878	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    879		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    880	/*
    881	 * Argument 1: 32-bit dst+off, passed in register a0
    882	 * Argument 2: 32-bit r0, passed in register a1
    883	 * Argument 3: 32-bit src, passed in register a2
    884	 */
    885	emit(ctx, addiu, MIPS_R_T9, dst, off);
    886	emit(ctx, move, MIPS_R_T8, src);
    887	emit(ctx, move, MIPS_R_A1, lo(r0));
    888	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
    889	emit(ctx, move, MIPS_R_A2, MIPS_R_T8);
    890
    891	/* Emit function call */
    892	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg);
    893	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    894	emit(ctx, nop); /* Delay slot */
    895
    896#ifdef __BIG_ENDIAN
    897	emit(ctx, move, lo(r0), MIPS_R_V0);
    898#endif
    899	/* Restore caller-saved registers, except the return value */
    900	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    901		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    902	emit_load_delay(ctx);
    903	clobber_reg(ctx, MIPS_R_V0);
    904	clobber_reg(ctx, MIPS_R_V1);
    905	clobber_reg(ctx, MIPS_R_RA);
    906}
    907
    908/* Atomic compare-and-exchange (64-bit) */
    909static void emit_cmpxchg_r64(struct jit_context *ctx,
    910			     u8 dst, const u8 src[], s16 off)
    911{
    912	const u8 *r0 = bpf2mips32[BPF_REG_0];
    913	const u8 *r2 = bpf2mips32[BPF_REG_2];
    914
    915	/* Push caller-saved registers on stack */
    916	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    917		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    918	/*
    919	 * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused)
    920	 * Argument 2: 64-bit r0, passed in registers a2-a3
    921	 * Argument 3: 64-bit src, passed on stack
    922	 */
    923	push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK);
    924	emit(ctx, addiu, MIPS_R_T9, dst, off);
    925	emit(ctx, move, r2[0], r0[0]);
    926	emit(ctx, move, r2[1], r0[1]);
    927	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
    928
    929	/* Emit function call */
    930	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg);
    931	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
    932	emit(ctx, nop); /* Delay slot */
    933
    934	/* Restore caller-saved registers, except the return value */
    935	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
    936		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
    937	emit_load_delay(ctx);
    938	clobber_reg(ctx, MIPS_R_V0);
    939	clobber_reg(ctx, MIPS_R_V1);
    940	clobber_reg(ctx, MIPS_R_RA);
    941}
    942
    943/*
    944 * Conditional movz or an emulated equivalent.
    945 * Note that the rs register may be modified.
    946 */
    947static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
    948{
    949	if (cpu_has_mips_2) {
    950		emit(ctx, movz, rd, rs, rt);           /* rd = rt ? rd : rs  */
    951	} else if (cpu_has_mips32r6) {
    952		if (rs != MIPS_R_ZERO)
    953			emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt == 0  */
    954		emit(ctx, selnez, rd, rd, rt);         /* rd = 0 if rt != 0  */
    955		if (rs != MIPS_R_ZERO)
    956			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
    957	} else {
     958		emit(ctx, bnez, rt, 8);                /* PC += 8 if rt != 0 */
    959		emit(ctx, nop);                        /* +0: delay slot     */
    960		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
    961	}
    962	clobber_reg(ctx, rd);
    963	clobber_reg(ctx, rs);
    964}
    965
    966/*
    967 * Conditional movn or an emulated equivalent.
    968 * Note that the rs register may be modified.
    969 */
    970static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
    971{
    972	if (cpu_has_mips_2) {
    973		emit(ctx, movn, rd, rs, rt);           /* rd = rt ? rs : rd  */
    974	} else if (cpu_has_mips32r6) {
    975		if (rs != MIPS_R_ZERO)
    976			emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0  */
    977		emit(ctx, seleqz, rd, rd, rt);         /* rd = 0 if rt != 0  */
    978		if (rs != MIPS_R_ZERO)
    979			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
    980	} else {
     981		emit(ctx, beqz, rt, 8);                /* PC += 8 if rt == 0 */
    982		emit(ctx, nop);                        /* +0: delay slot     */
    983		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
    984	}
    985	clobber_reg(ctx, rd);
    986	clobber_reg(ctx, rs);
    987}
    988
    989/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
    990static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
    991			   const u8 rs[], s64 imm)
    992{
    993	u8 tmp = MIPS_R_T9;
    994
    995	if (imm < 0) {
    996		emit_mov_i(ctx, rd, imm);                 /* rd = imm        */
    997		emit(ctx, sltu, rd, lo(rs), rd);          /* rd = rsl < rd   */
    998		emit(ctx, sltiu, tmp, hi(rs), -1);        /* tmp = rsh < ~0U */
    999		emit(ctx, or, rd, rd, tmp);               /* rd = rd | tmp   */
   1000	} else { /* imm >= 0 */
   1001		if (imm > 0x7fff) {
   1002			emit_mov_i(ctx, rd, (s32)imm);     /* rd = imm       */
   1003			emit(ctx, sltu, rd, lo(rs), rd);   /* rd = rsl < rd  */
   1004		} else {
   1005			emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
   1006		}
   1007		emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh  */
   1008	}
   1009}
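
/*
 * Reasoning for the imm < 0 branch above: the sign-extended 64-bit
 * immediate has 0xffffffff in its high word, so rs < imm holds iff
 * rsh < 0xffffffff (sltiu tmp, hi(rs), -1), or the high words are
 * equal and rsl < lo(imm) (sltu rd, lo(rs), rd). Or-ing the two is
 * safe because the low-word term is irrelevant whenever the
 * high-word term already decides the comparison.
 */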
   1010
   1011/* Emulation of 64-bit sltu rd, rs, rt */
   1012static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
   1013			  const u8 rs[], const u8 rt[])
   1014{
   1015	u8 tmp = MIPS_R_T9;
   1016
   1017	emit(ctx, sltu, rd, lo(rs), lo(rt));           /* rd = rsl < rtl     */
   1018	emit(ctx, subu, tmp, hi(rs), hi(rt));          /* tmp = rsh - rth    */
   1019	emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp);        /* rd = 0 if tmp != 0 */
   1020	emit(ctx, sltu, tmp, hi(rs), hi(rt));          /* tmp = rsh < rth    */
   1021	emit(ctx, or, rd, rd, tmp);                    /* rd = rd | tmp      */
   1022}
   1023
   1024/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
   1025static void emit_slti_r64(struct jit_context *ctx, u8 rd,
   1026			  const u8 rs[], s64 imm)
   1027{
   1028	u8 t1 = MIPS_R_T8;
   1029	u8 t2 = MIPS_R_T9;
   1030	u8 cmp;
   1031
   1032	/*
   1033	 * if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl
   1034	 * else                      t1 = rsl <u imm
   1035	 */
   1036	emit_mov_i(ctx, rd, (s32)imm);
   1037	emit(ctx, sltu, t1, lo(rs), rd);               /* t1 = rsl <u imm   */
   1038	emit(ctx, sltu, t2, rd, lo(rs));               /* t2 = imm <u rsl   */
   1039	emit(ctx, srl, rd, hi(rs), 31);                /* rd = rsh >> 31    */
   1040	if (imm < 0)
   1041		emit_movz_r(ctx, t1, t2, rd);          /* t1 = rd ? t1 : t2 */
   1042	else
   1043		emit_movn_r(ctx, t1, t2, rd);          /* t1 = rd ? t2 : t1 */
   1044	/*
   1045	 * if ((imm < 0 && rsh != 0xffffffff) ||
   1046	 *     (imm >= 0 && rsh != 0))
   1047	 *      t1 = 0
   1048	 */
   1049	if (imm < 0) {
   1050		emit(ctx, addiu, rd, hi(rs), 1);       /* rd = rsh + 1 */
   1051		cmp = rd;
   1052	} else { /* imm >= 0 */
   1053		cmp = hi(rs);
   1054	}
   1055	emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp);        /* t1 = 0 if cmp != 0 */
   1056
   1057	/*
   1058	 * if (imm < 0) rd = rsh < -1
   1059	 * else         rd = rsh != 0
   1060	 * rd = rd | t1
   1061	 */
   1062	emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
   1063	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1       */
   1064}
   1065
    1066/* Emulation of 64-bit slt rd, rs, rt */
   1067static void emit_slt_r64(struct jit_context *ctx, u8 rd,
   1068			 const u8 rs[], const u8 rt[])
   1069{
   1070	u8 t1 = MIPS_R_T7;
   1071	u8 t2 = MIPS_R_T8;
   1072	u8 t3 = MIPS_R_T9;
   1073
   1074	/*
   1075	 * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
   1076	 * else                     t1 = rsl <u rtl
   1077	 * if (rsh == rth)          t1 = 0
   1078	 */
   1079	emit(ctx, sltu, t1, lo(rs), lo(rt));           /* t1 = rsl <u rtl   */
   1080	emit(ctx, sltu, t2, lo(rt), lo(rs));           /* t2 = rtl <u rsl   */
    1081	emit(ctx, xor, t3, hi(rs), hi(rt));            /* t3 = rsh ^ rth    */
   1082	emit(ctx, srl, rd, t3, 31);                    /* rd = t3 >> 31     */
   1083	emit_movn_r(ctx, t1, t2, rd);                  /* t1 = rd ? t2 : t1 */
   1084	emit_movn_r(ctx, t1, MIPS_R_ZERO, t3);         /* t1 = 0 if t3 != 0 */
   1085
   1086	/* rd = (rsh < rth) | t1 */
   1087	emit(ctx, slt, rd, hi(rs), hi(rt));            /* rd = rsh <s rth   */
   1088	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1      */
   1089}
   1090
   1091/* Jump immediate (64-bit) */
   1092static void emit_jmp_i64(struct jit_context *ctx,
   1093			 const u8 dst[], s32 imm, s32 off, u8 op)
   1094{
   1095	u8 tmp = MIPS_R_T6;
   1096
   1097	switch (op) {
   1098	/* No-op, used internally for branch optimization */
   1099	case JIT_JNOP:
   1100		break;
   1101	/* PC += off if dst == imm */
   1102	/* PC += off if dst != imm */
   1103	case BPF_JEQ:
   1104	case BPF_JNE:
   1105		if (imm >= -0x7fff && imm <= 0x8000) {
   1106			emit(ctx, addiu, tmp, lo(dst), -imm);
   1107		} else if ((u32)imm <= 0xffff) {
   1108			emit(ctx, xori, tmp, lo(dst), imm);
   1109		} else {       /* Register fallback */
   1110			emit_mov_i(ctx, tmp, imm);
   1111			emit(ctx, xor, tmp, lo(dst), tmp);
   1112		}
   1113		if (imm < 0) { /* Compare sign extension */
    1114			emit(ctx, addiu, MIPS_R_T9, hi(dst), 1);
   1115			emit(ctx, or, tmp, tmp, MIPS_R_T9);
   1116		} else {       /* Compare zero extension */
   1117			emit(ctx, or, tmp, tmp, hi(dst));
   1118		}
   1119		if (op == BPF_JEQ)
   1120			emit(ctx, beqz, tmp, off);
   1121		else   /* BPF_JNE */
   1122			emit(ctx, bnez, tmp, off);
   1123		break;
   1124	/* PC += off if dst & imm */
   1125	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
   1126	case BPF_JSET:
   1127	case JIT_JNSET:
   1128		if ((u32)imm <= 0xffff) {
   1129			emit(ctx, andi, tmp, lo(dst), imm);
   1130		} else {     /* Register fallback */
   1131			emit_mov_i(ctx, tmp, imm);
   1132			emit(ctx, and, tmp, lo(dst), tmp);
   1133		}
   1134		if (imm < 0) /* Sign-extension pulls in high word */
   1135			emit(ctx, or, tmp, tmp, hi(dst));
   1136		if (op == BPF_JSET)
   1137			emit(ctx, bnez, tmp, off);
   1138		else   /* JIT_JNSET */
   1139			emit(ctx, beqz, tmp, off);
   1140		break;
   1141	/* PC += off if dst > imm */
   1142	case BPF_JGT:
   1143		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
   1144		emit(ctx, beqz, tmp, off);
   1145		break;
   1146	/* PC += off if dst >= imm */
   1147	case BPF_JGE:
   1148		emit_sltiu_r64(ctx, tmp, dst, imm);
   1149		emit(ctx, beqz, tmp, off);
   1150		break;
   1151	/* PC += off if dst < imm */
   1152	case BPF_JLT:
   1153		emit_sltiu_r64(ctx, tmp, dst, imm);
   1154		emit(ctx, bnez, tmp, off);
   1155		break;
   1156	/* PC += off if dst <= imm */
   1157	case BPF_JLE:
   1158		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
   1159		emit(ctx, bnez, tmp, off);
   1160		break;
   1161	/* PC += off if dst > imm (signed) */
   1162	case BPF_JSGT:
   1163		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
   1164		emit(ctx, beqz, tmp, off);
   1165		break;
   1166	/* PC += off if dst >= imm (signed) */
   1167	case BPF_JSGE:
   1168		emit_slti_r64(ctx, tmp, dst, imm);
   1169		emit(ctx, beqz, tmp, off);
   1170		break;
   1171	/* PC += off if dst < imm (signed) */
   1172	case BPF_JSLT:
   1173		emit_slti_r64(ctx, tmp, dst, imm);
   1174		emit(ctx, bnez, tmp, off);
   1175		break;
   1176	/* PC += off if dst <= imm (signed) */
   1177	case BPF_JSLE:
   1178		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
   1179		emit(ctx, bnez, tmp, off);
   1180		break;
   1181	}
   1182}
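
/*
 * Worked example of the BPF_JEQ/BPF_JNE immediate compare above:
 * for imm = -1, the low word is tested with "addiu tmp, lo(dst), 1",
 * which is zero iff lo(dst) == 0xffffffff, and the high word with
 * "addiu t9, hi(dst), 1", zero iff hi(dst) matches the 0xffffffff
 * sign extension. Or-ing the two gives zero exactly when the full
 * 64-bit dst equals (s64)imm.
 */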
   1183
   1184/* Jump register (64-bit) */
   1185static void emit_jmp_r64(struct jit_context *ctx,
   1186			 const u8 dst[], const u8 src[], s32 off, u8 op)
   1187{
   1188	u8 t1 = MIPS_R_T6;
   1189	u8 t2 = MIPS_R_T7;
   1190
   1191	switch (op) {
   1192	/* No-op, used internally for branch optimization */
   1193	case JIT_JNOP:
   1194		break;
   1195	/* PC += off if dst == src */
   1196	/* PC += off if dst != src */
   1197	case BPF_JEQ:
   1198	case BPF_JNE:
   1199		emit(ctx, subu, t1, lo(dst), lo(src));
   1200		emit(ctx, subu, t2, hi(dst), hi(src));
   1201		emit(ctx, or, t1, t1, t2);
   1202		if (op == BPF_JEQ)
   1203			emit(ctx, beqz, t1, off);
   1204		else   /* BPF_JNE */
   1205			emit(ctx, bnez, t1, off);
   1206		break;
   1207	/* PC += off if dst & src */
    1208	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
   1209	case BPF_JSET:
   1210	case JIT_JNSET:
   1211		emit(ctx, and, t1, lo(dst), lo(src));
   1212		emit(ctx, and, t2, hi(dst), hi(src));
   1213		emit(ctx, or, t1, t1, t2);
   1214		if (op == BPF_JSET)
   1215			emit(ctx, bnez, t1, off);
   1216		else   /* JIT_JNSET */
   1217			emit(ctx, beqz, t1, off);
   1218		break;
   1219	/* PC += off if dst > src */
   1220	case BPF_JGT:
   1221		emit_sltu_r64(ctx, t1, src, dst);
   1222		emit(ctx, bnez, t1, off);
   1223		break;
   1224	/* PC += off if dst >= src */
   1225	case BPF_JGE:
   1226		emit_sltu_r64(ctx, t1, dst, src);
   1227		emit(ctx, beqz, t1, off);
   1228		break;
   1229	/* PC += off if dst < src */
   1230	case BPF_JLT:
   1231		emit_sltu_r64(ctx, t1, dst, src);
   1232		emit(ctx, bnez, t1, off);
   1233		break;
   1234	/* PC += off if dst <= src */
   1235	case BPF_JLE:
   1236		emit_sltu_r64(ctx, t1, src, dst);
   1237		emit(ctx, beqz, t1, off);
   1238		break;
   1239	/* PC += off if dst > src (signed) */
   1240	case BPF_JSGT:
   1241		emit_slt_r64(ctx, t1, src, dst);
   1242		emit(ctx, bnez, t1, off);
   1243		break;
   1244	/* PC += off if dst >= src (signed) */
   1245	case BPF_JSGE:
   1246		emit_slt_r64(ctx, t1, dst, src);
   1247		emit(ctx, beqz, t1, off);
   1248		break;
   1249	/* PC += off if dst < src (signed) */
   1250	case BPF_JSLT:
   1251		emit_slt_r64(ctx, t1, dst, src);
   1252		emit(ctx, bnez, t1, off);
   1253		break;
   1254	/* PC += off if dst <= src (signed) */
   1255	case BPF_JSLE:
   1256		emit_slt_r64(ctx, t1, src, dst);
   1257		emit(ctx, beqz, t1, off);
   1258		break;
   1259	}
   1260}
   1261
   1262/* Function call */
   1263static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
   1264{
   1265	bool fixed;
   1266	u64 addr;
   1267
   1268	/* Decode the call address */
   1269	if (bpf_jit_get_func_addr(ctx->program, insn, false,
   1270				  &addr, &fixed) < 0)
   1271		return -1;
   1272	if (!fixed)
   1273		return -1;
   1274
   1275	/* Push stack arguments */
   1276	push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);
   1277
   1278	/* Emit function call */
   1279	emit_mov_i(ctx, MIPS_R_T9, addr);
   1280	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
   1281	emit(ctx, nop); /* Delay slot */
   1282
   1283	clobber_reg(ctx, MIPS_R_RA);
   1284	clobber_reg(ctx, MIPS_R_V0);
   1285	clobber_reg(ctx, MIPS_R_V1);
   1286	return 0;
   1287}
   1288
   1289/* Function tail call */
   1290static int emit_tail_call(struct jit_context *ctx)
   1291{
   1292	u8 ary = lo(bpf2mips32[BPF_REG_2]);
   1293	u8 ind = lo(bpf2mips32[BPF_REG_3]);
   1294	u8 t1 = MIPS_R_T8;
   1295	u8 t2 = MIPS_R_T9;
   1296	int off;
   1297
   1298	/*
   1299	 * Tail call:
   1300	 * eBPF R1   - function argument (context ptr), passed in a0-a1
   1301	 * eBPF R2   - ptr to object with array of function entry points
   1302	 * eBPF R3   - array index of function to be called
   1303	 * stack[sz] - remaining tail call count, initialized in prologue
   1304	 */
   1305
   1306	/* if (ind >= ary->map.max_entries) goto out */
   1307	off = offsetof(struct bpf_array, map.max_entries);
   1308	if (off > 0x7fff)
   1309		return -1;
   1310	emit(ctx, lw, t1, off, ary);             /* t1 = ary->map.max_entries*/
   1311	emit_load_delay(ctx);                    /* Load delay slot          */
   1312	emit(ctx, sltu, t1, ind, t1);            /* t1 = ind < t1            */
   1313	emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0  */
   1314						 /* (next insn delay slot)   */
   1315	/* if (TCC-- <= 0) goto out */
   1316	emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP);  /* t2 = *(SP + size) */
   1317	emit_load_delay(ctx);                     /* Load delay slot         */
   1318	emit(ctx, blez, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 <= 0 */
   1319	emit(ctx, addiu, t2, t2, -1);             /* t2-- (delay slot)       */
   1320	emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP);  /* *(SP + size) = t2 */
   1321
   1322	/* prog = ary->ptrs[ind] */
   1323	off = offsetof(struct bpf_array, ptrs);
   1324	if (off > 0x7fff)
   1325		return -1;
   1326	emit(ctx, sll, t1, ind, 2);               /* t1 = ind << 2           */
   1327	emit(ctx, addu, t1, t1, ary);             /* t1 += ary               */
   1328	emit(ctx, lw, t2, off, t1);               /* t2 = *(t1 + off)        */
   1329	emit_load_delay(ctx);                     /* Load delay slot         */
   1330
   1331	/* if (prog == 0) goto out */
   1332	emit(ctx, beqz, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 == 0 */
   1333	emit(ctx, nop);                           /* Delay slot              */
   1334
    1335	/* func = prog->bpf_func + JIT_TCALL_SKIP (prologue skip offset) */
   1336	off = offsetof(struct bpf_prog, bpf_func);
   1337	if (off > 0x7fff)
   1338		return -1;
   1339	emit(ctx, lw, t1, off, t2);                /* t1 = *(t2 + off)       */
   1340	emit_load_delay(ctx);                      /* Load delay slot        */
   1341	emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP);  /* t1 += skip (8 or 12)   */
   1342
   1343	/* goto func */
   1344	build_epilogue(ctx, t1);
   1345	return 0;
   1346}
   1347
   1348/*
   1349 * Stack frame layout for a JITed program (stack grows down).
   1350 *
   1351 * Higher address  : Caller's stack frame       :
   1352 *                 :----------------------------:
   1353 *                 : 64-bit eBPF args r3-r5     :
   1354 *                 :----------------------------:
   1355 *                 : Reserved / tail call count :
   1356 *                 +============================+  <--- MIPS sp before call
   1357 *                 | Callee-saved registers,    |
   1358 *                 | including RA and FP        |
   1359 *                 +----------------------------+  <--- eBPF FP (MIPS zero,fp)
   1360 *                 | Local eBPF variables       |
   1361 *                 | allocated by program       |
   1362 *                 +----------------------------+
   1363 *                 | Reserved for caller-saved  |
   1364 *                 | registers                  |
   1365 *                 +----------------------------+
   1366 *                 | Reserved for 64-bit eBPF   |
   1367 *                 | args r3-r5 & args passed   |
   1368 *                 | on stack in kernel calls   |
   1369 * Lower address   +============================+  <--- MIPS sp
   1370 */
   1371
   1372/* Build program prologue to set up the stack and registers */
   1373void build_prologue(struct jit_context *ctx)
   1374{
   1375	const u8 *r1 = bpf2mips32[BPF_REG_1];
   1376	const u8 *fp = bpf2mips32[BPF_REG_FP];
   1377	int stack, saved, locals, reserved;
   1378
   1379	/*
   1380	 * The first two instructions initialize TCC in the reserved (for us)
   1381	 * 16-byte area in the parent's stack frame. On a tail call, the
   1382	 * calling function jumps into the prologue after these instructions.
   1383	 */
   1384	emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT, 0xffff));
   1385	emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);
   1386
   1387	/*
   1388	 * Register eBPF R1 contains the 32-bit context pointer argument.
   1389	 * A 32-bit argument is always passed in MIPS register a0, regardless
   1390	 * of CPU endianness. Initialize R1 accordingly and zero-extend.
   1391	 */
   1392#ifdef __BIG_ENDIAN
   1393	emit(ctx, move, lo(r1), MIPS_R_A0);
   1394#endif
   1395
   1396	/* === Entry-point for tail calls === */
   1397
   1398	/* Zero-extend the 32-bit argument */
   1399	emit(ctx, move, hi(r1), MIPS_R_ZERO);
   1400
   1401	/* If the eBPF frame pointer was accessed it must be saved */
   1402	if (ctx->accessed & BIT(BPF_REG_FP))
   1403		clobber_reg64(ctx, fp);
   1404
   1405	/* Compute the stack space needed for callee-saved registers */
   1406	saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
   1407	saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
   1408
   1409	/* Stack space used by eBPF program local data */
   1410	locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
   1411
   1412	/*
   1413	 * If we are emitting function calls, reserve extra stack space for
   1414	 * caller-saved registers and function arguments passed on the stack.
   1415	 * The required space is computed automatically during resource
   1416	 * usage discovery (pass 1).
   1417	 */
   1418	reserved = ctx->stack_used;
   1419
   1420	/* Allocate the stack frame */
   1421	stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
   1422	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);
   1423
   1424	/* Store callee-saved registers on stack */
   1425	push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
   1426
   1427	/* Initialize the eBPF frame pointer if accessed */
   1428	if (ctx->accessed & BIT(BPF_REG_FP))
   1429		emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);
   1430
   1431	ctx->saved_size = saved;
   1432	ctx->stack_size = stack;
   1433}
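
/*
 * Worked example: a program clobbering ra, fp and s0 needs
 * saved = 3 * 4 = 12 bytes, aligned up to 16. With a 24-byte eBPF
 * stack (locals = 24) and, say, 24 bytes of discovered reserved
 * space, the frame is ALIGN(16 + 24 + 24, 8) = 64 bytes; the
 * callee-saved registers are pushed at offset stack - saved = 48,
 * which is also where the eBPF frame pointer is made to point.
 */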
   1434
   1435/* Build the program epilogue to restore the stack and registers */
   1436void build_epilogue(struct jit_context *ctx, int dest_reg)
   1437{
   1438	/* Restore callee-saved registers from stack */
   1439	pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
   1440		 ctx->stack_size - ctx->saved_size);
   1441	/*
   1442	 * A 32-bit return value is always passed in MIPS register v0,
   1443	 * but on big-endian targets the low part of R0 is mapped to v1.
   1444	 */
   1445#ifdef __BIG_ENDIAN
   1446	emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
   1447#endif
   1448
   1449	/* Jump to the return address and adjust the stack pointer */
   1450	emit(ctx, jr, dest_reg);
   1451	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
   1452}
   1453
   1454/* Build one eBPF instruction */
   1455int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
   1456{
   1457	const u8 *dst = bpf2mips32[insn->dst_reg];
   1458	const u8 *src = bpf2mips32[insn->src_reg];
   1459	const u8 *res = bpf2mips32[BPF_REG_0];
   1460	const u8 *tmp = bpf2mips32[JIT_REG_TMP];
   1461	u8 code = insn->code;
   1462	s16 off = insn->off;
   1463	s32 imm = insn->imm;
   1464	s32 val, rel;
   1465	u8 alu, jmp;
   1466
   1467	switch (code) {
   1468	/* ALU operations */
   1469	/* dst = imm */
   1470	case BPF_ALU | BPF_MOV | BPF_K:
   1471		emit_mov_i(ctx, lo(dst), imm);
   1472		emit_zext_ver(ctx, dst);
   1473		break;
   1474	/* dst = src */
   1475	case BPF_ALU | BPF_MOV | BPF_X:
   1476		if (imm == 1) {
   1477			/* Special mov32 for zext */
   1478			emit_mov_i(ctx, hi(dst), 0);
   1479		} else {
   1480			emit_mov_r(ctx, lo(dst), lo(src));
   1481			emit_zext_ver(ctx, dst);
   1482		}
   1483		break;
   1484	/* dst = -dst */
   1485	case BPF_ALU | BPF_NEG:
   1486		emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
   1487		emit_zext_ver(ctx, dst);
   1488		break;
   1489	/* dst = dst & imm */
   1490	/* dst = dst | imm */
   1491	/* dst = dst ^ imm */
   1492	/* dst = dst << imm */
   1493	/* dst = dst >> imm */
   1494	/* dst = dst >> imm (arithmetic) */
   1495	/* dst = dst + imm */
   1496	/* dst = dst - imm */
   1497	/* dst = dst * imm */
   1498	/* dst = dst / imm */
   1499	/* dst = dst % imm */
   1500	case BPF_ALU | BPF_OR | BPF_K:
   1501	case BPF_ALU | BPF_AND | BPF_K:
   1502	case BPF_ALU | BPF_XOR | BPF_K:
   1503	case BPF_ALU | BPF_LSH | BPF_K:
   1504	case BPF_ALU | BPF_RSH | BPF_K:
   1505	case BPF_ALU | BPF_ARSH | BPF_K:
   1506	case BPF_ALU | BPF_ADD | BPF_K:
   1507	case BPF_ALU | BPF_SUB | BPF_K:
   1508	case BPF_ALU | BPF_MUL | BPF_K:
   1509	case BPF_ALU | BPF_DIV | BPF_K:
   1510	case BPF_ALU | BPF_MOD | BPF_K:
   1511		if (!valid_alu_i(BPF_OP(code), imm)) {
   1512			emit_mov_i(ctx, MIPS_R_T6, imm);
   1513			emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
   1514		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
   1515			emit_alu_i(ctx, lo(dst), val, alu);
   1516		}
   1517		emit_zext_ver(ctx, dst);
   1518		break;
   1519	/* dst = dst & src */
   1520	/* dst = dst | src */
   1521	/* dst = dst ^ src */
   1522	/* dst = dst << src */
   1523	/* dst = dst >> src */
   1524	/* dst = dst >> src (arithmetic) */
   1525	/* dst = dst + src */
   1526	/* dst = dst - src */
   1527	/* dst = dst * src */
   1528	/* dst = dst / src */
   1529	/* dst = dst % src */
   1530	case BPF_ALU | BPF_AND | BPF_X:
   1531	case BPF_ALU | BPF_OR | BPF_X:
   1532	case BPF_ALU | BPF_XOR | BPF_X:
   1533	case BPF_ALU | BPF_LSH | BPF_X:
   1534	case BPF_ALU | BPF_RSH | BPF_X:
   1535	case BPF_ALU | BPF_ARSH | BPF_X:
   1536	case BPF_ALU | BPF_ADD | BPF_X:
   1537	case BPF_ALU | BPF_SUB | BPF_X:
   1538	case BPF_ALU | BPF_MUL | BPF_X:
   1539	case BPF_ALU | BPF_DIV | BPF_X:
   1540	case BPF_ALU | BPF_MOD | BPF_X:
   1541		emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code));
   1542		emit_zext_ver(ctx, dst);
   1543		break;
   1544	/* dst = imm (64-bit) */
   1545	case BPF_ALU64 | BPF_MOV | BPF_K:
   1546		emit_mov_se_i64(ctx, dst, imm);
   1547		break;
   1548	/* dst = src (64-bit) */
   1549	case BPF_ALU64 | BPF_MOV | BPF_X:
   1550		emit_mov_r(ctx, lo(dst), lo(src));
   1551		emit_mov_r(ctx, hi(dst), hi(src));
   1552		break;
   1553	/* dst = -dst (64-bit) */
   1554	case BPF_ALU64 | BPF_NEG:
   1555		emit_neg_i64(ctx, dst);
   1556		break;
   1557	/* dst = dst & imm (64-bit) */
   1558	case BPF_ALU64 | BPF_AND | BPF_K:
   1559		emit_alu_i64(ctx, dst, imm, BPF_OP(code));
   1560		break;
   1561	/* dst = dst | imm (64-bit) */
   1562	/* dst = dst ^ imm (64-bit) */
   1563	/* dst = dst + imm (64-bit) */
   1564	/* dst = dst - imm (64-bit) */
   1565	case BPF_ALU64 | BPF_OR | BPF_K:
   1566	case BPF_ALU64 | BPF_XOR | BPF_K:
   1567	case BPF_ALU64 | BPF_ADD | BPF_K:
   1568	case BPF_ALU64 | BPF_SUB | BPF_K:
   1569		if (imm)
   1570			emit_alu_i64(ctx, dst, imm, BPF_OP(code));
   1571		break;
   1572	/* dst = dst << imm (64-bit) */
   1573	/* dst = dst >> imm (64-bit) */
   1574	/* dst = dst >> imm (64-bit, arithmetic) */
   1575	case BPF_ALU64 | BPF_LSH | BPF_K:
   1576	case BPF_ALU64 | BPF_RSH | BPF_K:
   1577	case BPF_ALU64 | BPF_ARSH | BPF_K:
   1578		if (imm)
   1579			emit_shift_i64(ctx, dst, imm, BPF_OP(code));
   1580		break;
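        	/*
        	 * A 64-bit shift on a 32-bit CPU is split across the register
        	 * pair: for dst <<= 40, for example, hi = lo << 8 and lo = 0.
        	 * emit_shift_i64() is assumed to handle both the < 32 and
        	 * >= 32 cases; a shift by zero is skipped as a no-op.
        	 */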
   1581	/* dst = dst * imm (64-bit) */
   1582	case BPF_ALU64 | BPF_MUL | BPF_K:
   1583		emit_mul_i64(ctx, dst, imm);
   1584		break;
   1585	/* dst = dst / imm (64-bit) */
   1586	/* dst = dst % imm (64-bit) */
   1587	case BPF_ALU64 | BPF_DIV | BPF_K:
   1588	case BPF_ALU64 | BPF_MOD | BPF_K:
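        		/*
        		 * eBPF division is unsigned and the verifier rejects a
        		 * zero immediate divisor, so no runtime zero check is
        		 * needed here.
        		 */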
   1589		/*
   1590		 * Sign-extend the immediate value into a temporary register,
   1591		 * and then do the operation on this register.
   1592		 */
   1593		emit_mov_se_i64(ctx, tmp, imm);
   1594		emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
   1595		break;
   1596	/* dst = dst & src (64-bit) */
   1597	/* dst = dst | src (64-bit) */
   1598	/* dst = dst ^ src (64-bit) */
   1599	/* dst = dst + src (64-bit) */
   1600	/* dst = dst - src (64-bit) */
   1601	case BPF_ALU64 | BPF_AND | BPF_X:
   1602	case BPF_ALU64 | BPF_OR | BPF_X:
   1603	case BPF_ALU64 | BPF_XOR | BPF_X:
   1604	case BPF_ALU64 | BPF_ADD | BPF_X:
   1605	case BPF_ALU64 | BPF_SUB | BPF_X:
   1606		emit_alu_r64(ctx, dst, src, BPF_OP(code));
   1607		break;
   1608	/* dst = dst << src (64-bit) */
   1609	/* dst = dst >> src (64-bit) */
   1610	/* dst = dst >> src (64-bit, arithmetic) */
   1611	case BPF_ALU64 | BPF_LSH | BPF_X:
   1612	case BPF_ALU64 | BPF_RSH | BPF_X:
   1613	case BPF_ALU64 | BPF_ARSH | BPF_X:
   1614		emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
   1615		break;
   1616	/* dst = dst * src (64-bit) */
   1617	case BPF_ALU64 | BPF_MUL | BPF_X:
   1618		emit_mul_r64(ctx, dst, src);
   1619		break;
   1620	/* dst = dst / src (64-bit) */
   1621	/* dst = dst % src (64-bit) */
   1622	case BPF_ALU64 | BPF_DIV | BPF_X:
   1623	case BPF_ALU64 | BPF_MOD | BPF_X:
   1624		emit_divmod_r64(ctx, dst, src, BPF_OP(code));
   1625		break;
   1626	/* dst = htole(dst) */
   1627	/* dst = htobe(dst) */
   1628	case BPF_ALU | BPF_END | BPF_FROM_LE:
   1629	case BPF_ALU | BPF_END | BPF_FROM_BE:
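        		/*
        		 * imm gives the operand width: 16, 32 or 64 bits. When
        		 * the requested byte order differs from the host's, the
        		 * value is byte-swapped; otherwise it is simply
        		 * truncated to the given width.
        		 */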
    1630#ifdef __BIG_ENDIAN
    1631		/* On a big-endian host, only BPF_FROM_LE needs a byte swap */
    1632		if (BPF_SRC(code) == BPF_FROM_LE)
    1633#else
    1634		/* On a little-endian host, only BPF_FROM_BE needs a byte swap */
    1635		if (BPF_SRC(code) == BPF_FROM_BE)
    1636#endif
   1637			emit_bswap_r64(ctx, dst, imm);
   1638		else
   1639			emit_trunc_r64(ctx, dst, imm);
   1640		break;
   1641	/* dst = imm64 */
   1642	case BPF_LD | BPF_IMM | BPF_DW:
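        		/*
        		 * This instruction occupies two 8-byte slots; the
        		 * second slot's imm field holds the upper 32 bits.
        		 * Returning 1 tells the caller to skip that extra slot.
        		 */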
   1643		emit_mov_i(ctx, lo(dst), imm);
   1644		emit_mov_i(ctx, hi(dst), insn[1].imm);
   1645		return 1;
   1646	/* LDX: dst = *(size *)(src + off) */
   1647	case BPF_LDX | BPF_MEM | BPF_W:
   1648	case BPF_LDX | BPF_MEM | BPF_H:
   1649	case BPF_LDX | BPF_MEM | BPF_B:
   1650	case BPF_LDX | BPF_MEM | BPF_DW:
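        		/*
        		 * A BPF_DW load is split into two 32-bit loads here;
        		 * emit_ldx() is assumed to order them so that a load
        		 * whose destination pair overlaps src still works.
        		 */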
   1651		emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
   1652		break;
   1653	/* ST: *(size *)(dst + off) = imm */
   1654	case BPF_ST | BPF_MEM | BPF_W:
   1655	case BPF_ST | BPF_MEM | BPF_H:
   1656	case BPF_ST | BPF_MEM | BPF_B:
   1657	case BPF_ST | BPF_MEM | BPF_DW:
   1658		switch (BPF_SIZE(code)) {
   1659		case BPF_DW:
   1660			/* Sign-extend immediate value into temporary reg */
   1661			emit_mov_se_i64(ctx, tmp, imm);
   1662			break;
   1663		case BPF_W:
   1664		case BPF_H:
   1665		case BPF_B:
   1666			emit_mov_i(ctx, lo(tmp), imm);
   1667			break;
   1668		}
   1669		emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
   1670		break;
   1671	/* STX: *(size *)(dst + off) = src */
   1672	case BPF_STX | BPF_MEM | BPF_W:
   1673	case BPF_STX | BPF_MEM | BPF_H:
   1674	case BPF_STX | BPF_MEM | BPF_B:
   1675	case BPF_STX | BPF_MEM | BPF_DW:
   1676		emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
   1677		break;
   1678	/* Speculation barrier */
   1679	case BPF_ST | BPF_NOSPEC:
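        		/* No barrier instruction is emitted for this target */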
   1680		break;
   1681	/* Atomics */
   1682	case BPF_STX | BPF_ATOMIC | BPF_W:
   1683		switch (imm) {
   1684		case BPF_ADD:
   1685		case BPF_ADD | BPF_FETCH:
   1686		case BPF_AND:
   1687		case BPF_AND | BPF_FETCH:
   1688		case BPF_OR:
   1689		case BPF_OR | BPF_FETCH:
   1690		case BPF_XOR:
   1691		case BPF_XOR | BPF_FETCH:
   1692		case BPF_XCHG:
   1693			if (cpu_has_llsc)
   1694				emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
   1695			else /* Non-ll/sc fallback */
   1696				emit_atomic_r32(ctx, lo(dst), lo(src),
   1697						off, imm);
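        			/* A fetch leaves the old value in src; zero-extend it */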
   1698			if (imm & BPF_FETCH)
   1699				emit_zext_ver(ctx, src);
   1700			break;
   1701		case BPF_CMPXCHG:
   1702			if (cpu_has_llsc)
   1703				emit_cmpxchg_r(ctx, lo(dst), lo(src),
   1704					       lo(res), off);
   1705			else /* Non-ll/sc fallback */
   1706				emit_cmpxchg_r32(ctx, lo(dst), lo(src), off);
   1707			/* Result zero-extension inserted by verifier */
   1708			break;
   1709		default:
   1710			goto notyet;
   1711		}
   1712		break;
   1713	/* Atomics (64-bit) */
   1714	case BPF_STX | BPF_ATOMIC | BPF_DW:
   1715		switch (imm) {
   1716		case BPF_ADD:
   1717		case BPF_ADD | BPF_FETCH:
   1718		case BPF_AND:
   1719		case BPF_AND | BPF_FETCH:
   1720		case BPF_OR:
   1721		case BPF_OR | BPF_FETCH:
   1722		case BPF_XOR:
   1723		case BPF_XOR | BPF_FETCH:
   1724		case BPF_XCHG:
   1725			emit_atomic_r64(ctx, lo(dst), src, off, imm);
   1726			break;
   1727		case BPF_CMPXCHG:
   1728			emit_cmpxchg_r64(ctx, lo(dst), src, off);
   1729			break;
   1730		default:
   1731			goto notyet;
   1732		}
   1733		break;
   1734	/* PC += off if dst == src */
   1735	/* PC += off if dst != src */
   1736	/* PC += off if dst & src */
   1737	/* PC += off if dst > src */
   1738	/* PC += off if dst >= src */
   1739	/* PC += off if dst < src */
   1740	/* PC += off if dst <= src */
   1741	/* PC += off if dst > src (signed) */
   1742	/* PC += off if dst >= src (signed) */
   1743	/* PC += off if dst < src (signed) */
   1744	/* PC += off if dst <= src (signed) */
   1745	case BPF_JMP32 | BPF_JEQ | BPF_X:
   1746	case BPF_JMP32 | BPF_JNE | BPF_X:
   1747	case BPF_JMP32 | BPF_JSET | BPF_X:
   1748	case BPF_JMP32 | BPF_JGT | BPF_X:
   1749	case BPF_JMP32 | BPF_JGE | BPF_X:
   1750	case BPF_JMP32 | BPF_JLT | BPF_X:
   1751	case BPF_JMP32 | BPF_JLE | BPF_X:
   1752	case BPF_JMP32 | BPF_JSGT | BPF_X:
   1753	case BPF_JMP32 | BPF_JSGE | BPF_X:
   1754	case BPF_JMP32 | BPF_JSLT | BPF_X:
   1755	case BPF_JMP32 | BPF_JSLE | BPF_X:
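        		/*
        		 * A branch with off == 0 targets the next instruction
        		 * and is dropped as a no-op. setup_jmp_r() is assumed to
        		 * pick the native condition (possibly inverted) and the
        		 * PC-relative offset; finish_jmp() fails if the target
        		 * is beyond native branch range, yielding -E2BIG.
        		 */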
   1756		if (off == 0)
   1757			break;
   1758		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
   1759		emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
   1760		if (finish_jmp(ctx, jmp, off) < 0)
   1761			goto toofar;
   1762		break;
   1763	/* PC += off if dst == imm */
   1764	/* PC += off if dst != imm */
   1765	/* PC += off if dst & imm */
   1766	/* PC += off if dst > imm */
   1767	/* PC += off if dst >= imm */
   1768	/* PC += off if dst < imm */
   1769	/* PC += off if dst <= imm */
   1770	/* PC += off if dst > imm (signed) */
   1771	/* PC += off if dst >= imm (signed) */
   1772	/* PC += off if dst < imm (signed) */
   1773	/* PC += off if dst <= imm (signed) */
   1774	case BPF_JMP32 | BPF_JEQ | BPF_K:
   1775	case BPF_JMP32 | BPF_JNE | BPF_K:
   1776	case BPF_JMP32 | BPF_JSET | BPF_K:
   1777	case BPF_JMP32 | BPF_JGT | BPF_K:
   1778	case BPF_JMP32 | BPF_JGE | BPF_K:
   1779	case BPF_JMP32 | BPF_JLT | BPF_K:
   1780	case BPF_JMP32 | BPF_JLE | BPF_K:
   1781	case BPF_JMP32 | BPF_JSGT | BPF_K:
   1782	case BPF_JMP32 | BPF_JSGE | BPF_K:
   1783	case BPF_JMP32 | BPF_JSLT | BPF_K:
   1784	case BPF_JMP32 | BPF_JSLE | BPF_K:
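        		/*
        		 * As with ALU immediates, a value that does not fit the
        		 * native comparison is first moved into scratch
        		 * register T6.
        		 */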
   1785		if (off == 0)
   1786			break;
   1787		setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
   1788		if (valid_jmp_i(jmp, imm)) {
   1789			emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
   1790		} else {
   1791			/* Move large immediate to register */
   1792			emit_mov_i(ctx, MIPS_R_T6, imm);
   1793			emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
   1794		}
   1795		if (finish_jmp(ctx, jmp, off) < 0)
   1796			goto toofar;
   1797		break;
   1798	/* PC += off if dst == src */
   1799	/* PC += off if dst != src */
   1800	/* PC += off if dst & src */
   1801	/* PC += off if dst > src */
   1802	/* PC += off if dst >= src */
   1803	/* PC += off if dst < src */
   1804	/* PC += off if dst <= src */
   1805	/* PC += off if dst > src (signed) */
   1806	/* PC += off if dst >= src (signed) */
   1807	/* PC += off if dst < src (signed) */
   1808	/* PC += off if dst <= src (signed) */
   1809	case BPF_JMP | BPF_JEQ | BPF_X:
   1810	case BPF_JMP | BPF_JNE | BPF_X:
   1811	case BPF_JMP | BPF_JSET | BPF_X:
   1812	case BPF_JMP | BPF_JGT | BPF_X:
   1813	case BPF_JMP | BPF_JGE | BPF_X:
   1814	case BPF_JMP | BPF_JLT | BPF_X:
   1815	case BPF_JMP | BPF_JLE | BPF_X:
   1816	case BPF_JMP | BPF_JSGT | BPF_X:
   1817	case BPF_JMP | BPF_JSGE | BPF_X:
   1818	case BPF_JMP | BPF_JSLT | BPF_X:
   1819	case BPF_JMP | BPF_JSLE | BPF_X:
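        		/*
        		 * A 64-bit comparison must examine both halves of the
        		 * register pair; emit_jmp_r64() is assumed to combine
        		 * the high- and low-word tests into one branch decision.
        		 */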
   1820		if (off == 0)
   1821			break;
   1822		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
   1823		emit_jmp_r64(ctx, dst, src, rel, jmp);
   1824		if (finish_jmp(ctx, jmp, off) < 0)
   1825			goto toofar;
   1826		break;
   1827	/* PC += off if dst == imm */
   1828	/* PC += off if dst != imm */
   1829	/* PC += off if dst & imm */
   1830	/* PC += off if dst > imm */
   1831	/* PC += off if dst >= imm */
   1832	/* PC += off if dst < imm */
   1833	/* PC += off if dst <= imm */
   1834	/* PC += off if dst > imm (signed) */
   1835	/* PC += off if dst >= imm (signed) */
   1836	/* PC += off if dst < imm (signed) */
   1837	/* PC += off if dst <= imm (signed) */
   1838	case BPF_JMP | BPF_JEQ | BPF_K:
   1839	case BPF_JMP | BPF_JNE | BPF_K:
   1840	case BPF_JMP | BPF_JSET | BPF_K:
   1841	case BPF_JMP | BPF_JGT | BPF_K:
   1842	case BPF_JMP | BPF_JGE | BPF_K:
   1843	case BPF_JMP | BPF_JLT | BPF_K:
   1844	case BPF_JMP | BPF_JLE | BPF_K:
   1845	case BPF_JMP | BPF_JSGT | BPF_K:
   1846	case BPF_JMP | BPF_JSGE | BPF_K:
   1847	case BPF_JMP | BPF_JSLT | BPF_K:
   1848	case BPF_JMP | BPF_JSLE | BPF_K:
   1849		if (off == 0)
   1850			break;
   1851		setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
   1852		emit_jmp_i64(ctx, dst, imm, rel, jmp);
   1853		if (finish_jmp(ctx, jmp, off) < 0)
   1854			goto toofar;
   1855		break;
   1856	/* PC += off */
   1857	case BPF_JMP | BPF_JA:
   1858		if (off == 0)
   1859			break;
   1860		if (emit_ja(ctx, off) < 0)
   1861			goto toofar;
   1862		break;
   1863	/* Tail call */
   1864	case BPF_JMP | BPF_TAIL_CALL:
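        		/*
        		 * emit_tail_call() presumably bounds-checks the program
        		 * array index and the tail-call count before jumping
        		 * into the target program.
        		 */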
   1865		if (emit_tail_call(ctx) < 0)
   1866			goto invalid;
   1867		break;
   1868	/* Function call */
   1869	case BPF_JMP | BPF_CALL:
   1870		if (emit_call(ctx, insn) < 0)
   1871			goto invalid;
   1872		break;
   1873	/* Function return */
   1874	case BPF_JMP | BPF_EXIT:
    1875		/*
    1876		 * Optimization: when the last instruction is EXIT,
    1877		 * simply fall through to the epilogue.
    1878		 */
   1879		if (ctx->bpf_index == ctx->program->len - 1)
   1880			break;
   1881		if (emit_exit(ctx) < 0)
   1882			goto toofar;
   1883		break;
   1884
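        	/*
        	 * Failure exits: an unknown opcode fails with -EINVAL, a
        	 * recognized but unimplemented one with -EFAULT, and a jump
        	 * whose target is out of native branch range with -E2BIG.
        	 */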
   1885	default:
   1886invalid:
   1887		pr_err_once("unknown opcode %02x\n", code);
   1888		return -EINVAL;
   1889notyet:
   1890		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
   1891		return -EFAULT;
   1892toofar:
   1893		pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
   1894			     ctx->bpf_index, code);
   1895		return -E2BIG;
   1896	}
   1897	return 0;
   1898}