cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

verifier.c (440865B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
      3 * Copyright (c) 2016 Facebook
      4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
      5 */
      6#include <uapi/linux/btf.h>
      7#include <linux/bpf-cgroup.h>
      8#include <linux/kernel.h>
      9#include <linux/types.h>
     10#include <linux/slab.h>
     11#include <linux/bpf.h>
     12#include <linux/btf.h>
     13#include <linux/bpf_verifier.h>
     14#include <linux/filter.h>
     15#include <net/netlink.h>
     16#include <linux/file.h>
     17#include <linux/vmalloc.h>
     18#include <linux/stringify.h>
     19#include <linux/bsearch.h>
     20#include <linux/sort.h>
     21#include <linux/perf_event.h>
     22#include <linux/ctype.h>
     23#include <linux/error-injection.h>
     24#include <linux/bpf_lsm.h>
     25#include <linux/btf_ids.h>
     26
     27#include "disasm.h"
     28
     29static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
     30#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
     31	[_id] = & _name ## _verifier_ops,
     32#define BPF_MAP_TYPE(_id, _ops)
     33#define BPF_LINK_TYPE(_id, _name)
     34#include <linux/bpf_types.h>
     35#undef BPF_PROG_TYPE
     36#undef BPF_MAP_TYPE
     37#undef BPF_LINK_TYPE
     38};
     39
     40/* bpf_check() is a static code analyzer that walks eBPF program
     41 * instruction by instruction and updates register/stack state.
     42 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
     43 *
     44 * The first pass is depth-first-search to check that the program is a DAG.
     45 * It rejects the following programs:
     46 * - larger than BPF_MAXINSNS insns
     47 * - if loop is present (detected via back-edge)
     48 * - unreachable insns exist (shouldn't be a forest. program = one function)
     49 * - out of bounds or malformed jumps
     50 * The second pass is all possible path descent from the 1st insn.
     51 * Since it's analyzing all paths through the program, the length of the
     52 * analysis is limited to 64k insn, which may be hit even if total number of
     53 * insn is less than 4K, but there are too many branches that change stack/regs.
     54 * Number of 'branches to be analyzed' is limited to 1k
     55 *
     56 * On entry to each instruction, each register has a type, and the instruction
     57 * changes the types of the registers depending on instruction semantics.
     58 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
     59 * copied to R1.
     60 *
     61 * All registers are 64-bit.
     62 * R0 - return register
     63 * R1-R5 argument passing registers
     64 * R6-R9 callee saved registers
     65 * R10 - frame pointer read-only
     66 *
     67 * At the start of BPF program the register R1 contains a pointer to bpf_context
     68 * and has type PTR_TO_CTX.
     69 *
     70 * Verifier tracks arithmetic operations on pointers in case:
     71 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
     72 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
     73 * 1st insn copies R10 (which has FRAME_PTR) type into R1
     74 * and 2nd arithmetic instruction is pattern matched to recognize
     75 * that it wants to construct a pointer to some element within stack.
     76 * So after 2nd insn, the register R1 has type PTR_TO_STACK
     77 * (and -20 constant is saved for further stack bounds checking).
     78 * Meaning that this reg is a pointer to stack plus known immediate constant.
     79 *
     80 * Most of the time the registers have SCALAR_VALUE type, which
     81 * means the register has some value, but it's not a valid pointer.
     82 * (like pointer plus pointer becomes SCALAR_VALUE type)
     83 *
     84 * When verifier sees load or store instructions the type of base register
     85 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
     86 * four pointer types recognized by check_mem_access() function.
     87 *
     88 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
     89 * and the range of [ptr, ptr + map's value_size) is accessible.
     90 *
     91 * registers used to pass values to function calls are checked against
     92 * function argument constraints.
     93 *
     94 * ARG_PTR_TO_MAP_KEY is one such argument constraint.
     95 * It means that the register type passed to this function must be
     96 * PTR_TO_STACK and it will be used inside the function as
     97 * 'pointer to map element key'
     98 *
     99 * For example the argument constraints for bpf_map_lookup_elem():
    100 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
    101 *   .arg1_type = ARG_CONST_MAP_PTR,
    102 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
    103 *
    104 * ret_type says that this function returns 'pointer to map elem value or null'.
    105 * The function expects the 1st argument to be a const pointer to 'struct bpf_map'
    106 * and the 2nd argument to be a pointer to stack, which will be used inside
    107 * the helper function as a pointer to map element key.
    108 *
    109 * On the kernel side the helper function looks like:
    110 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
    111 * {
    112 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
    113 *    void *key = (void *) (unsigned long) r2;
    114 *    void *value;
    115 *
    116 *    here kernel can access 'key' and 'map' pointers safely, knowing that
    117 *    [key, key + map->key_size) bytes are valid and were initialized on
    118 *    the stack of eBPF program.
    119 * }
    120 *
    121 * Corresponding eBPF program may look like:
    122 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
    123 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
    124 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
    125 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
    126 * here verifier looks at prototype of map_lookup_elem() and sees:
    127 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
    128 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
    129 *
    130 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
    131 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
    132 * and were initialized prior to this call.
    133 * If it's ok, then verifier allows this BPF_CALL insn and looks at
    134 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
    135 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
    136 * returns either pointer to map value or NULL.
    137 *
    138 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
    139 * insn, the register holding that pointer in the true branch changes state to
    140 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
    141 * branch. See check_cond_jmp_op().
    142 *
    143 * After the call R0 is set to return type of the function and registers R1-R5
    144 * are set to NOT_INIT to indicate that they are no longer readable.
    145 *
    146 * The following reference types represent a potential reference to a kernel
    147 * resource which, after first being allocated, must be checked and freed by
    148 * the BPF program:
    149 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
    150 *
    151 * When the verifier sees a helper call return a reference type, it allocates a
    152 * pointer id for the reference and stores it in the current function state.
    153 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
    154 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
    155 * passes through a NULL-check conditional. For the branch wherein the state is
    156 * changed to CONST_IMM, the verifier releases the reference.
    157 *
    158 * For each helper function that allocates a reference, such as
    159 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
    160 * bpf_sk_release(). When a reference type passes into the release function,
    161 * the verifier also releases the reference. If any unchecked or unreleased
    162 * reference remains at the end of the program, the verifier rejects it.
    163 */
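/* Editorial addition, not part of the upstream comment: a minimal sketch of
 * how the example above typically continues, i.e. how a program NULL-checks
 * the value returned by bpf_map_lookup_elem() before dereferencing it
 * (assuming the map's value_size is at least 8 bytes):
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),        // R0 == NULL? skip the access
 *    BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), // ok: R0 is PTR_TO_MAP_VALUE here
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 *
 * In the fall-through branch R0 has been narrowed to PTR_TO_MAP_VALUE, so the
 * load is accepted; in the taken branch R0 is a known-zero scalar and may only
 * be used as such. Omitting the check leaves R0 as PTR_TO_MAP_VALUE_OR_NULL
 * and the verifier rejects the load.
 */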
    164
    165/* verifier_state + insn_idx are pushed to stack when branch is encountered */
    166struct bpf_verifier_stack_elem {
    167	/* verifier state is 'st'
    168	 * before processing instruction 'insn_idx'
    169	 * and after processing instruction 'prev_insn_idx'
    170	 */
    171	struct bpf_verifier_state st;
    172	int insn_idx;
    173	int prev_insn_idx;
    174	struct bpf_verifier_stack_elem *next;
    175	/* length of verifier log at the time this state was pushed on stack */
    176	u32 log_pos;
    177};
    178
    179#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
    180#define BPF_COMPLEXITY_LIMIT_STATES	64
    181
    182#define BPF_MAP_KEY_POISON	(1ULL << 63)
    183#define BPF_MAP_KEY_SEEN	(1ULL << 62)
    184
    185#define BPF_MAP_PTR_UNPRIV	1UL
    186#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
    187					  POISON_POINTER_DELTA))
    188#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
    189
    190static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
    191static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
    192
    193static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
    194{
    195	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
    196}
    197
    198static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
    199{
    200	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
    201}
    202
    203static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
    204			      const struct bpf_map *map, bool unpriv)
    205{
    206	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
    207	unpriv |= bpf_map_ptr_unpriv(aux);
    208	aux->map_ptr_state = (unsigned long)map |
    209			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
    210}
    211
    212static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
    213{
    214	return aux->map_key_state & BPF_MAP_KEY_POISON;
    215}
    216
    217static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
    218{
    219	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
    220}
    221
    222static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
    223{
    224	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
    225}
    226
    227static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
    228{
    229	bool poisoned = bpf_map_key_poisoned(aux);
    230
    231	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
    232			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
    233}
    234
    235static bool bpf_pseudo_call(const struct bpf_insn *insn)
    236{
    237	return insn->code == (BPF_JMP | BPF_CALL) &&
    238	       insn->src_reg == BPF_PSEUDO_CALL;
    239}
    240
    241static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
    242{
    243	return insn->code == (BPF_JMP | BPF_CALL) &&
    244	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
    245}
    246
    247struct bpf_call_arg_meta {
    248	struct bpf_map *map_ptr;
    249	bool raw_mode;
    250	bool pkt_access;
    251	u8 release_regno;
    252	int regno;
    253	int access_size;
    254	int mem_size;
    255	u64 msize_max_value;
    256	int ref_obj_id;
    257	int map_uid;
    258	int func_id;
    259	struct btf *btf;
    260	u32 btf_id;
    261	struct btf *ret_btf;
    262	u32 ret_btf_id;
    263	u32 subprogno;
    264	struct bpf_map_value_off_desc *kptr_off_desc;
    265	u8 uninit_dynptr_regno;
    266};
    267
    268struct btf *btf_vmlinux;
    269
    270static DEFINE_MUTEX(bpf_verifier_lock);
    271
    272static const struct bpf_line_info *
    273find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
    274{
    275	const struct bpf_line_info *linfo;
    276	const struct bpf_prog *prog;
    277	u32 i, nr_linfo;
    278
    279	prog = env->prog;
    280	nr_linfo = prog->aux->nr_linfo;
    281
    282	if (!nr_linfo || insn_off >= prog->len)
    283		return NULL;
    284
    285	linfo = prog->aux->linfo;
    286	for (i = 1; i < nr_linfo; i++)
    287		if (insn_off < linfo[i].insn_off)
    288			break;
    289
    290	return &linfo[i - 1];
    291}
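/* Editorial note (not from upstream): find_linfo() returns the line info
 * record covering insn_off, i.e. the last entry whose insn_off is <= the
 * query. For example, with entries at insn_off {0, 5, 12}, a query for
 * insn 7 breaks out of the loop at i == 2 and returns the entry at 5.
 */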
    292
    293void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
    294		       va_list args)
    295{
    296	unsigned int n;
    297
    298	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
    299
    300	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
    301		  "verifier log line truncated - local buffer too short\n");
    302
    303	if (log->level == BPF_LOG_KERNEL) {
    304		bool newline = n > 0 && log->kbuf[n - 1] == '\n';
    305
    306		pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
    307		return;
    308	}
    309
    310	n = min(log->len_total - log->len_used - 1, n);
    311	log->kbuf[n] = '\0';
    312	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
    313		log->len_used += n;
    314	else
    315		log->ubuf = NULL;
    316}
    317
    318static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
    319{
    320	char zero = 0;
    321
    322	if (!bpf_verifier_log_needed(log))
    323		return;
    324
    325	log->len_used = new_pos;
    326	if (put_user(zero, log->ubuf + new_pos))
    327		log->ubuf = NULL;
    328}
    329
    330/* log_level controls verbosity level of eBPF verifier.
    331 * bpf_verifier_log_write() is used to dump the verification trace to the log,
    332 * so the user can figure out what's wrong with the program
    333 */
    334__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
    335					   const char *fmt, ...)
    336{
    337	va_list args;
    338
    339	if (!bpf_verifier_log_needed(&env->log))
    340		return;
    341
    342	va_start(args, fmt);
    343	bpf_verifier_vlog(&env->log, fmt, args);
    344	va_end(args);
    345}
    346EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
    347
    348__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
    349{
    350	struct bpf_verifier_env *env = private_data;
    351	va_list args;
    352
    353	if (!bpf_verifier_log_needed(&env->log))
    354		return;
    355
    356	va_start(args, fmt);
    357	bpf_verifier_vlog(&env->log, fmt, args);
    358	va_end(args);
    359}
    360
    361__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
    362			    const char *fmt, ...)
    363{
    364	va_list args;
    365
    366	if (!bpf_verifier_log_needed(log))
    367		return;
    368
    369	va_start(args, fmt);
    370	bpf_verifier_vlog(log, fmt, args);
    371	va_end(args);
    372}
    373
    374static const char *ltrim(const char *s)
    375{
    376	while (isspace(*s))
    377		s++;
    378
    379	return s;
    380}
    381
    382__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
    383					 u32 insn_off,
    384					 const char *prefix_fmt, ...)
    385{
    386	const struct bpf_line_info *linfo;
    387
    388	if (!bpf_verifier_log_needed(&env->log))
    389		return;
    390
    391	linfo = find_linfo(env, insn_off);
    392	if (!linfo || linfo == env->prev_linfo)
    393		return;
    394
    395	if (prefix_fmt) {
    396		va_list args;
    397
    398		va_start(args, prefix_fmt);
    399		bpf_verifier_vlog(&env->log, prefix_fmt, args);
    400		va_end(args);
    401	}
    402
    403	verbose(env, "%s\n",
    404		ltrim(btf_name_by_offset(env->prog->aux->btf,
    405					 linfo->line_off)));
    406
    407	env->prev_linfo = linfo;
    408}
    409
    410static void verbose_invalid_scalar(struct bpf_verifier_env *env,
    411				   struct bpf_reg_state *reg,
    412				   struct tnum *range, const char *ctx,
    413				   const char *reg_name)
    414{
    415	char tn_buf[48];
    416
    417	verbose(env, "At %s the register %s ", ctx, reg_name);
    418	if (!tnum_is_unknown(reg->var_off)) {
    419		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
    420		verbose(env, "has value %s", tn_buf);
    421	} else {
    422		verbose(env, "has unknown scalar value");
    423	}
    424	tnum_strn(tn_buf, sizeof(tn_buf), *range);
    425	verbose(env, " should have been in %s\n", tn_buf);
    426}
    427
    428static bool type_is_pkt_pointer(enum bpf_reg_type type)
    429{
    430	return type == PTR_TO_PACKET ||
    431	       type == PTR_TO_PACKET_META;
    432}
    433
    434static bool type_is_sk_pointer(enum bpf_reg_type type)
    435{
    436	return type == PTR_TO_SOCKET ||
    437		type == PTR_TO_SOCK_COMMON ||
    438		type == PTR_TO_TCP_SOCK ||
    439		type == PTR_TO_XDP_SOCK;
    440}
    441
    442static bool reg_type_not_null(enum bpf_reg_type type)
    443{
    444	return type == PTR_TO_SOCKET ||
    445		type == PTR_TO_TCP_SOCK ||
    446		type == PTR_TO_MAP_VALUE ||
    447		type == PTR_TO_MAP_KEY ||
    448		type == PTR_TO_SOCK_COMMON;
    449}
    450
    451static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
    452{
    453	return reg->type == PTR_TO_MAP_VALUE &&
    454		map_value_has_spin_lock(reg->map_ptr);
    455}
    456
    457static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
    458{
    459	return base_type(type) == PTR_TO_SOCKET ||
    460		base_type(type) == PTR_TO_TCP_SOCK ||
    461		base_type(type) == PTR_TO_MEM ||
    462		base_type(type) == PTR_TO_BTF_ID;
    463}
    464
    465static bool type_is_rdonly_mem(u32 type)
    466{
    467	return type & MEM_RDONLY;
    468}
    469
    470static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
    471{
    472	return type == ARG_PTR_TO_SOCK_COMMON;
    473}
    474
    475static bool type_may_be_null(u32 type)
    476{
    477	return type & PTR_MAYBE_NULL;
    478}
    479
    480static bool may_be_acquire_function(enum bpf_func_id func_id)
    481{
    482	return func_id == BPF_FUNC_sk_lookup_tcp ||
    483		func_id == BPF_FUNC_sk_lookup_udp ||
    484		func_id == BPF_FUNC_skc_lookup_tcp ||
    485		func_id == BPF_FUNC_map_lookup_elem ||
    486	        func_id == BPF_FUNC_ringbuf_reserve;
    487}
    488
    489static bool is_acquire_function(enum bpf_func_id func_id,
    490				const struct bpf_map *map)
    491{
    492	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
    493
    494	if (func_id == BPF_FUNC_sk_lookup_tcp ||
    495	    func_id == BPF_FUNC_sk_lookup_udp ||
    496	    func_id == BPF_FUNC_skc_lookup_tcp ||
    497	    func_id == BPF_FUNC_ringbuf_reserve ||
    498	    func_id == BPF_FUNC_kptr_xchg)
    499		return true;
    500
    501	if (func_id == BPF_FUNC_map_lookup_elem &&
    502	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
    503	     map_type == BPF_MAP_TYPE_SOCKHASH))
    504		return true;
    505
    506	return false;
    507}
    508
    509static bool is_ptr_cast_function(enum bpf_func_id func_id)
    510{
    511	return func_id == BPF_FUNC_tcp_sock ||
    512		func_id == BPF_FUNC_sk_fullsock ||
    513		func_id == BPF_FUNC_skc_to_tcp_sock ||
    514		func_id == BPF_FUNC_skc_to_tcp6_sock ||
    515		func_id == BPF_FUNC_skc_to_udp6_sock ||
    516		func_id == BPF_FUNC_skc_to_mptcp_sock ||
    517		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
    518		func_id == BPF_FUNC_skc_to_tcp_request_sock;
    519}
    520
    521static bool is_cmpxchg_insn(const struct bpf_insn *insn)
    522{
    523	return BPF_CLASS(insn->code) == BPF_STX &&
    524	       BPF_MODE(insn->code) == BPF_ATOMIC &&
    525	       insn->imm == BPF_CMPXCHG;
    526}
    527
    528/* string representation of 'enum bpf_reg_type'
    529 *
    530 * Note that reg_type_str() cannot appear more than once in a single verbose()
    531 * statement.
    532 */
    533static const char *reg_type_str(struct bpf_verifier_env *env,
    534				enum bpf_reg_type type)
    535{
    536	char postfix[16] = {0}, prefix[32] = {0};
    537	static const char * const str[] = {
    538		[NOT_INIT]		= "?",
    539		[SCALAR_VALUE]		= "scalar",
    540		[PTR_TO_CTX]		= "ctx",
    541		[CONST_PTR_TO_MAP]	= "map_ptr",
    542		[PTR_TO_MAP_VALUE]	= "map_value",
    543		[PTR_TO_STACK]		= "fp",
    544		[PTR_TO_PACKET]		= "pkt",
    545		[PTR_TO_PACKET_META]	= "pkt_meta",
    546		[PTR_TO_PACKET_END]	= "pkt_end",
    547		[PTR_TO_FLOW_KEYS]	= "flow_keys",
    548		[PTR_TO_SOCKET]		= "sock",
    549		[PTR_TO_SOCK_COMMON]	= "sock_common",
    550		[PTR_TO_TCP_SOCK]	= "tcp_sock",
    551		[PTR_TO_TP_BUFFER]	= "tp_buffer",
    552		[PTR_TO_XDP_SOCK]	= "xdp_sock",
    553		[PTR_TO_BTF_ID]		= "ptr_",
    554		[PTR_TO_MEM]		= "mem",
    555		[PTR_TO_BUF]		= "buf",
    556		[PTR_TO_FUNC]		= "func",
    557		[PTR_TO_MAP_KEY]	= "map_key",
    558	};
    559
    560	if (type & PTR_MAYBE_NULL) {
    561		if (base_type(type) == PTR_TO_BTF_ID)
    562			strncpy(postfix, "or_null_", 16);
    563		else
    564			strncpy(postfix, "_or_null", 16);
    565	}
    566
    567	if (type & MEM_RDONLY)
    568		strncpy(prefix, "rdonly_", 32);
    569	if (type & MEM_ALLOC)
    570		strncpy(prefix, "alloc_", 32);
    571	if (type & MEM_USER)
    572		strncpy(prefix, "user_", 32);
    573	if (type & MEM_PERCPU)
    574		strncpy(prefix, "percpu_", 32);
    575	if (type & PTR_UNTRUSTED)
    576		strncpy(prefix, "untrusted_", 32);
    577
    578	snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
    579		 prefix, str[base_type(type)], postfix);
    580	return env->type_str_buf;
    581}
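/* Editorial usage note (not from upstream): because every call returns the
 * same env->type_str_buf, evaluating reg_type_str() twice as arguments to a
 * single verbose() would make both "%s" conversions print whichever string
 * was written last. Sketch, with t1/t2 standing in for two register types:
 *
 *	// broken: both arguments point at the same (re-written) buffer
 *	verbose(env, "%s vs %s", reg_type_str(env, t1), reg_type_str(env, t2));
 *
 *	// fine: the buffer is consumed before it is reused
 *	verbose(env, "%s vs ", reg_type_str(env, t1));
 *	verbose(env, "%s\n", reg_type_str(env, t2));
 */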
    582
    583static char slot_type_char[] = {
    584	[STACK_INVALID]	= '?',
    585	[STACK_SPILL]	= 'r',
    586	[STACK_MISC]	= 'm',
    587	[STACK_ZERO]	= '0',
    588	[STACK_DYNPTR]	= 'd',
    589};
    590
    591static void print_liveness(struct bpf_verifier_env *env,
    592			   enum bpf_reg_liveness live)
    593{
    594	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
    595	    verbose(env, "_");
    596	if (live & REG_LIVE_READ)
    597		verbose(env, "r");
    598	if (live & REG_LIVE_WRITTEN)
    599		verbose(env, "w");
    600	if (live & REG_LIVE_DONE)
    601		verbose(env, "D");
    602}
    603
    604static int get_spi(s32 off)
    605{
    606	return (-off - 1) / BPF_REG_SIZE;
    607}
    608
    609static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
    610{
    611	int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
    612
    613	/* We need to check that slots between [spi - nr_slots + 1, spi] are
    614	 * within [0, allocated_stack).
    615	 *
    616	 * Please note that the spi grows downwards. For example, a dynptr
    617	 * takes the size of two stack slots; the first slot will be at
    618	 * spi and the second slot will be at spi - 1.
    619	 */
    620	return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
    621}
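/* Editorial worked example (not from upstream), assuming BPF_REG_SIZE == 8:
 * a dynptr stored at fp-16 gives get_spi(-16) = (16 - 1) / 8 = 1, so it
 * occupies slots spi == 1 and spi - 1 == 0. With nr_slots == 2 the check
 * above requires 1 - 2 + 1 >= 0 and 1 < allocated_stack / 8, i.e. at least
 * 16 bytes of stack must already be allocated for the bounds to be valid.
 */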
    622
    623static struct bpf_func_state *func(struct bpf_verifier_env *env,
    624				   const struct bpf_reg_state *reg)
    625{
    626	struct bpf_verifier_state *cur = env->cur_state;
    627
    628	return cur->frame[reg->frameno];
    629}
    630
    631static const char *kernel_type_name(const struct btf* btf, u32 id)
    632{
    633	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
    634}
    635
    636static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
    637{
    638	env->scratched_regs |= 1U << regno;
    639}
    640
    641static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
    642{
    643	env->scratched_stack_slots |= 1ULL << spi;
    644}
    645
    646static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
    647{
    648	return (env->scratched_regs >> regno) & 1;
    649}
    650
    651static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
    652{
    653	return (env->scratched_stack_slots >> regno) & 1;
    654}
    655
    656static bool verifier_state_scratched(const struct bpf_verifier_env *env)
    657{
    658	return env->scratched_regs || env->scratched_stack_slots;
    659}
    660
    661static void mark_verifier_state_clean(struct bpf_verifier_env *env)
    662{
    663	env->scratched_regs = 0U;
    664	env->scratched_stack_slots = 0ULL;
    665}
    666
    667/* Used for printing the entire verifier state. */
    668static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
    669{
    670	env->scratched_regs = ~0U;
    671	env->scratched_stack_slots = ~0ULL;
    672}
    673
    674static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
    675{
    676	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
    677	case DYNPTR_TYPE_LOCAL:
    678		return BPF_DYNPTR_TYPE_LOCAL;
    679	case DYNPTR_TYPE_RINGBUF:
    680		return BPF_DYNPTR_TYPE_RINGBUF;
    681	default:
    682		return BPF_DYNPTR_TYPE_INVALID;
    683	}
    684}
    685
    686static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
    687{
    688	return type == BPF_DYNPTR_TYPE_RINGBUF;
    689}
    690
    691static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
    692				   enum bpf_arg_type arg_type, int insn_idx)
    693{
    694	struct bpf_func_state *state = func(env, reg);
    695	enum bpf_dynptr_type type;
    696	int spi, i, id;
    697
    698	spi = get_spi(reg->off);
    699
    700	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
    701		return -EINVAL;
    702
    703	for (i = 0; i < BPF_REG_SIZE; i++) {
    704		state->stack[spi].slot_type[i] = STACK_DYNPTR;
    705		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
    706	}
    707
    708	type = arg_to_dynptr_type(arg_type);
    709	if (type == BPF_DYNPTR_TYPE_INVALID)
    710		return -EINVAL;
    711
    712	state->stack[spi].spilled_ptr.dynptr.first_slot = true;
    713	state->stack[spi].spilled_ptr.dynptr.type = type;
    714	state->stack[spi - 1].spilled_ptr.dynptr.type = type;
    715
    716	if (dynptr_type_refcounted(type)) {
    717		/* The id is used to track proper releasing */
    718		id = acquire_reference_state(env, insn_idx);
    719		if (id < 0)
    720			return id;
    721
    722		state->stack[spi].spilled_ptr.id = id;
    723		state->stack[spi - 1].spilled_ptr.id = id;
    724	}
    725
    726	return 0;
    727}
    728
    729static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
    730{
    731	struct bpf_func_state *state = func(env, reg);
    732	int spi, i;
    733
    734	spi = get_spi(reg->off);
    735
    736	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
    737		return -EINVAL;
    738
    739	for (i = 0; i < BPF_REG_SIZE; i++) {
    740		state->stack[spi].slot_type[i] = STACK_INVALID;
    741		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
    742	}
    743
    744	/* Invalidate any slices associated with this dynptr */
    745	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
    746		release_reference(env, state->stack[spi].spilled_ptr.id);
    747		state->stack[spi].spilled_ptr.id = 0;
    748		state->stack[spi - 1].spilled_ptr.id = 0;
    749	}
    750
    751	state->stack[spi].spilled_ptr.dynptr.first_slot = false;
    752	state->stack[spi].spilled_ptr.dynptr.type = 0;
    753	state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
    754
    755	return 0;
    756}
    757
    758static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
    759{
    760	struct bpf_func_state *state = func(env, reg);
    761	int spi = get_spi(reg->off);
    762	int i;
    763
    764	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
    765		return true;
    766
    767	for (i = 0; i < BPF_REG_SIZE; i++) {
    768		if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
    769		    state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
    770			return false;
    771	}
    772
    773	return true;
    774}
    775
    776static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
    777				     enum bpf_arg_type arg_type)
    778{
    779	struct bpf_func_state *state = func(env, reg);
    780	int spi = get_spi(reg->off);
    781	int i;
    782
    783	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
    784	    !state->stack[spi].spilled_ptr.dynptr.first_slot)
    785		return false;
    786
    787	for (i = 0; i < BPF_REG_SIZE; i++) {
    788		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
    789		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
    790			return false;
    791	}
    792
    793	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
    794	if (arg_type == ARG_PTR_TO_DYNPTR)
    795		return true;
    796
    797	return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type);
    798}
    799
    800/* The reg state of a pointer or a bounded scalar was saved when
    801 * it was spilled to the stack.
    802 */
    803static bool is_spilled_reg(const struct bpf_stack_state *stack)
    804{
    805	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
    806}
    807
    808static void scrub_spilled_slot(u8 *stype)
    809{
    810	if (*stype != STACK_INVALID)
    811		*stype = STACK_MISC;
    812}
    813
    814static void print_verifier_state(struct bpf_verifier_env *env,
    815				 const struct bpf_func_state *state,
    816				 bool print_all)
    817{
    818	const struct bpf_reg_state *reg;
    819	enum bpf_reg_type t;
    820	int i;
    821
    822	if (state->frameno)
    823		verbose(env, " frame%d:", state->frameno);
    824	for (i = 0; i < MAX_BPF_REG; i++) {
    825		reg = &state->regs[i];
    826		t = reg->type;
    827		if (t == NOT_INIT)
    828			continue;
    829		if (!print_all && !reg_scratched(env, i))
    830			continue;
    831		verbose(env, " R%d", i);
    832		print_liveness(env, reg->live);
    833		verbose(env, "=");
    834		if (t == SCALAR_VALUE && reg->precise)
    835			verbose(env, "P");
    836		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
    837		    tnum_is_const(reg->var_off)) {
    838			/* reg->off should be 0 for SCALAR_VALUE */
    839			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
    840			verbose(env, "%lld", reg->var_off.value + reg->off);
    841		} else {
    842			const char *sep = "";
    843
    844			verbose(env, "%s", reg_type_str(env, t));
    845			if (base_type(t) == PTR_TO_BTF_ID)
    846				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
    847			verbose(env, "(");
    848/*
    849 * _a stands for append, was shortened to avoid multiline statements below.
    850 * This macro is used to output a comma separated list of attributes.
    851 */
    852#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
    853
    854			if (reg->id)
    855				verbose_a("id=%d", reg->id);
    856			if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
    857				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
    858			if (t != SCALAR_VALUE)
    859				verbose_a("off=%d", reg->off);
    860			if (type_is_pkt_pointer(t))
    861				verbose_a("r=%d", reg->range);
    862			else if (base_type(t) == CONST_PTR_TO_MAP ||
    863				 base_type(t) == PTR_TO_MAP_KEY ||
    864				 base_type(t) == PTR_TO_MAP_VALUE)
    865				verbose_a("ks=%d,vs=%d",
    866					  reg->map_ptr->key_size,
    867					  reg->map_ptr->value_size);
    868			if (tnum_is_const(reg->var_off)) {
    869				/* Typically an immediate SCALAR_VALUE, but
    870				 * could be a pointer whose offset is too big
    871				 * for reg->off
    872				 */
    873				verbose_a("imm=%llx", reg->var_off.value);
    874			} else {
    875				if (reg->smin_value != reg->umin_value &&
    876				    reg->smin_value != S64_MIN)
    877					verbose_a("smin=%lld", (long long)reg->smin_value);
    878				if (reg->smax_value != reg->umax_value &&
    879				    reg->smax_value != S64_MAX)
    880					verbose_a("smax=%lld", (long long)reg->smax_value);
    881				if (reg->umin_value != 0)
    882					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
    883				if (reg->umax_value != U64_MAX)
    884					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
    885				if (!tnum_is_unknown(reg->var_off)) {
    886					char tn_buf[48];
    887
    888					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
    889					verbose_a("var_off=%s", tn_buf);
    890				}
    891				if (reg->s32_min_value != reg->smin_value &&
    892				    reg->s32_min_value != S32_MIN)
    893					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
    894				if (reg->s32_max_value != reg->smax_value &&
    895				    reg->s32_max_value != S32_MAX)
    896					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
    897				if (reg->u32_min_value != reg->umin_value &&
    898				    reg->u32_min_value != U32_MIN)
    899					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
    900				if (reg->u32_max_value != reg->umax_value &&
    901				    reg->u32_max_value != U32_MAX)
    902					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
    903			}
    904#undef verbose_a
    905
    906			verbose(env, ")");
    907		}
    908	}
    909	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
    910		char types_buf[BPF_REG_SIZE + 1];
    911		bool valid = false;
    912		int j;
    913
    914		for (j = 0; j < BPF_REG_SIZE; j++) {
    915			if (state->stack[i].slot_type[j] != STACK_INVALID)
    916				valid = true;
    917			types_buf[j] = slot_type_char[
    918					state->stack[i].slot_type[j]];
    919		}
    920		types_buf[BPF_REG_SIZE] = 0;
    921		if (!valid)
    922			continue;
    923		if (!print_all && !stack_slot_scratched(env, i))
    924			continue;
    925		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
    926		print_liveness(env, state->stack[i].spilled_ptr.live);
    927		if (is_spilled_reg(&state->stack[i])) {
    928			reg = &state->stack[i].spilled_ptr;
    929			t = reg->type;
    930			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
    931			if (t == SCALAR_VALUE && reg->precise)
    932				verbose(env, "P");
    933			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
    934				verbose(env, "%lld", reg->var_off.value + reg->off);
    935		} else {
    936			verbose(env, "=%s", types_buf);
    937		}
    938	}
    939	if (state->acquired_refs && state->refs[0].id) {
    940		verbose(env, " refs=%d", state->refs[0].id);
    941		for (i = 1; i < state->acquired_refs; i++)
    942			if (state->refs[i].id)
    943				verbose(env, ",%d", state->refs[i].id);
    944	}
    945	if (state->in_callback_fn)
    946		verbose(env, " cb");
    947	if (state->in_async_callback_fn)
    948		verbose(env, " async_cb");
    949	verbose(env, "\n");
    950	mark_verifier_state_clean(env);
    951}
    952
    953static inline u32 vlog_alignment(u32 pos)
    954{
    955	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
    956			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
    957}
    958
    959static void print_insn_state(struct bpf_verifier_env *env,
    960			     const struct bpf_func_state *state)
    961{
    962	if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
    963		/* remove new line character */
    964		bpf_vlog_reset(&env->log, env->prev_log_len - 1);
    965		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
    966	} else {
    967		verbose(env, "%d:", env->insn_idx);
    968	}
    969	print_verifier_state(env, state, false);
    970}
    971
    972/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
    973 * small to hold src. This is different from krealloc since we don't want to preserve
    974 * the contents of dst.
    975 *
    976 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
    977 * not be allocated.
    978 */
    979static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
    980{
    981	size_t bytes;
    982
    983	if (ZERO_OR_NULL_PTR(src))
    984		goto out;
    985
    986	if (unlikely(check_mul_overflow(n, size, &bytes)))
    987		return NULL;
    988
    989	if (ksize(dst) < bytes) {
    990		kfree(dst);
    991		dst = kmalloc_track_caller(bytes, flags);
    992		if (!dst)
    993			return NULL;
    994	}
    995
    996	memcpy(dst, src, bytes);
    997out:
    998	return dst ? dst : ZERO_SIZE_PTR;
    999}
   1000
   1001/* resize an array from old_n items to new_n items. the array is reallocated if it's too
   1002 * small to hold new_n items. new items are zeroed out if the array grows.
   1003 *
   1004 * Contrary to krealloc_array, does not free arr if new_n is zero.
   1005 */
   1006static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
   1007{
   1008	if (!new_n || old_n == new_n)
   1009		goto out;
   1010
   1011	arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
   1012	if (!arr)
   1013		return NULL;
   1014
   1015	if (new_n > old_n)
   1016		memset(arr + old_n * size, 0, (new_n - old_n) * size);
   1017
   1018out:
   1019	return arr ? arr : ZERO_SIZE_PTR;
   1020}
   1021
   1022static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
   1023{
   1024	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
   1025			       sizeof(struct bpf_reference_state), GFP_KERNEL);
   1026	if (!dst->refs)
   1027		return -ENOMEM;
   1028
   1029	dst->acquired_refs = src->acquired_refs;
   1030	return 0;
   1031}
   1032
   1033static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
   1034{
   1035	size_t n = src->allocated_stack / BPF_REG_SIZE;
   1036
   1037	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
   1038				GFP_KERNEL);
   1039	if (!dst->stack)
   1040		return -ENOMEM;
   1041
   1042	dst->allocated_stack = src->allocated_stack;
   1043	return 0;
   1044}
   1045
   1046static int resize_reference_state(struct bpf_func_state *state, size_t n)
   1047{
   1048	state->refs = realloc_array(state->refs, state->acquired_refs, n,
   1049				    sizeof(struct bpf_reference_state));
   1050	if (!state->refs)
   1051		return -ENOMEM;
   1052
   1053	state->acquired_refs = n;
   1054	return 0;
   1055}
   1056
   1057static int grow_stack_state(struct bpf_func_state *state, int size)
   1058{
   1059	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
   1060
   1061	if (old_n >= n)
   1062		return 0;
   1063
   1064	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
   1065	if (!state->stack)
   1066		return -ENOMEM;
   1067
   1068	state->allocated_stack = size;
   1069	return 0;
   1070}
   1071
   1072/* Acquire a pointer id from the env and update the state->refs to include
   1073 * this new pointer reference.
   1074 * On success, returns a valid pointer id to associate with the register.
   1075 * On failure, returns a negative errno.
   1076 */
   1077static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
   1078{
   1079	struct bpf_func_state *state = cur_func(env);
   1080	int new_ofs = state->acquired_refs;
   1081	int id, err;
   1082
   1083	err = resize_reference_state(state, state->acquired_refs + 1);
   1084	if (err)
   1085		return err;
   1086	id = ++env->id_gen;
   1087	state->refs[new_ofs].id = id;
   1088	state->refs[new_ofs].insn_idx = insn_idx;
   1089
   1090	return id;
   1091}
   1092
   1093/* release function corresponding to acquire_reference_state(). Idempotent. */
   1094static int release_reference_state(struct bpf_func_state *state, int ptr_id)
   1095{
   1096	int i, last_idx;
   1097
   1098	last_idx = state->acquired_refs - 1;
   1099	for (i = 0; i < state->acquired_refs; i++) {
   1100		if (state->refs[i].id == ptr_id) {
   1101			if (last_idx && i != last_idx)
   1102				memcpy(&state->refs[i], &state->refs[last_idx],
   1103				       sizeof(*state->refs));
   1104			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
   1105			state->acquired_refs--;
   1106			return 0;
   1107		}
   1108	}
   1109	return -EINVAL;
   1110}
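/* Editorial worked example (not from upstream): with acquired_refs == 3 and
 * ids {3, 7, 9} in refs[], releasing id 3 copies the last entry over slot 0
 * and zeroes the last slot, leaving ids {9, 7} and acquired_refs == 2.
 * Releasing an id that is not tracked returns -EINVAL.
 */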
   1111
   1112static void free_func_state(struct bpf_func_state *state)
   1113{
   1114	if (!state)
   1115		return;
   1116	kfree(state->refs);
   1117	kfree(state->stack);
   1118	kfree(state);
   1119}
   1120
   1121static void clear_jmp_history(struct bpf_verifier_state *state)
   1122{
   1123	kfree(state->jmp_history);
   1124	state->jmp_history = NULL;
   1125	state->jmp_history_cnt = 0;
   1126}
   1127
   1128static void free_verifier_state(struct bpf_verifier_state *state,
   1129				bool free_self)
   1130{
   1131	int i;
   1132
   1133	for (i = 0; i <= state->curframe; i++) {
   1134		free_func_state(state->frame[i]);
   1135		state->frame[i] = NULL;
   1136	}
   1137	clear_jmp_history(state);
   1138	if (free_self)
   1139		kfree(state);
   1140}
   1141
   1142/* copy verifier state from src to dst growing dst stack space
   1143 * when necessary to accommodate larger src stack
   1144 */
   1145static int copy_func_state(struct bpf_func_state *dst,
   1146			   const struct bpf_func_state *src)
   1147{
   1148	int err;
   1149
   1150	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
   1151	err = copy_reference_state(dst, src);
   1152	if (err)
   1153		return err;
   1154	return copy_stack_state(dst, src);
   1155}
   1156
   1157static int copy_verifier_state(struct bpf_verifier_state *dst_state,
   1158			       const struct bpf_verifier_state *src)
   1159{
   1160	struct bpf_func_state *dst;
   1161	int i, err;
   1162
   1163	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
   1164					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
   1165					    GFP_USER);
   1166	if (!dst_state->jmp_history)
   1167		return -ENOMEM;
   1168	dst_state->jmp_history_cnt = src->jmp_history_cnt;
   1169
   1170	/* if dst has more stack frames than src, free them */
   1171	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
   1172		free_func_state(dst_state->frame[i]);
   1173		dst_state->frame[i] = NULL;
   1174	}
   1175	dst_state->speculative = src->speculative;
   1176	dst_state->curframe = src->curframe;
   1177	dst_state->active_spin_lock = src->active_spin_lock;
   1178	dst_state->branches = src->branches;
   1179	dst_state->parent = src->parent;
   1180	dst_state->first_insn_idx = src->first_insn_idx;
   1181	dst_state->last_insn_idx = src->last_insn_idx;
   1182	for (i = 0; i <= src->curframe; i++) {
   1183		dst = dst_state->frame[i];
   1184		if (!dst) {
   1185			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
   1186			if (!dst)
   1187				return -ENOMEM;
   1188			dst_state->frame[i] = dst;
   1189		}
   1190		err = copy_func_state(dst, src->frame[i]);
   1191		if (err)
   1192			return err;
   1193	}
   1194	return 0;
   1195}
   1196
   1197static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
   1198{
   1199	while (st) {
   1200		u32 br = --st->branches;
   1201
   1202		/* WARN_ON(br > 1) technically makes sense here,
   1203		 * but see comment in push_stack(), hence:
   1204		 */
   1205		WARN_ONCE((int)br < 0,
   1206			  "BUG update_branch_counts:branches_to_explore=%d\n",
   1207			  br);
   1208		if (br)
   1209			break;
   1210		st = st->parent;
   1211	}
   1212}
   1213
   1214static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
   1215		     int *insn_idx, bool pop_log)
   1216{
   1217	struct bpf_verifier_state *cur = env->cur_state;
   1218	struct bpf_verifier_stack_elem *elem, *head = env->head;
   1219	int err;
   1220
   1221	if (env->head == NULL)
   1222		return -ENOENT;
   1223
   1224	if (cur) {
   1225		err = copy_verifier_state(cur, &head->st);
   1226		if (err)
   1227			return err;
   1228	}
   1229	if (pop_log)
   1230		bpf_vlog_reset(&env->log, head->log_pos);
   1231	if (insn_idx)
   1232		*insn_idx = head->insn_idx;
   1233	if (prev_insn_idx)
   1234		*prev_insn_idx = head->prev_insn_idx;
   1235	elem = head->next;
   1236	free_verifier_state(&head->st, false);
   1237	kfree(head);
   1238	env->head = elem;
   1239	env->stack_size--;
   1240	return 0;
   1241}
   1242
   1243static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
   1244					     int insn_idx, int prev_insn_idx,
   1245					     bool speculative)
   1246{
   1247	struct bpf_verifier_state *cur = env->cur_state;
   1248	struct bpf_verifier_stack_elem *elem;
   1249	int err;
   1250
   1251	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
   1252	if (!elem)
   1253		goto err;
   1254
   1255	elem->insn_idx = insn_idx;
   1256	elem->prev_insn_idx = prev_insn_idx;
   1257	elem->next = env->head;
   1258	elem->log_pos = env->log.len_used;
   1259	env->head = elem;
   1260	env->stack_size++;
   1261	err = copy_verifier_state(&elem->st, cur);
   1262	if (err)
   1263		goto err;
   1264	elem->st.speculative |= speculative;
   1265	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
   1266		verbose(env, "The sequence of %d jumps is too complex.\n",
   1267			env->stack_size);
   1268		goto err;
   1269	}
   1270	if (elem->st.parent) {
   1271		++elem->st.parent->branches;
   1272		/* WARN_ON(branches > 2) technically makes sense here,
   1273		 * but
   1274		 * 1. speculative states will bump 'branches' for non-branch
   1275		 * instructions
   1276		 * 2. is_state_visited() heuristics may decide not to create
   1277		 * a new state for a sequence of branches and all such current
   1278		 * and cloned states will be pointing to a single parent state
   1279		 * which might have large 'branches' count.
   1280		 */
   1281	}
   1282	return &elem->st;
   1283err:
   1284	free_verifier_state(env->cur_state, true);
   1285	env->cur_state = NULL;
   1286	/* pop all elements and return */
   1287	while (!pop_stack(env, NULL, NULL, false));
   1288	return NULL;
   1289}
   1290
   1291#define CALLER_SAVED_REGS 6
   1292static const int caller_saved[CALLER_SAVED_REGS] = {
   1293	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
   1294};
   1295
   1296static void __mark_reg_not_init(const struct bpf_verifier_env *env,
   1297				struct bpf_reg_state *reg);
   1298
   1299/* This helper doesn't clear reg->id */
   1300static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
   1301{
   1302	reg->var_off = tnum_const(imm);
   1303	reg->smin_value = (s64)imm;
   1304	reg->smax_value = (s64)imm;
   1305	reg->umin_value = imm;
   1306	reg->umax_value = imm;
   1307
   1308	reg->s32_min_value = (s32)imm;
   1309	reg->s32_max_value = (s32)imm;
   1310	reg->u32_min_value = (u32)imm;
   1311	reg->u32_max_value = (u32)imm;
   1312}
   1313
   1314/* Mark the unknown part of a register (variable offset or scalar value) as
   1315 * known to have the value @imm.
   1316 */
   1317static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
   1318{
   1319	/* Clear id, off, and union(map_ptr, range) */
   1320	memset(((u8 *)reg) + sizeof(reg->type), 0,
   1321	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
   1322	___mark_reg_known(reg, imm);
   1323}
   1324
   1325static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
   1326{
   1327	reg->var_off = tnum_const_subreg(reg->var_off, imm);
   1328	reg->s32_min_value = (s32)imm;
   1329	reg->s32_max_value = (s32)imm;
   1330	reg->u32_min_value = (u32)imm;
   1331	reg->u32_max_value = (u32)imm;
   1332}
   1333
   1334/* Mark the 'variable offset' part of a register as zero.  This should be
   1335 * used only on registers holding a pointer type.
   1336 */
   1337static void __mark_reg_known_zero(struct bpf_reg_state *reg)
   1338{
   1339	__mark_reg_known(reg, 0);
   1340}
   1341
   1342static void __mark_reg_const_zero(struct bpf_reg_state *reg)
   1343{
   1344	__mark_reg_known(reg, 0);
   1345	reg->type = SCALAR_VALUE;
   1346}
   1347
   1348static void mark_reg_known_zero(struct bpf_verifier_env *env,
   1349				struct bpf_reg_state *regs, u32 regno)
   1350{
   1351	if (WARN_ON(regno >= MAX_BPF_REG)) {
   1352		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
   1353		/* Something bad happened, let's kill all regs */
   1354		for (regno = 0; regno < MAX_BPF_REG; regno++)
   1355			__mark_reg_not_init(env, regs + regno);
   1356		return;
   1357	}
   1358	__mark_reg_known_zero(regs + regno);
   1359}
   1360
   1361static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
   1362{
   1363	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
   1364		const struct bpf_map *map = reg->map_ptr;
   1365
   1366		if (map->inner_map_meta) {
   1367			reg->type = CONST_PTR_TO_MAP;
   1368			reg->map_ptr = map->inner_map_meta;
   1369			/* transfer reg's id which is unique for every map_lookup_elem
   1370			 * as UID of the inner map.
   1371			 */
   1372			if (map_value_has_timer(map->inner_map_meta))
   1373				reg->map_uid = reg->id;
   1374		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
   1375			reg->type = PTR_TO_XDP_SOCK;
   1376		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
   1377			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
   1378			reg->type = PTR_TO_SOCKET;
   1379		} else {
   1380			reg->type = PTR_TO_MAP_VALUE;
   1381		}
   1382		return;
   1383	}
   1384
   1385	reg->type &= ~PTR_MAYBE_NULL;
   1386}
   1387
   1388static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
   1389{
   1390	return type_is_pkt_pointer(reg->type);
   1391}
   1392
   1393static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
   1394{
   1395	return reg_is_pkt_pointer(reg) ||
   1396	       reg->type == PTR_TO_PACKET_END;
   1397}
   1398
   1399/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
   1400static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
   1401				    enum bpf_reg_type which)
   1402{
   1403	/* The register can already have a range from prior markings.
   1404	 * This is fine as long as it hasn't been advanced from its
   1405	 * origin.
   1406	 */
   1407	return reg->type == which &&
   1408	       reg->id == 0 &&
   1409	       reg->off == 0 &&
   1410	       tnum_equals_const(reg->var_off, 0);
   1411}
   1412
   1413/* Reset the min/max bounds of a register */
   1414static void __mark_reg_unbounded(struct bpf_reg_state *reg)
   1415{
   1416	reg->smin_value = S64_MIN;
   1417	reg->smax_value = S64_MAX;
   1418	reg->umin_value = 0;
   1419	reg->umax_value = U64_MAX;
   1420
   1421	reg->s32_min_value = S32_MIN;
   1422	reg->s32_max_value = S32_MAX;
   1423	reg->u32_min_value = 0;
   1424	reg->u32_max_value = U32_MAX;
   1425}
   1426
   1427static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
   1428{
   1429	reg->smin_value = S64_MIN;
   1430	reg->smax_value = S64_MAX;
   1431	reg->umin_value = 0;
   1432	reg->umax_value = U64_MAX;
   1433}
   1434
   1435static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
   1436{
   1437	reg->s32_min_value = S32_MIN;
   1438	reg->s32_max_value = S32_MAX;
   1439	reg->u32_min_value = 0;
   1440	reg->u32_max_value = U32_MAX;
   1441}
   1442
   1443static void __update_reg32_bounds(struct bpf_reg_state *reg)
   1444{
   1445	struct tnum var32_off = tnum_subreg(reg->var_off);
   1446
   1447	/* min signed is max(sign bit) | min(other bits) */
   1448	reg->s32_min_value = max_t(s32, reg->s32_min_value,
   1449			var32_off.value | (var32_off.mask & S32_MIN));
   1450	/* max signed is min(sign bit) | max(other bits) */
   1451	reg->s32_max_value = min_t(s32, reg->s32_max_value,
   1452			var32_off.value | (var32_off.mask & S32_MAX));
   1453	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
   1454	reg->u32_max_value = min(reg->u32_max_value,
   1455				 (u32)(var32_off.value | var32_off.mask));
   1456}
   1457
   1458static void __update_reg64_bounds(struct bpf_reg_state *reg)
   1459{
   1460	/* min signed is max(sign bit) | min(other bits) */
   1461	reg->smin_value = max_t(s64, reg->smin_value,
   1462				reg->var_off.value | (reg->var_off.mask & S64_MIN));
   1463	/* max signed is min(sign bit) | max(other bits) */
   1464	reg->smax_value = min_t(s64, reg->smax_value,
   1465				reg->var_off.value | (reg->var_off.mask & S64_MAX));
   1466	reg->umin_value = max(reg->umin_value, reg->var_off.value);
   1467	reg->umax_value = min(reg->umax_value,
   1468			      reg->var_off.value | reg->var_off.mask);
   1469}
   1470
   1471static void __update_reg_bounds(struct bpf_reg_state *reg)
   1472{
   1473	__update_reg32_bounds(reg);
   1474	__update_reg64_bounds(reg);
   1475}
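/* Editorial worked example (not from upstream): if reg->var_off is the tnum
 * (value = 0x10, mask = 0x0f), i.e. bit 4 is known set and bits 0-3 are
 * unknown, then umin_value can be raised to var_off.value = 0x10 and
 * umax_value lowered to value | mask = 0x1f. The sign bit is known clear
 * (it is in neither value nor mask), so the signed bounds tighten to the
 * same [0x10, 0x1f] via the S64_MIN/S64_MAX masking above.
 */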
   1476
   1477/* Uses signed min/max values to inform unsigned, and vice-versa */
   1478static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
   1479{
   1480	/* Learn sign from signed bounds.
   1481	 * If we cannot cross the sign boundary, then signed and unsigned bounds
   1482	 * are the same, so combine.  This works even in the negative case, e.g.
   1483	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
   1484	 */
   1485	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
   1486		reg->s32_min_value = reg->u32_min_value =
   1487			max_t(u32, reg->s32_min_value, reg->u32_min_value);
   1488		reg->s32_max_value = reg->u32_max_value =
   1489			min_t(u32, reg->s32_max_value, reg->u32_max_value);
   1490		return;
   1491	}
   1492	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
   1493	 * boundary, so we must be careful.
   1494	 */
   1495	if ((s32)reg->u32_max_value >= 0) {
   1496		/* Positive.  We can't learn anything from the smin, but smax
   1497		 * is positive, hence safe.
   1498		 */
   1499		reg->s32_min_value = reg->u32_min_value;
   1500		reg->s32_max_value = reg->u32_max_value =
   1501			min_t(u32, reg->s32_max_value, reg->u32_max_value);
   1502	} else if ((s32)reg->u32_min_value < 0) {
   1503		/* Negative.  We can't learn anything from the smax, but smin
   1504		 * is negative, hence safe.
   1505		 */
   1506		reg->s32_min_value = reg->u32_min_value =
   1507			max_t(u32, reg->s32_min_value, reg->u32_min_value);
   1508		reg->s32_max_value = reg->u32_max_value;
   1509	}
   1510}
   1511
   1512static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
   1513{
   1514	/* Learn sign from signed bounds.
   1515	 * If we cannot cross the sign boundary, then signed and unsigned bounds
   1516	 * are the same, so combine.  This works even in the negative case, e.g.
   1517	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
   1518	 */
   1519	if (reg->smin_value >= 0 || reg->smax_value < 0) {
   1520		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
   1521							  reg->umin_value);
   1522		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
   1523							  reg->umax_value);
   1524		return;
   1525	}
   1526	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
   1527	 * boundary, so we must be careful.
   1528	 */
   1529	if ((s64)reg->umax_value >= 0) {
   1530		/* Positive.  We can't learn anything from the smin, but smax
   1531		 * is positive, hence safe.
   1532		 */
   1533		reg->smin_value = reg->umin_value;
   1534		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
   1535							  reg->umax_value);
   1536	} else if ((s64)reg->umin_value < 0) {
   1537		/* Negative.  We can't learn anything from the smax, but smin
   1538		 * is negative, hence safe.
   1539		 */
   1540		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
   1541							  reg->umin_value);
   1542		reg->smax_value = reg->umax_value;
   1543	}
   1544}
   1545
   1546static void __reg_deduce_bounds(struct bpf_reg_state *reg)
   1547{
   1548	__reg32_deduce_bounds(reg);
   1549	__reg64_deduce_bounds(reg);
   1550}
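/* Editorial worked example (not from upstream): suppose an otherwise unbounded
 * scalar just passed a "r1 <= 0x7f" check, so that umin_value == 0 and
 * umax_value == 0x7f while smin/smax are still S64_MIN/S64_MAX. The first
 * branch in __reg64_deduce_bounds() does not apply, but (s64)umax_value >= 0
 * does, so the deduction sets smin_value = umin_value = 0 and
 * smax_value = min(smax, umax) = 0x7f: the value is now known non-negative
 * in both the signed and unsigned views.
 */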
   1551
   1552/* Attempts to improve var_off based on unsigned min/max information */
   1553static void __reg_bound_offset(struct bpf_reg_state *reg)
   1554{
   1555	struct tnum var64_off = tnum_intersect(reg->var_off,
   1556					       tnum_range(reg->umin_value,
   1557							  reg->umax_value));
   1558	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
   1559						tnum_range(reg->u32_min_value,
   1560							   reg->u32_max_value));
   1561
   1562	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
   1563}
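
       /* Worked example (added for illustration): if umin_value == 0 and
        * umax_value == 0x7f while var_off is still completely unknown,
        * tnum_range(0, 0x7f) yields (value=0; mask=0x7f) and intersecting it
        * with the unknown tnum leaves (0; 0x7f), i.e. bits 7..63 become known
        * zero purely from the unsigned range.
        */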
   1564
   1565static void reg_bounds_sync(struct bpf_reg_state *reg)
   1566{
   1567	/* We might have learned new bounds from the var_off. */
   1568	__update_reg_bounds(reg);
   1569	/* We might have learned something about the sign bit. */
   1570	__reg_deduce_bounds(reg);
   1571	/* We might have learned some bits from the bounds. */
   1572	__reg_bound_offset(reg);
   1573	/* Intersecting with the old var_off might have improved our bounds
   1574	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
   1575	 * then new var_off is (0; 0x7f...fc) which improves our umax.
   1576	 */
   1577	__update_reg_bounds(reg);
   1578}
   1579
   1580static bool __reg32_bound_s64(s32 a)
   1581{
   1582	return a >= 0 && a <= S32_MAX;
   1583}
   1584
   1585static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
   1586{
   1587	reg->umin_value = reg->u32_min_value;
   1588	reg->umax_value = reg->u32_max_value;
   1589
   1590	/* Attempt to pull the 32-bit signed bounds into the 64-bit bounds,
   1591	 * but they must be positive; otherwise set worst-case bounds and
   1592	 * refine later from the tnum.
   1593	 */
   1594	if (__reg32_bound_s64(reg->s32_min_value) &&
   1595	    __reg32_bound_s64(reg->s32_max_value)) {
   1596		reg->smin_value = reg->s32_min_value;
   1597		reg->smax_value = reg->s32_max_value;
   1598	} else {
   1599		reg->smin_value = 0;
   1600		reg->smax_value = U32_MAX;
   1601	}
   1602}
   1603
   1604static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
   1605{
   1606	/* special case when the 64-bit register has its upper 32 bits
   1607	 * zeroed. Typically happens after a zext or <<32, >>32 sequence,
   1608	 * allowing us to use the 32-bit bounds directly.
   1609	 */
   1610	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
   1611		__reg_assign_32_into_64(reg);
   1612	} else {
   1613		/* Otherwise the best we can do is push lower 32bit known and
   1614		 * unknown bits into register (var_off set from jmp logic)
   1615		 * then learn as much as possible from the 64-bit tnum
   1616		 * known and unknown bits. The previous smin/smax bounds are
   1617		 * invalid here because of jmp32 compare so mark them unknown
   1618		 * so they do not impact tnum bounds calculation.
   1619		 */
   1620		__mark_reg64_unbounded(reg);
   1621	}
   1622	reg_bounds_sync(reg);
   1623}
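
       /* Added note: when the upper 32 bits of var_off are already known to
        * be zero (e.g. right after an explicit zero extension),
        * tnum_clear_subreg(var_off) is the constant 0 and the u32/s32 bounds
        * are copied straight into the 64-bit bounds; otherwise the 64-bit
        * bounds are reset and re-derived from the tnum by reg_bounds_sync().
        */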
   1624
   1625static bool __reg64_bound_s32(s64 a)
   1626{
   1627	return a >= S32_MIN && a <= S32_MAX;
   1628}
   1629
   1630static bool __reg64_bound_u32(u64 a)
   1631{
   1632	return a >= U32_MIN && a <= U32_MAX;
   1633}
   1634
   1635static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
   1636{
   1637	__mark_reg32_unbounded(reg);
   1638	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
   1639		reg->s32_min_value = (s32)reg->smin_value;
   1640		reg->s32_max_value = (s32)reg->smax_value;
   1641	}
   1642	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
   1643		reg->u32_min_value = (u32)reg->umin_value;
   1644		reg->u32_max_value = (u32)reg->umax_value;
   1645	}
   1646	reg_bounds_sync(reg);
   1647}
   1648
   1649/* Mark a register as having a completely unknown (scalar) value. */
   1650static void __mark_reg_unknown(const struct bpf_verifier_env *env,
   1651			       struct bpf_reg_state *reg)
   1652{
   1653	/*
   1654	 * Clear type, id, off, and union(map_ptr, range) and
   1655	 * padding between 'type' and union
   1656	 */
   1657	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
   1658	reg->type = SCALAR_VALUE;
   1659	reg->var_off = tnum_unknown;
   1660	reg->frameno = 0;
   1661	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
   1662	__mark_reg_unbounded(reg);
   1663}
   1664
   1665static void mark_reg_unknown(struct bpf_verifier_env *env,
   1666			     struct bpf_reg_state *regs, u32 regno)
   1667{
   1668	if (WARN_ON(regno >= MAX_BPF_REG)) {
   1669		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
   1670		/* Something bad happened, let's kill all regs except FP */
   1671		for (regno = 0; regno < BPF_REG_FP; regno++)
   1672			__mark_reg_not_init(env, regs + regno);
   1673		return;
   1674	}
   1675	__mark_reg_unknown(env, regs + regno);
   1676}
   1677
   1678static void __mark_reg_not_init(const struct bpf_verifier_env *env,
   1679				struct bpf_reg_state *reg)
   1680{
   1681	__mark_reg_unknown(env, reg);
   1682	reg->type = NOT_INIT;
   1683}
   1684
   1685static void mark_reg_not_init(struct bpf_verifier_env *env,
   1686			      struct bpf_reg_state *regs, u32 regno)
   1687{
   1688	if (WARN_ON(regno >= MAX_BPF_REG)) {
   1689		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
   1690		/* Something bad happened, let's kill all regs except FP */
   1691		for (regno = 0; regno < BPF_REG_FP; regno++)
   1692			__mark_reg_not_init(env, regs + regno);
   1693		return;
   1694	}
   1695	__mark_reg_not_init(env, regs + regno);
   1696}
   1697
   1698static void mark_btf_ld_reg(struct bpf_verifier_env *env,
   1699			    struct bpf_reg_state *regs, u32 regno,
   1700			    enum bpf_reg_type reg_type,
   1701			    struct btf *btf, u32 btf_id,
   1702			    enum bpf_type_flag flag)
   1703{
   1704	if (reg_type == SCALAR_VALUE) {
   1705		mark_reg_unknown(env, regs, regno);
   1706		return;
   1707	}
   1708	mark_reg_known_zero(env, regs, regno);
   1709	regs[regno].type = PTR_TO_BTF_ID | flag;
   1710	regs[regno].btf = btf;
   1711	regs[regno].btf_id = btf_id;
   1712}
   1713
   1714#define DEF_NOT_SUBREG	(0)
   1715static void init_reg_state(struct bpf_verifier_env *env,
   1716			   struct bpf_func_state *state)
   1717{
   1718	struct bpf_reg_state *regs = state->regs;
   1719	int i;
   1720
   1721	for (i = 0; i < MAX_BPF_REG; i++) {
   1722		mark_reg_not_init(env, regs, i);
   1723		regs[i].live = REG_LIVE_NONE;
   1724		regs[i].parent = NULL;
   1725		regs[i].subreg_def = DEF_NOT_SUBREG;
   1726	}
   1727
   1728	/* frame pointer */
   1729	regs[BPF_REG_FP].type = PTR_TO_STACK;
   1730	mark_reg_known_zero(env, regs, BPF_REG_FP);
   1731	regs[BPF_REG_FP].frameno = state->frameno;
   1732}
   1733
   1734#define BPF_MAIN_FUNC (-1)
   1735static void init_func_state(struct bpf_verifier_env *env,
   1736			    struct bpf_func_state *state,
   1737			    int callsite, int frameno, int subprogno)
   1738{
   1739	state->callsite = callsite;
   1740	state->frameno = frameno;
   1741	state->subprogno = subprogno;
   1742	init_reg_state(env, state);
   1743	mark_verifier_state_scratched(env);
   1744}
   1745
   1746/* Similar to push_stack(), but for async callbacks */
   1747static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
   1748						int insn_idx, int prev_insn_idx,
   1749						int subprog)
   1750{
   1751	struct bpf_verifier_stack_elem *elem;
   1752	struct bpf_func_state *frame;
   1753
   1754	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
   1755	if (!elem)
   1756		goto err;
   1757
   1758	elem->insn_idx = insn_idx;
   1759	elem->prev_insn_idx = prev_insn_idx;
   1760	elem->next = env->head;
   1761	elem->log_pos = env->log.len_used;
   1762	env->head = elem;
   1763	env->stack_size++;
   1764	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
   1765		verbose(env,
   1766			"The sequence of %d jumps is too complex for async cb.\n",
   1767			env->stack_size);
   1768		goto err;
   1769	}
   1770	/* Unlike push_stack() do not copy_verifier_state().
   1771	 * The caller state doesn't matter.
   1772	 * This is async callback. It starts in a fresh stack.
   1773	 * Initialize it similar to do_check_common().
   1774	 */
   1775	elem->st.branches = 1;
   1776	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
   1777	if (!frame)
   1778		goto err;
   1779	init_func_state(env, frame,
   1780			BPF_MAIN_FUNC /* callsite */,
   1781			0 /* frameno within this callchain */,
   1782			subprog /* subprog number within this prog */);
   1783	elem->st.frame[0] = frame;
   1784	return &elem->st;
   1785err:
   1786	free_verifier_state(env->cur_state, true);
   1787	env->cur_state = NULL;
   1788	/* pop all elements and return */
   1789	while (!pop_stack(env, NULL, NULL, false));
   1790	return NULL;
   1791}
   1792
   1793
   1794enum reg_arg_type {
   1795	SRC_OP,		/* register is used as source operand */
   1796	DST_OP,		/* register is used as destination operand */
   1797	DST_OP_NO_MARK	/* same as above, check only, don't mark */
   1798};
   1799
   1800static int cmp_subprogs(const void *a, const void *b)
   1801{
   1802	return ((struct bpf_subprog_info *)a)->start -
   1803	       ((struct bpf_subprog_info *)b)->start;
   1804}
   1805
   1806static int find_subprog(struct bpf_verifier_env *env, int off)
   1807{
   1808	struct bpf_subprog_info *p;
   1809
   1810	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
   1811		    sizeof(env->subprog_info[0]), cmp_subprogs);
   1812	if (!p)
   1813		return -ENOENT;
   1814	return p - env->subprog_info;
   1815
   1816}
   1817
   1818static int add_subprog(struct bpf_verifier_env *env, int off)
   1819{
   1820	int insn_cnt = env->prog->len;
   1821	int ret;
   1822
   1823	if (off >= insn_cnt || off < 0) {
   1824		verbose(env, "call to invalid destination\n");
   1825		return -EINVAL;
   1826	}
   1827	ret = find_subprog(env, off);
   1828	if (ret >= 0)
   1829		return ret;
   1830	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
   1831		verbose(env, "too many subprograms\n");
   1832		return -E2BIG;
   1833	}
   1834	/* determine subprog starts. The end is one before the next starts */
   1835	env->subprog_info[env->subprog_cnt++].start = off;
   1836	sort(env->subprog_info, env->subprog_cnt,
   1837	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
   1838	return env->subprog_cnt - 1;
   1839}
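
       /* Example (added for illustration): a program whose only bpf-to-bpf
        * call targets insn 7 ends up with subprog starts {0, 7};
        * add_subprog_and_kfunc() later appends the fake 'exit' entry with
        * start == prog->len, so subprog i always spans
        * [subprog[i].start, subprog[i + 1].start).
        */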
   1840
   1841#define MAX_KFUNC_DESCS 256
   1842#define MAX_KFUNC_BTFS	256
   1843
   1844struct bpf_kfunc_desc {
   1845	struct btf_func_model func_model;
   1846	u32 func_id;
   1847	s32 imm;
   1848	u16 offset;
   1849};
   1850
   1851struct bpf_kfunc_btf {
   1852	struct btf *btf;
   1853	struct module *module;
   1854	u16 offset;
   1855};
   1856
   1857struct bpf_kfunc_desc_tab {
   1858	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
   1859	u32 nr_descs;
   1860};
   1861
   1862struct bpf_kfunc_btf_tab {
   1863	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
   1864	u32 nr_descs;
   1865};
   1866
   1867static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
   1868{
   1869	const struct bpf_kfunc_desc *d0 = a;
   1870	const struct bpf_kfunc_desc *d1 = b;
   1871
   1872	/* func_id is not greater than BTF_MAX_TYPE */
   1873	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
   1874}
   1875
   1876static int kfunc_btf_cmp_by_off(const void *a, const void *b)
   1877{
   1878	const struct bpf_kfunc_btf *d0 = a;
   1879	const struct bpf_kfunc_btf *d1 = b;
   1880
   1881	return d0->offset - d1->offset;
   1882}
   1883
   1884static const struct bpf_kfunc_desc *
   1885find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
   1886{
   1887	struct bpf_kfunc_desc desc = {
   1888		.func_id = func_id,
   1889		.offset = offset,
   1890	};
   1891	struct bpf_kfunc_desc_tab *tab;
   1892
   1893	tab = prog->aux->kfunc_tab;
   1894	return bsearch(&desc, tab->descs, tab->nr_descs,
   1895		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
   1896}
   1897
   1898static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
   1899					 s16 offset)
   1900{
   1901	struct bpf_kfunc_btf kf_btf = { .offset = offset };
   1902	struct bpf_kfunc_btf_tab *tab;
   1903	struct bpf_kfunc_btf *b;
   1904	struct module *mod;
   1905	struct btf *btf;
   1906	int btf_fd;
   1907
   1908	tab = env->prog->aux->kfunc_btf_tab;
   1909	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
   1910		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
   1911	if (!b) {
   1912		if (tab->nr_descs == MAX_KFUNC_BTFS) {
   1913			verbose(env, "too many different module BTFs\n");
   1914			return ERR_PTR(-E2BIG);
   1915		}
   1916
   1917		if (bpfptr_is_null(env->fd_array)) {
   1918			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
   1919			return ERR_PTR(-EPROTO);
   1920		}
   1921
   1922		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
   1923					    offset * sizeof(btf_fd),
   1924					    sizeof(btf_fd)))
   1925			return ERR_PTR(-EFAULT);
   1926
   1927		btf = btf_get_by_fd(btf_fd);
   1928		if (IS_ERR(btf)) {
   1929			verbose(env, "invalid module BTF fd specified\n");
   1930			return btf;
   1931		}
   1932
   1933		if (!btf_is_module(btf)) {
   1934			verbose(env, "BTF fd for kfunc is not a module BTF\n");
   1935			btf_put(btf);
   1936			return ERR_PTR(-EINVAL);
   1937		}
   1938
   1939		mod = btf_try_get_module(btf);
   1940		if (!mod) {
   1941			btf_put(btf);
   1942			return ERR_PTR(-ENXIO);
   1943		}
   1944
   1945		b = &tab->descs[tab->nr_descs++];
   1946		b->btf = btf;
   1947		b->module = mod;
   1948		b->offset = offset;
   1949
   1950		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
   1951		     kfunc_btf_cmp_by_off, NULL);
   1952	}
   1953	return b->btf;
   1954}
   1955
   1956void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
   1957{
   1958	if (!tab)
   1959		return;
   1960
   1961	while (tab->nr_descs--) {
   1962		module_put(tab->descs[tab->nr_descs].module);
   1963		btf_put(tab->descs[tab->nr_descs].btf);
   1964	}
   1965	kfree(tab);
   1966}
   1967
   1968static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
   1969{
   1970	if (offset) {
   1971		if (offset < 0) {
   1972			/* In the future, this can be allowed to increase limit
   1973			 * of fd index into fd_array, interpreted as u16.
   1974			 */
   1975			verbose(env, "negative offset disallowed for kernel module function call\n");
   1976			return ERR_PTR(-EINVAL);
   1977		}
   1978
   1979		return __find_kfunc_desc_btf(env, offset);
   1980	}
   1981	return btf_vmlinux ?: ERR_PTR(-ENOENT);
   1982}
   1983
   1984static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
   1985{
   1986	const struct btf_type *func, *func_proto;
   1987	struct bpf_kfunc_btf_tab *btf_tab;
   1988	struct bpf_kfunc_desc_tab *tab;
   1989	struct bpf_prog_aux *prog_aux;
   1990	struct bpf_kfunc_desc *desc;
   1991	const char *func_name;
   1992	struct btf *desc_btf;
   1993	unsigned long call_imm;
   1994	unsigned long addr;
   1995	int err;
   1996
   1997	prog_aux = env->prog->aux;
   1998	tab = prog_aux->kfunc_tab;
   1999	btf_tab = prog_aux->kfunc_btf_tab;
   2000	if (!tab) {
   2001		if (!btf_vmlinux) {
   2002			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
   2003			return -ENOTSUPP;
   2004		}
   2005
   2006		if (!env->prog->jit_requested) {
   2007			verbose(env, "JIT is required for calling kernel function\n");
   2008			return -ENOTSUPP;
   2009		}
   2010
   2011		if (!bpf_jit_supports_kfunc_call()) {
   2012			verbose(env, "JIT does not support calling kernel function\n");
   2013			return -ENOTSUPP;
   2014		}
   2015
   2016		if (!env->prog->gpl_compatible) {
   2017			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
   2018			return -EINVAL;
   2019		}
   2020
   2021		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
   2022		if (!tab)
   2023			return -ENOMEM;
   2024		prog_aux->kfunc_tab = tab;
   2025	}
   2026
   2027	/* func_id == 0 is always invalid, but instead of returning an error, be
   2028	 * conservative and wait until the code elimination pass before returning
   2029	 * error, so that invalid calls that get pruned out can be in BPF programs
   2030	 * loaded from userspace.  It is also required that offset be untouched
   2031	 * for such calls.
   2032	 */
   2033	if (!func_id && !offset)
   2034		return 0;
   2035
   2036	if (!btf_tab && offset) {
   2037		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
   2038		if (!btf_tab)
   2039			return -ENOMEM;
   2040		prog_aux->kfunc_btf_tab = btf_tab;
   2041	}
   2042
   2043	desc_btf = find_kfunc_desc_btf(env, offset);
   2044	if (IS_ERR(desc_btf)) {
   2045		verbose(env, "failed to find BTF for kernel function\n");
   2046		return PTR_ERR(desc_btf);
   2047	}
   2048
   2049	if (find_kfunc_desc(env->prog, func_id, offset))
   2050		return 0;
   2051
   2052	if (tab->nr_descs == MAX_KFUNC_DESCS) {
   2053		verbose(env, "too many different kernel function calls\n");
   2054		return -E2BIG;
   2055	}
   2056
   2057	func = btf_type_by_id(desc_btf, func_id);
   2058	if (!func || !btf_type_is_func(func)) {
   2059		verbose(env, "kernel btf_id %u is not a function\n",
   2060			func_id);
   2061		return -EINVAL;
   2062	}
   2063	func_proto = btf_type_by_id(desc_btf, func->type);
   2064	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
   2065		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
   2066			func_id);
   2067		return -EINVAL;
   2068	}
   2069
   2070	func_name = btf_name_by_offset(desc_btf, func->name_off);
   2071	addr = kallsyms_lookup_name(func_name);
   2072	if (!addr) {
   2073		verbose(env, "cannot find address for kernel function %s\n",
   2074			func_name);
   2075		return -EINVAL;
   2076	}
   2077
   2078	call_imm = BPF_CALL_IMM(addr);
   2079	/* Check whether or not the relative offset overflows desc->imm */
   2080	if ((unsigned long)(s32)call_imm != call_imm) {
   2081		verbose(env, "address of kernel function %s is out of range\n",
   2082			func_name);
   2083		return -EINVAL;
   2084	}
   2085
   2086	desc = &tab->descs[tab->nr_descs++];
   2087	desc->func_id = func_id;
   2088	desc->imm = call_imm;
   2089	desc->offset = offset;
   2090	err = btf_distill_func_proto(&env->log, desc_btf,
   2091				     func_proto, func_name,
   2092				     &desc->func_model);
   2093	if (!err)
   2094		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
   2095		     kfunc_desc_cmp_by_id_off, NULL);
   2096	return err;
   2097}
   2098
   2099static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
   2100{
   2101	const struct bpf_kfunc_desc *d0 = a;
   2102	const struct bpf_kfunc_desc *d1 = b;
   2103
   2104	if (d0->imm > d1->imm)
   2105		return 1;
   2106	else if (d0->imm < d1->imm)
   2107		return -1;
   2108	return 0;
   2109}
   2110
   2111static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
   2112{
   2113	struct bpf_kfunc_desc_tab *tab;
   2114
   2115	tab = prog->aux->kfunc_tab;
   2116	if (!tab)
   2117		return;
   2118
   2119	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
   2120	     kfunc_desc_cmp_by_imm, NULL);
   2121}
   2122
   2123bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
   2124{
   2125	return !!prog->aux->kfunc_tab;
   2126}
   2127
   2128const struct btf_func_model *
   2129bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
   2130			 const struct bpf_insn *insn)
   2131{
   2132	const struct bpf_kfunc_desc desc = {
   2133		.imm = insn->imm,
   2134	};
   2135	const struct bpf_kfunc_desc *res;
   2136	struct bpf_kfunc_desc_tab *tab;
   2137
   2138	tab = prog->aux->kfunc_tab;
   2139	res = bsearch(&desc, tab->descs, tab->nr_descs,
   2140		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
   2141
   2142	return res ? &res->func_model : NULL;
   2143}
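
       /* Added note on the two sort orders: during verification the
        * descriptors are kept sorted by (func_id, offset) so that
        * add_kfunc_call()/find_kfunc_desc() can bsearch on those keys;
        * sort_kfunc_descs_by_imm() later re-sorts them by imm so that
        * bpf_jit_find_kfunc_model() above can bsearch on the imm value the
        * JIT sees in the call instruction.
        */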
   2144
   2145static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
   2146{
   2147	struct bpf_subprog_info *subprog = env->subprog_info;
   2148	struct bpf_insn *insn = env->prog->insnsi;
   2149	int i, ret, insn_cnt = env->prog->len;
   2150
   2151	/* Add entry function. */
   2152	ret = add_subprog(env, 0);
   2153	if (ret)
   2154		return ret;
   2155
   2156	for (i = 0; i < insn_cnt; i++, insn++) {
   2157		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
   2158		    !bpf_pseudo_kfunc_call(insn))
   2159			continue;
   2160
   2161		if (!env->bpf_capable) {
   2162			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
   2163			return -EPERM;
   2164		}
   2165
   2166		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
   2167			ret = add_subprog(env, i + insn->imm + 1);
   2168		else
   2169			ret = add_kfunc_call(env, insn->imm, insn->off);
   2170
   2171		if (ret < 0)
   2172			return ret;
   2173	}
   2174
   2175	/* Add a fake 'exit' subprog which could simplify subprog iteration
   2176	 * logic. 'subprog_cnt' should not be increased.
   2177	 */
   2178	subprog[env->subprog_cnt].start = insn_cnt;
   2179
   2180	if (env->log.level & BPF_LOG_LEVEL2)
   2181		for (i = 0; i < env->subprog_cnt; i++)
   2182			verbose(env, "func#%d @%d\n", i, subprog[i].start);
   2183
   2184	return 0;
   2185}
   2186
   2187static int check_subprogs(struct bpf_verifier_env *env)
   2188{
   2189	int i, subprog_start, subprog_end, off, cur_subprog = 0;
   2190	struct bpf_subprog_info *subprog = env->subprog_info;
   2191	struct bpf_insn *insn = env->prog->insnsi;
   2192	int insn_cnt = env->prog->len;
   2193
   2194	/* now check that all jumps are within the same subprog */
   2195	subprog_start = subprog[cur_subprog].start;
   2196	subprog_end = subprog[cur_subprog + 1].start;
   2197	for (i = 0; i < insn_cnt; i++) {
   2198		u8 code = insn[i].code;
   2199
   2200		if (code == (BPF_JMP | BPF_CALL) &&
   2201		    insn[i].imm == BPF_FUNC_tail_call &&
   2202		    insn[i].src_reg != BPF_PSEUDO_CALL)
   2203			subprog[cur_subprog].has_tail_call = true;
   2204		if (BPF_CLASS(code) == BPF_LD &&
   2205		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
   2206			subprog[cur_subprog].has_ld_abs = true;
   2207		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
   2208			goto next;
   2209		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
   2210			goto next;
   2211		off = i + insn[i].off + 1;
   2212		if (off < subprog_start || off >= subprog_end) {
   2213			verbose(env, "jump out of range from insn %d to %d\n", i, off);
   2214			return -EINVAL;
   2215		}
   2216next:
   2217		if (i == subprog_end - 1) {
   2218			/* to avoid fall-through from one subprog into another
   2219			 * the last insn of the subprog should be either exit
   2220			 * or unconditional jump back
   2221			 */
   2222			if (code != (BPF_JMP | BPF_EXIT) &&
   2223			    code != (BPF_JMP | BPF_JA)) {
   2224				verbose(env, "last insn is not an exit or jmp\n");
   2225				return -EINVAL;
   2226			}
   2227			subprog_start = subprog_end;
   2228			cur_subprog++;
   2229			if (cur_subprog < env->subprog_cnt)
   2230				subprog_end = subprog[cur_subprog + 1].start;
   2231		}
   2232	}
   2233	return 0;
   2234}
   2235
   2236/* Parentage chain of this register (or stack slot) should take care of all
   2237 * issues like callee-saved registers, stack slot allocation time, etc.
   2238 */
   2239static int mark_reg_read(struct bpf_verifier_env *env,
   2240			 const struct bpf_reg_state *state,
   2241			 struct bpf_reg_state *parent, u8 flag)
   2242{
   2243	bool writes = parent == state->parent; /* Observe write marks */
   2244	int cnt = 0;
   2245
   2246	while (parent) {
   2247		/* if read wasn't screened by an earlier write ... */
   2248		if (writes && state->live & REG_LIVE_WRITTEN)
   2249			break;
   2250		if (parent->live & REG_LIVE_DONE) {
   2251			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
   2252				reg_type_str(env, parent->type),
   2253				parent->var_off.value, parent->off);
   2254			return -EFAULT;
   2255		}
   2256		/* The first condition is more likely to be true than the
   2257		 * second, so check it first.
   2258		 */
   2259		if ((parent->live & REG_LIVE_READ) == flag ||
   2260		    parent->live & REG_LIVE_READ64)
   2261			/* The parentage chain never changes and
   2262			 * this parent was already marked as LIVE_READ.
   2263			 * There is no need to keep walking the chain again and
   2264			 * keep re-marking all parents as LIVE_READ.
   2265			 * This case happens when the same register is read
   2266			 * multiple times without writes into it in-between.
   2267			 * Also, if parent has the stronger REG_LIVE_READ64 set,
   2268			 * then no need to set the weak REG_LIVE_READ32.
   2269			 */
   2270			break;
   2271		/* ... then we depend on parent's value */
   2272		parent->live |= flag;
   2273		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
   2274		if (flag == REG_LIVE_READ64)
   2275			parent->live &= ~REG_LIVE_READ32;
   2276		state = parent;
   2277		parent = state->parent;
   2278		writes = true;
   2279		cnt++;
   2280	}
   2281
   2282	if (env->longest_mark_read_walk < cnt)
   2283		env->longest_mark_read_walk = cnt;
   2284	return 0;
   2285}
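
       /* Example (added for illustration): when a child state reads r6 that
        * was last written several states up the chain, the walk above sets
        * the read flag on r6 in each intermediate parent and stops early
        * either at a state whose r6 carries REG_LIVE_WRITTEN (the read is
        * screened by that write) or at a parent that already has an equal or
        * stronger read mark.
        */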
   2286
   2287/* This function is supposed to be used by the following 32-bit optimization
   2288 * code only. It returns TRUE if the source or destination register operates
   2289 * on 64 bits, otherwise it returns FALSE.
   2290 */
   2291static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
   2292		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
   2293{
   2294	u8 code, class, op;
   2295
   2296	code = insn->code;
   2297	class = BPF_CLASS(code);
   2298	op = BPF_OP(code);
   2299	if (class == BPF_JMP) {
   2300		/* BPF_EXIT for "main" will reach here. Return TRUE
   2301		 * conservatively.
   2302		 */
   2303		if (op == BPF_EXIT)
   2304			return true;
   2305		if (op == BPF_CALL) {
   2306			/* A BPF-to-BPF call will reach here because caller-saved
   2307			 * clobbers are marked with DST_OP_NO_MARK; we don't care
   2308			 * about the register def because they are marked as
   2309			 * NOT_INIT already anyway.
   2310			 */
   2311			if (insn->src_reg == BPF_PSEUDO_CALL)
   2312				return false;
   2313			/* Helper call will reach here because of arg type
   2314			 * check, conservatively return TRUE.
   2315			 */
   2316			if (t == SRC_OP)
   2317				return true;
   2318
   2319			return false;
   2320		}
   2321	}
   2322
   2323	if (class == BPF_ALU64 || class == BPF_JMP ||
   2324	    /* BPF_END always use BPF_ALU class. */
   2325	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
   2326		return true;
   2327
   2328	if (class == BPF_ALU || class == BPF_JMP32)
   2329		return false;
   2330
   2331	if (class == BPF_LDX) {
   2332		if (t != SRC_OP)
   2333			return BPF_SIZE(code) == BPF_DW;
   2334		/* LDX source must be ptr. */
   2335		return true;
   2336	}
   2337
   2338	if (class == BPF_STX) {
   2339		/* BPF_STX (including atomic variants) has multiple source
   2340		 * operands, one of which is a ptr. Check whether the caller is
   2341		 * asking about it.
   2342		 */
   2343		if (t == SRC_OP && reg->type != SCALAR_VALUE)
   2344			return true;
   2345		return BPF_SIZE(code) == BPF_DW;
   2346	}
   2347
   2348	if (class == BPF_LD) {
   2349		u8 mode = BPF_MODE(code);
   2350
   2351		/* LD_IMM64 */
   2352		if (mode == BPF_IMM)
   2353			return true;
   2354
   2355		/* Both LD_IND and LD_ABS return 32-bit data. */
   2356		if (t != SRC_OP)
   2357			return false;
   2358
   2359		/* Implicit ctx ptr. */
   2360		if (regno == BPF_REG_6)
   2361			return true;
   2362
   2363		/* Explicit source could be any width. */
   2364		return true;
   2365	}
   2366
   2367	if (class == BPF_ST)
   2368		/* The only source register for BPF_ST is a ptr. */
   2369		return true;
   2370
   2371	/* Conservatively return true by default. */
   2372	return true;
   2373}
   2374
   2375/* Return the regno defined by the insn, or -1. */
   2376static int insn_def_regno(const struct bpf_insn *insn)
   2377{
   2378	switch (BPF_CLASS(insn->code)) {
   2379	case BPF_JMP:
   2380	case BPF_JMP32:
   2381	case BPF_ST:
   2382		return -1;
   2383	case BPF_STX:
   2384		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
   2385		    (insn->imm & BPF_FETCH)) {
   2386			if (insn->imm == BPF_CMPXCHG)
   2387				return BPF_REG_0;
   2388			else
   2389				return insn->src_reg;
   2390		} else {
   2391			return -1;
   2392		}
   2393	default:
   2394		return insn->dst_reg;
   2395	}
   2396}
   2397
   2398/* Return TRUE if INSN has defined any 32-bit value explicitly. */
   2399static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
   2400{
   2401	int dst_reg = insn_def_regno(insn);
   2402
   2403	if (dst_reg == -1)
   2404		return false;
   2405
   2406	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
   2407}
   2408
   2409static void mark_insn_zext(struct bpf_verifier_env *env,
   2410			   struct bpf_reg_state *reg)
   2411{
   2412	s32 def_idx = reg->subreg_def;
   2413
   2414	if (def_idx == DEF_NOT_SUBREG)
   2415		return;
   2416
   2417	env->insn_aux_data[def_idx - 1].zext_dst = true;
   2418	/* The dst will be zero extended, so won't be sub-register anymore. */
   2419	reg->subreg_def = DEF_NOT_SUBREG;
   2420}
   2421
   2422static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
   2423			 enum reg_arg_type t)
   2424{
   2425	struct bpf_verifier_state *vstate = env->cur_state;
   2426	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   2427	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
   2428	struct bpf_reg_state *reg, *regs = state->regs;
   2429	bool rw64;
   2430
   2431	if (regno >= MAX_BPF_REG) {
   2432		verbose(env, "R%d is invalid\n", regno);
   2433		return -EINVAL;
   2434	}
   2435
   2436	mark_reg_scratched(env, regno);
   2437
   2438	reg = &regs[regno];
   2439	rw64 = is_reg64(env, insn, regno, reg, t);
   2440	if (t == SRC_OP) {
   2441		/* check whether register used as source operand can be read */
   2442		if (reg->type == NOT_INIT) {
   2443			verbose(env, "R%d !read_ok\n", regno);
   2444			return -EACCES;
   2445		}
   2446		/* We don't need to worry about FP liveness because it's read-only */
   2447		if (regno == BPF_REG_FP)
   2448			return 0;
   2449
   2450		if (rw64)
   2451			mark_insn_zext(env, reg);
   2452
   2453		return mark_reg_read(env, reg, reg->parent,
   2454				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
   2455	} else {
   2456		/* check whether register used as dest operand can be written to */
   2457		if (regno == BPF_REG_FP) {
   2458			verbose(env, "frame pointer is read only\n");
   2459			return -EACCES;
   2460		}
   2461		reg->live |= REG_LIVE_WRITTEN;
   2462		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
   2463		if (t == DST_OP)
   2464			mark_reg_unknown(env, regs, regno);
   2465	}
   2466	return 0;
   2467}
   2468
   2469/* for any branch, call, exit record the history of jmps in the given state */
   2470static int push_jmp_history(struct bpf_verifier_env *env,
   2471			    struct bpf_verifier_state *cur)
   2472{
   2473	u32 cnt = cur->jmp_history_cnt;
   2474	struct bpf_idx_pair *p;
   2475
   2476	cnt++;
   2477	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
   2478	if (!p)
   2479		return -ENOMEM;
   2480	p[cnt - 1].idx = env->insn_idx;
   2481	p[cnt - 1].prev_idx = env->prev_insn_idx;
   2482	cur->jmp_history = p;
   2483	cur->jmp_history_cnt = cnt;
   2484	return 0;
   2485}
   2486
   2487/* Backtrack one insn at a time. If idx is not at the top of recorded
   2488 * history then previous instruction came from straight line execution.
   2489 */
   2490static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
   2491			     u32 *history)
   2492{
   2493	u32 cnt = *history;
   2494
   2495	if (cnt && st->jmp_history[cnt - 1].idx == i) {
   2496		i = st->jmp_history[cnt - 1].prev_idx;
   2497		(*history)--;
   2498	} else {
   2499		i--;
   2500	}
   2501	return i;
   2502}
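
       /* Example (added for illustration): with jmp_history == [{idx=4,
        * prev_idx=1}] and *history == 1, backtracking from insn 4 returns 1
        * and drops the entry; from any other insn i it simply falls back to
        * straight-line execution, i.e. i - 1.
        */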
   2503
   2504static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
   2505{
   2506	const struct btf_type *func;
   2507	struct btf *desc_btf;
   2508
   2509	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
   2510		return NULL;
   2511
   2512	desc_btf = find_kfunc_desc_btf(data, insn->off);
   2513	if (IS_ERR(desc_btf))
   2514		return "<error>";
   2515
   2516	func = btf_type_by_id(desc_btf, insn->imm);
   2517	return btf_name_by_offset(desc_btf, func->name_off);
   2518}
   2519
   2520/* For a given verifier state backtrack_insn() is called from the last insn to
   2521 * the first insn. Its purpose is to compute a bitmask of registers and
   2522 * stack slots that need precision in the parent verifier state.
   2523 */
   2524static int backtrack_insn(struct bpf_verifier_env *env, int idx,
   2525			  u32 *reg_mask, u64 *stack_mask)
   2526{
   2527	const struct bpf_insn_cbs cbs = {
   2528		.cb_call	= disasm_kfunc_name,
   2529		.cb_print	= verbose,
   2530		.private_data	= env,
   2531	};
   2532	struct bpf_insn *insn = env->prog->insnsi + idx;
   2533	u8 class = BPF_CLASS(insn->code);
   2534	u8 opcode = BPF_OP(insn->code);
   2535	u8 mode = BPF_MODE(insn->code);
   2536	u32 dreg = 1u << insn->dst_reg;
   2537	u32 sreg = 1u << insn->src_reg;
   2538	u32 spi;
   2539
   2540	if (insn->code == 0)
   2541		return 0;
   2542	if (env->log.level & BPF_LOG_LEVEL2) {
   2543		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
   2544		verbose(env, "%d: ", idx);
   2545		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
   2546	}
   2547
   2548	if (class == BPF_ALU || class == BPF_ALU64) {
   2549		if (!(*reg_mask & dreg))
   2550			return 0;
   2551		if (opcode == BPF_MOV) {
   2552			if (BPF_SRC(insn->code) == BPF_X) {
   2553				/* dreg = sreg
   2554				 * dreg needs precision after this insn
   2555				 * sreg needs precision before this insn
   2556				 */
   2557				*reg_mask &= ~dreg;
   2558				*reg_mask |= sreg;
   2559			} else {
   2560				/* dreg = K
   2561				 * dreg needs precision after this insn.
   2562				 * Corresponding register is already marked
   2563				 * as precise=true in this verifier state.
   2564				 * No further markings in parent are necessary
   2565				 */
   2566				*reg_mask &= ~dreg;
   2567			}
   2568		} else {
   2569			if (BPF_SRC(insn->code) == BPF_X) {
   2570				/* dreg += sreg
   2571				 * both dreg and sreg need precision
   2572				 * before this insn
   2573				 */
   2574				*reg_mask |= sreg;
   2575			} /* else dreg += K
   2576			   * dreg still needs precision before this insn
   2577			   */
   2578		}
   2579	} else if (class == BPF_LDX) {
   2580		if (!(*reg_mask & dreg))
   2581			return 0;
   2582		*reg_mask &= ~dreg;
   2583
   2584		/* scalars can only be spilled into stack w/o losing precision.
   2585		 * Load from any other memory can be zero extended.
   2586		 * The desire to keep that precision is already indicated
   2587		 * by 'precise' mark in corresponding register of this state.
   2588		 * No further tracking necessary.
   2589		 */
   2590		if (insn->src_reg != BPF_REG_FP)
   2591			return 0;
   2592
   2593		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
   2594		 * The [fp - off] slot contains a scalar that needs to be
   2595		 * tracked with precision.
   2596		 */
   2597		spi = (-insn->off - 1) / BPF_REG_SIZE;
   2598		if (spi >= 64) {
   2599			verbose(env, "BUG spi %d\n", spi);
   2600			WARN_ONCE(1, "verifier backtracking bug");
   2601			return -EFAULT;
   2602		}
   2603		*stack_mask |= 1ull << spi;
   2604	} else if (class == BPF_STX || class == BPF_ST) {
   2605		if (*reg_mask & dreg)
   2606			/* stx & st shouldn't be using _scalar_ dst_reg
   2607			 * to access memory. It means backtracking
   2608			 * encountered a case of pointer subtraction.
   2609			 */
   2610			return -ENOTSUPP;
   2611		/* scalars can only be spilled into stack */
   2612		if (insn->dst_reg != BPF_REG_FP)
   2613			return 0;
   2614		spi = (-insn->off - 1) / BPF_REG_SIZE;
   2615		if (spi >= 64) {
   2616			verbose(env, "BUG spi %d\n", spi);
   2617			WARN_ONCE(1, "verifier backtracking bug");
   2618			return -EFAULT;
   2619		}
   2620		if (!(*stack_mask & (1ull << spi)))
   2621			return 0;
   2622		*stack_mask &= ~(1ull << spi);
   2623		if (class == BPF_STX)
   2624			*reg_mask |= sreg;
   2625	} else if (class == BPF_JMP || class == BPF_JMP32) {
   2626		if (opcode == BPF_CALL) {
   2627			if (insn->src_reg == BPF_PSEUDO_CALL)
   2628				return -ENOTSUPP;
   2629			/* regular helper call sets R0 */
   2630			*reg_mask &= ~1;
   2631			if (*reg_mask & 0x3f) {
   2632				/* if backtracing was looking for registers R1-R5
   2633				 * they should have been found already.
   2634				 */
   2635				verbose(env, "BUG regs %x\n", *reg_mask);
   2636				WARN_ONCE(1, "verifier backtracking bug");
   2637				return -EFAULT;
   2638			}
   2639		} else if (opcode == BPF_EXIT) {
   2640			return -ENOTSUPP;
   2641		}
   2642	} else if (class == BPF_LD) {
   2643		if (!(*reg_mask & dreg))
   2644			return 0;
   2645		*reg_mask &= ~dreg;
   2646		/* It's ld_imm64 or ld_abs or ld_ind.
   2647		 * For ld_imm64 no further tracking of precision
   2648		 * into parent is necessary
   2649		 */
   2650		if (mode == BPF_IND || mode == BPF_ABS)
   2651			/* to be analyzed */
   2652			return -ENOTSUPP;
   2653	}
   2654	return 0;
   2655}
   2656
   2657/* the scalar precision tracking algorithm:
   2658 * . at the start all registers have precise=false.
   2659 * . scalar ranges are tracked as normal through alu and jmp insns.
   2660 * . once precise value of the scalar register is used in:
   2661 *   .  ptr + scalar alu
   2662 *   . if (scalar cond K|scalar)
   2663 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
   2664 *   backtrack through the verifier states and mark all registers and
   2665 *   stack slots with spilled constants that these scalar registers
   2666 *   should be precise.
   2667 * . during state pruning two registers (or spilled stack slots)
   2668 *   are equivalent if both are not precise.
   2669 *
   2670 * Note the verifier cannot simply walk register parentage chain,
   2671 * since many different registers and stack slots could have been
   2672 * used to compute single precise scalar.
   2673 *
   2674 * The approach of starting with precise=true for all registers and then
   2675 * backtrack to mark a register as not precise when the verifier detects
   2676 * that the program doesn't care about a specific value (e.g., when a
   2677 * helper takes a register as an ARG_ANYTHING parameter) is not safe.
   2678 *
   2679 * It's ok to walk single parentage chain of the verifier states.
   2680 * It's possible that this backtracking will go all the way till 1st insn.
   2681 * All other branches will be explored for needing precision later.
   2682 *
   2683 * The backtracking needs to deal with cases like:
   2684 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
   2685 * r9 -= r8
   2686 * r5 = r9
   2687 * if r5 > 0x79f goto pc+7
   2688 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
   2689 * r5 += 1
   2690 * ...
   2691 * call bpf_perf_event_output#25
   2692 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
   2693 *
   2694 * and this case:
   2695 * r6 = 1
   2696 * call foo // uses callee's r6 inside to compute r0
   2697 * r0 += r6
   2698 * if r0 == 0 goto
   2699 *
   2700 * to track above reg_mask/stack_mask needs to be independent for each frame.
   2701 *
   2702 * Also if parent's curframe > frame where backtracking started,
   2703 * the verifier needs to mark registers in both frames, otherwise callees
   2704 * may incorrectly prune callers. This is similar to
   2705 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
   2706 *
   2707 * For now backtracking falls back into conservative marking.
   2708 */
   2709static void mark_all_scalars_precise(struct bpf_verifier_env *env,
   2710				     struct bpf_verifier_state *st)
   2711{
   2712	struct bpf_func_state *func;
   2713	struct bpf_reg_state *reg;
   2714	int i, j;
   2715
   2716	/* big hammer: mark all scalars precise in this path.
   2717	 * pop_stack may still get !precise scalars.
   2718	 */
   2719	for (; st; st = st->parent)
   2720		for (i = 0; i <= st->curframe; i++) {
   2721			func = st->frame[i];
   2722			for (j = 0; j < BPF_REG_FP; j++) {
   2723				reg = &func->regs[j];
   2724				if (reg->type != SCALAR_VALUE)
   2725					continue;
   2726				reg->precise = true;
   2727			}
   2728			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
   2729				if (!is_spilled_reg(&func->stack[j]))
   2730					continue;
   2731				reg = &func->stack[j].spilled_ptr;
   2732				if (reg->type != SCALAR_VALUE)
   2733					continue;
   2734				reg->precise = true;
   2735			}
   2736		}
   2737}
   2738
   2739static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
   2740				  int spi)
   2741{
   2742	struct bpf_verifier_state *st = env->cur_state;
   2743	int first_idx = st->first_insn_idx;
   2744	int last_idx = env->insn_idx;
   2745	struct bpf_func_state *func;
   2746	struct bpf_reg_state *reg;
   2747	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
   2748	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
   2749	bool skip_first = true;
   2750	bool new_marks = false;
   2751	int i, err;
   2752
   2753	if (!env->bpf_capable)
   2754		return 0;
   2755
   2756	func = st->frame[st->curframe];
   2757	if (regno >= 0) {
   2758		reg = &func->regs[regno];
   2759		if (reg->type != SCALAR_VALUE) {
   2760			WARN_ONCE(1, "backtracing misuse");
   2761			return -EFAULT;
   2762		}
   2763		if (!reg->precise)
   2764			new_marks = true;
   2765		else
   2766			reg_mask = 0;
   2767		reg->precise = true;
   2768	}
   2769
   2770	while (spi >= 0) {
   2771		if (!is_spilled_reg(&func->stack[spi])) {
   2772			stack_mask = 0;
   2773			break;
   2774		}
   2775		reg = &func->stack[spi].spilled_ptr;
   2776		if (reg->type != SCALAR_VALUE) {
   2777			stack_mask = 0;
   2778			break;
   2779		}
   2780		if (!reg->precise)
   2781			new_marks = true;
   2782		else
   2783			stack_mask = 0;
   2784		reg->precise = true;
   2785		break;
   2786	}
   2787
   2788	if (!new_marks)
   2789		return 0;
   2790	if (!reg_mask && !stack_mask)
   2791		return 0;
   2792	for (;;) {
   2793		DECLARE_BITMAP(mask, 64);
   2794		u32 history = st->jmp_history_cnt;
   2795
   2796		if (env->log.level & BPF_LOG_LEVEL2)
   2797			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
   2798		for (i = last_idx;;) {
   2799			if (skip_first) {
   2800				err = 0;
   2801				skip_first = false;
   2802			} else {
   2803				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
   2804			}
   2805			if (err == -ENOTSUPP) {
   2806				mark_all_scalars_precise(env, st);
   2807				return 0;
   2808			} else if (err) {
   2809				return err;
   2810			}
   2811			if (!reg_mask && !stack_mask)
   2812				/* Found assignment(s) into tracked register in this state.
   2813				 * Since this state is already marked, just return.
   2814				 * Nothing to be tracked further in the parent state.
   2815				 */
   2816				return 0;
   2817			if (i == first_idx)
   2818				break;
   2819			i = get_prev_insn_idx(st, i, &history);
   2820			if (i >= env->prog->len) {
   2821				/* This can happen if backtracking reached insn 0
   2822				 * and there are still reg_mask or stack_mask
   2823				 * to backtrack.
   2824				 * It means the backtracking missed the spot where
   2825				 * particular register was initialized with a constant.
   2826				 */
   2827				verbose(env, "BUG backtracking idx %d\n", i);
   2828				WARN_ONCE(1, "verifier backtracking bug");
   2829				return -EFAULT;
   2830			}
   2831		}
   2832		st = st->parent;
   2833		if (!st)
   2834			break;
   2835
   2836		new_marks = false;
   2837		func = st->frame[st->curframe];
   2838		bitmap_from_u64(mask, reg_mask);
   2839		for_each_set_bit(i, mask, 32) {
   2840			reg = &func->regs[i];
   2841			if (reg->type != SCALAR_VALUE) {
   2842				reg_mask &= ~(1u << i);
   2843				continue;
   2844			}
   2845			if (!reg->precise)
   2846				new_marks = true;
   2847			reg->precise = true;
   2848		}
   2849
   2850		bitmap_from_u64(mask, stack_mask);
   2851		for_each_set_bit(i, mask, 64) {
   2852			if (i >= func->allocated_stack / BPF_REG_SIZE) {
   2853				/* the sequence of instructions:
   2854				 * 2: (bf) r3 = r10
   2855				 * 3: (7b) *(u64 *)(r3 -8) = r0
   2856				 * 4: (79) r4 = *(u64 *)(r10 -8)
   2857				 * doesn't contain jmps. It's backtracked
   2858				 * as a single block.
   2859				 * During backtracking insn 3 is not recognized as
   2860				 * stack access, so at the end of backtracking
   2861				 * stack slot fp-8 is still marked in stack_mask.
   2862				 * However the parent state may not have accessed
   2863				 * fp-8 and it's "unallocated" stack space.
   2864				 * In such a case, fall back to conservative marking.
   2865				 */
   2866				mark_all_scalars_precise(env, st);
   2867				return 0;
   2868			}
   2869
   2870			if (!is_spilled_reg(&func->stack[i])) {
   2871				stack_mask &= ~(1ull << i);
   2872				continue;
   2873			}
   2874			reg = &func->stack[i].spilled_ptr;
   2875			if (reg->type != SCALAR_VALUE) {
   2876				stack_mask &= ~(1ull << i);
   2877				continue;
   2878			}
   2879			if (!reg->precise)
   2880				new_marks = true;
   2881			reg->precise = true;
   2882		}
   2883		if (env->log.level & BPF_LOG_LEVEL2) {
   2884			verbose(env, "parent %s regs=%x stack=%llx marks:",
   2885				new_marks ? "didn't have" : "already had",
   2886				reg_mask, stack_mask);
   2887			print_verifier_state(env, func, true);
   2888		}
   2889
   2890		if (!reg_mask && !stack_mask)
   2891			break;
   2892		if (!new_marks)
   2893			break;
   2894
   2895		last_idx = st->last_insn_idx;
   2896		first_idx = st->first_insn_idx;
   2897	}
   2898	return 0;
   2899}
   2900
   2901static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
   2902{
   2903	return __mark_chain_precision(env, regno, -1);
   2904}
   2905
   2906static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
   2907{
   2908	return __mark_chain_precision(env, -1, spi);
   2909}
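
       /* Example (added commentary): for the sequence
        *   r1 = 8
        *   r2 = r10
        *   r2 += r1
        * the pointer arithmetic makes r1's exact value matter, so
        * mark_chain_precision(env, BPF_REG_1) backtracks instruction by
        * instruction, marking r1 precise until backtrack_insn() reaches the
        * 'r1 = 8' constant assignment and clears r1 from reg_mask.
        */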
   2910
   2911static bool is_spillable_regtype(enum bpf_reg_type type)
   2912{
   2913	switch (base_type(type)) {
   2914	case PTR_TO_MAP_VALUE:
   2915	case PTR_TO_STACK:
   2916	case PTR_TO_CTX:
   2917	case PTR_TO_PACKET:
   2918	case PTR_TO_PACKET_META:
   2919	case PTR_TO_PACKET_END:
   2920	case PTR_TO_FLOW_KEYS:
   2921	case CONST_PTR_TO_MAP:
   2922	case PTR_TO_SOCKET:
   2923	case PTR_TO_SOCK_COMMON:
   2924	case PTR_TO_TCP_SOCK:
   2925	case PTR_TO_XDP_SOCK:
   2926	case PTR_TO_BTF_ID:
   2927	case PTR_TO_BUF:
   2928	case PTR_TO_MEM:
   2929	case PTR_TO_FUNC:
   2930	case PTR_TO_MAP_KEY:
   2931		return true;
   2932	default:
   2933		return false;
   2934	}
   2935}
   2936
   2937/* Does this register contain a constant zero? */
   2938static bool register_is_null(struct bpf_reg_state *reg)
   2939{
   2940	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
   2941}
   2942
   2943static bool register_is_const(struct bpf_reg_state *reg)
   2944{
   2945	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
   2946}
   2947
   2948static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
   2949{
   2950	return tnum_is_unknown(reg->var_off) &&
   2951	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
   2952	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
   2953	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
   2954	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
   2955}
   2956
   2957static bool register_is_bounded(struct bpf_reg_state *reg)
   2958{
   2959	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
   2960}
   2961
   2962static bool __is_pointer_value(bool allow_ptr_leaks,
   2963			       const struct bpf_reg_state *reg)
   2964{
   2965	if (allow_ptr_leaks)
   2966		return false;
   2967
   2968	return reg->type != SCALAR_VALUE;
   2969}
   2970
   2971static void save_register_state(struct bpf_func_state *state,
   2972				int spi, struct bpf_reg_state *reg,
   2973				int size)
   2974{
   2975	int i;
   2976
   2977	state->stack[spi].spilled_ptr = *reg;
   2978	if (size == BPF_REG_SIZE)
   2979		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
   2980
   2981	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
   2982		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
   2983
   2984	/* size < 8 bytes spill */
   2985	for (; i; i--)
   2986		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
   2987}
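
       /* Example (added for illustration): spilling a bounded scalar with
        * size == 4 into a slot copies the register into spilled_ptr, marks
        * slot_type[7..4] as STACK_SPILL, scrubs slot_type[3..0], and skips
        * the REG_LIVE_WRITTEN mark because only half of the 8-byte slot was
        * overwritten.
        */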
   2988
   2989/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
   2990 * stack boundary and alignment are checked in check_mem_access()
   2991 */
   2992static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
   2993				       /* stack frame we're writing to */
   2994				       struct bpf_func_state *state,
   2995				       int off, int size, int value_regno,
   2996				       int insn_idx)
   2997{
   2998	struct bpf_func_state *cur; /* state of the current function */
   2999	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
   3000	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
   3001	struct bpf_reg_state *reg = NULL;
   3002
   3003	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
   3004	if (err)
   3005		return err;
   3006	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
   3007	 * so it's aligned access and [off, off + size) are within stack limits
   3008	 */
   3009	if (!env->allow_ptr_leaks &&
   3010	    state->stack[spi].slot_type[0] == STACK_SPILL &&
   3011	    size != BPF_REG_SIZE) {
   3012		verbose(env, "attempt to corrupt spilled pointer on stack\n");
   3013		return -EACCES;
   3014	}
   3015
   3016	cur = env->cur_state->frame[env->cur_state->curframe];
   3017	if (value_regno >= 0)
   3018		reg = &cur->regs[value_regno];
   3019	if (!env->bypass_spec_v4) {
   3020		bool sanitize = reg && is_spillable_regtype(reg->type);
   3021
   3022		for (i = 0; i < size; i++) {
   3023			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
   3024				sanitize = true;
   3025				break;
   3026			}
   3027		}
   3028
   3029		if (sanitize)
   3030			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
   3031	}
   3032
   3033	mark_stack_slot_scratched(env, spi);
   3034	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
   3035	    !register_is_null(reg) && env->bpf_capable) {
   3036		if (dst_reg != BPF_REG_FP) {
   3037			/* The backtracking logic can only recognize explicit
   3038			 * stack slot address like [fp - 8]. Other spill of
   3039			 * scalar via different register has to be conservative.
   3040			 * Backtrack from here and mark all registers as precise
   3041			 * that contributed into 'reg' being a constant.
   3042			 */
   3043			err = mark_chain_precision(env, value_regno);
   3044			if (err)
   3045				return err;
   3046		}
   3047		save_register_state(state, spi, reg, size);
   3048	} else if (reg && is_spillable_regtype(reg->type)) {
   3049		/* register containing pointer is being spilled into stack */
   3050		if (size != BPF_REG_SIZE) {
   3051			verbose_linfo(env, insn_idx, "; ");
   3052			verbose(env, "invalid size of register spill\n");
   3053			return -EACCES;
   3054		}
   3055		if (state != cur && reg->type == PTR_TO_STACK) {
   3056			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
   3057			return -EINVAL;
   3058		}
   3059		save_register_state(state, spi, reg, size);
   3060	} else {
   3061		u8 type = STACK_MISC;
   3062
   3063		/* regular write of data into stack destroys any spilled ptr */
   3064		state->stack[spi].spilled_ptr.type = NOT_INIT;
   3065		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
   3066		if (is_spilled_reg(&state->stack[spi]))
   3067			for (i = 0; i < BPF_REG_SIZE; i++)
   3068				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
   3069
   3070		/* only mark the slot as written if all 8 bytes were written
   3071		 * otherwise read propagation may incorrectly stop too soon
   3072		 * when stack slots are partially written.
   3073		 * This heuristic means that read propagation will be
   3074		 * conservative, since it will add reg_live_read marks
   3075		 * to stack slots all the way to the first state when a program
   3076		 * writes+reads less than 8 bytes.
   3077		 */
   3078		if (size == BPF_REG_SIZE)
   3079			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
   3080
   3081		/* when we zero initialize stack slots mark them as such */
   3082		if (reg && register_is_null(reg)) {
   3083			/* backtracking doesn't work for STACK_ZERO yet. */
   3084			err = mark_chain_precision(env, value_regno);
   3085			if (err)
   3086				return err;
   3087			type = STACK_ZERO;
   3088		}
   3089
   3090		/* Mark slots affected by this stack write. */
   3091		for (i = 0; i < size; i++)
   3092			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
   3093				type;
   3094	}
   3095	return 0;
   3096}
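
       /* Example (added for illustration): a write at off == -8 with
        * size == 8 gives slot == 7 and spi == 0, i.e. the first 8-byte stack
        * slot below the frame pointer; a write at off == -16 lands in
        * spi == 1.
        */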
   3097
   3098/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
   3099 * known to contain a variable offset.
   3100 * This function checks whether the write is permitted and conservatively
   3101 * tracks the effects of the write, considering that each stack slot in the
   3102 * dynamic range is potentially written to.
   3103 *
   3104 * 'off' includes 'regno->off'.
   3105 * 'value_regno' can be -1, meaning that an unknown value is being written to
   3106 * the stack.
   3107 *
   3108 * Spilled pointers in range are not marked as written because we don't know
   3109 * what's going to be actually written. This means that read propagation for
   3110 * future reads cannot be terminated by this write.
   3111 *
   3112 * For privileged programs, uninitialized stack slots are considered
   3113 * initialized by this write (even though we don't know exactly what offsets
   3114 * are going to be written to). The idea is that we don't want the verifier to
   3115 * reject future reads that access slots written to through variable offsets.
   3116 */
   3117static int check_stack_write_var_off(struct bpf_verifier_env *env,
   3118				     /* func where register points to */
   3119				     struct bpf_func_state *state,
   3120				     int ptr_regno, int off, int size,
   3121				     int value_regno, int insn_idx)
   3122{
   3123	struct bpf_func_state *cur; /* state of the current function */
   3124	int min_off, max_off;
   3125	int i, err;
   3126	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
   3127	bool writing_zero = false;
   3128	/* set if the fact that we're writing a zero is used to let any
   3129	 * stack slots remain STACK_ZERO
   3130	 */
   3131	bool zero_used = false;
   3132
   3133	cur = env->cur_state->frame[env->cur_state->curframe];
   3134	ptr_reg = &cur->regs[ptr_regno];
   3135	min_off = ptr_reg->smin_value + off;
   3136	max_off = ptr_reg->smax_value + off + size;
   3137	if (value_regno >= 0)
   3138		value_reg = &cur->regs[value_regno];
   3139	if (value_reg && register_is_null(value_reg))
   3140		writing_zero = true;
   3141
   3142	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
   3143	if (err)
   3144		return err;
   3145
   3146
   3147	/* Variable offset writes destroy any spilled pointers in range. */
   3148	for (i = min_off; i < max_off; i++) {
   3149		u8 new_type, *stype;
   3150		int slot, spi;
   3151
   3152		slot = -i - 1;
   3153		spi = slot / BPF_REG_SIZE;
   3154		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
   3155		mark_stack_slot_scratched(env, spi);
   3156
   3157		if (!env->allow_ptr_leaks
   3158				&& *stype != NOT_INIT
   3159				&& *stype != SCALAR_VALUE) {
   3160			/* Reject the write if there are spilled pointers in
   3161			 * range. If we didn't reject here, the ptr status
   3162			 * would be erased below (even though not all slots are
   3163			 * actually overwritten), possibly opening the door to
   3164			 * leaks.
   3165			 */
   3166			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
   3167				insn_idx, i);
   3168			return -EINVAL;
   3169		}
   3170
   3171		/* Erase all spilled pointers. */
   3172		state->stack[spi].spilled_ptr.type = NOT_INIT;
   3173
   3174		/* Update the slot type. */
   3175		new_type = STACK_MISC;
   3176		if (writing_zero && *stype == STACK_ZERO) {
   3177			new_type = STACK_ZERO;
   3178			zero_used = true;
   3179		}
   3180		/* If the slot is STACK_INVALID, we check whether it's OK to
   3181		 * pretend that it will be initialized by this write. The slot
   3182		 * might not actually be written to, and so if we mark it as
   3183		 * initialized future reads might leak uninitialized memory.
   3184		 * For privileged programs, we will accept such reads to slots
    3185		 * that may or may not be written because, if we were to reject
   3186		 * them, the error would be too confusing.
   3187		 */
   3188		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
   3189			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
   3190					insn_idx, i);
   3191			return -EINVAL;
   3192		}
   3193		*stype = new_type;
   3194	}
   3195	if (zero_used) {
   3196		/* backtracking doesn't work for STACK_ZERO yet. */
   3197		err = mark_chain_precision(env, value_regno);
   3198		if (err)
   3199			return err;
   3200	}
   3201	return 0;
   3202}
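
/* Illustrative sketch, not part of the original source: a fragment that
 * reaches check_stack_write_var_off() above, assuming R2 holds a scalar
 * already bounded to e.g. [-16, -8]:
 *
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),          R1 = fp
 *    BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),  R1 = fp + var
 *    BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_3, 0),  var-offset stack store
 *
 * Every slot in [min_off, max_off) is conservatively marked STACK_MISC,
 * or left STACK_ZERO when the value being written is a known zero.
 */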
   3203
   3204/* When register 'dst_regno' is assigned some values from stack[min_off,
   3205 * max_off), we set the register's type according to the types of the
   3206 * respective stack slots. If all the stack values are known to be zeros, then
   3207 * so is the destination reg. Otherwise, the register is considered to be
   3208 * SCALAR. This function does not deal with register filling; the caller must
   3209 * ensure that all spilled registers in the stack range have been marked as
   3210 * read.
   3211 */
   3212static void mark_reg_stack_read(struct bpf_verifier_env *env,
   3213				/* func where src register points to */
   3214				struct bpf_func_state *ptr_state,
   3215				int min_off, int max_off, int dst_regno)
   3216{
   3217	struct bpf_verifier_state *vstate = env->cur_state;
   3218	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   3219	int i, slot, spi;
   3220	u8 *stype;
   3221	int zeros = 0;
   3222
   3223	for (i = min_off; i < max_off; i++) {
   3224		slot = -i - 1;
   3225		spi = slot / BPF_REG_SIZE;
   3226		stype = ptr_state->stack[spi].slot_type;
   3227		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
   3228			break;
   3229		zeros++;
   3230	}
   3231	if (zeros == max_off - min_off) {
   3232		/* any access_size read into register is zero extended,
   3233		 * so the whole register == const_zero
   3234		 */
   3235		__mark_reg_const_zero(&state->regs[dst_regno]);
   3236		/* backtracking doesn't support STACK_ZERO yet,
   3237		 * so mark it precise here, so that later
   3238		 * backtracking can stop here.
   3239		 * Backtracking may not need this if this register
   3240		 * doesn't participate in pointer adjustment.
   3241		 * Forward propagation of precise flag is not
   3242		 * necessary either. This mark is only to stop
   3243		 * backtracking. Any register that contributed
   3244		 * to const 0 was marked precise before spill.
   3245		 */
   3246		state->regs[dst_regno].precise = true;
   3247	} else {
   3248		/* have read misc data from the stack */
   3249		mark_reg_unknown(env, state->regs, dst_regno);
   3250	}
   3251	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
   3252}
   3253
   3254/* Read the stack at 'off' and put the results into the register indicated by
   3255 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
   3256 * spilled reg.
   3257 *
   3258 * 'dst_regno' can be -1, meaning that the read value is not going to a
   3259 * register.
   3260 *
   3261 * The access is assumed to be within the current stack bounds.
   3262 */
   3263static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
   3264				      /* func where src register points to */
   3265				      struct bpf_func_state *reg_state,
   3266				      int off, int size, int dst_regno)
   3267{
   3268	struct bpf_verifier_state *vstate = env->cur_state;
   3269	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   3270	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
   3271	struct bpf_reg_state *reg;
   3272	u8 *stype, type;
   3273
   3274	stype = reg_state->stack[spi].slot_type;
   3275	reg = &reg_state->stack[spi].spilled_ptr;
   3276
   3277	if (is_spilled_reg(&reg_state->stack[spi])) {
   3278		u8 spill_size = 1;
   3279
   3280		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
   3281			spill_size++;
   3282
   3283		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
   3284			if (reg->type != SCALAR_VALUE) {
   3285				verbose_linfo(env, env->insn_idx, "; ");
   3286				verbose(env, "invalid size of register fill\n");
   3287				return -EACCES;
   3288			}
   3289
   3290			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
   3291			if (dst_regno < 0)
   3292				return 0;
   3293
   3294			if (!(off % BPF_REG_SIZE) && size == spill_size) {
   3295				/* The earlier check_reg_arg() has decided the
   3296				 * subreg_def for this insn.  Save it first.
   3297				 */
   3298				s32 subreg_def = state->regs[dst_regno].subreg_def;
   3299
   3300				state->regs[dst_regno] = *reg;
   3301				state->regs[dst_regno].subreg_def = subreg_def;
   3302			} else {
   3303				for (i = 0; i < size; i++) {
   3304					type = stype[(slot - i) % BPF_REG_SIZE];
   3305					if (type == STACK_SPILL)
   3306						continue;
   3307					if (type == STACK_MISC)
   3308						continue;
   3309					verbose(env, "invalid read from stack off %d+%d size %d\n",
   3310						off, i, size);
   3311					return -EACCES;
   3312				}
   3313				mark_reg_unknown(env, state->regs, dst_regno);
   3314			}
   3315			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
   3316			return 0;
   3317		}
   3318
   3319		if (dst_regno >= 0) {
   3320			/* restore register state from stack */
   3321			state->regs[dst_regno] = *reg;
   3322			/* mark reg as written since spilled pointer state likely
   3323			 * has its liveness marks cleared by is_state_visited()
   3324			 * which resets stack/reg liveness for state transitions
   3325			 */
   3326			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
   3327		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
   3328			/* If dst_regno==-1, the caller is asking us whether
   3329			 * it is acceptable to use this value as a SCALAR_VALUE
   3330			 * (e.g. for XADD).
   3331			 * We must not allow unprivileged callers to do that
   3332			 * with spilled pointers.
   3333			 */
   3334			verbose(env, "leaking pointer from stack off %d\n",
   3335				off);
   3336			return -EACCES;
   3337		}
   3338		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
   3339	} else {
   3340		for (i = 0; i < size; i++) {
   3341			type = stype[(slot - i) % BPF_REG_SIZE];
   3342			if (type == STACK_MISC)
   3343				continue;
   3344			if (type == STACK_ZERO)
   3345				continue;
   3346			verbose(env, "invalid read from stack off %d+%d size %d\n",
   3347				off, i, size);
   3348			return -EACCES;
   3349		}
   3350		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
   3351		if (dst_regno >= 0)
   3352			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
   3353	}
   3354	return 0;
   3355}
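
/* Illustrative sketch, not part of the original source: with an 8-byte
 * spill/fill pair such as
 *
 *    BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),   spill R6 to fp-8
 *    BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8),   fill into R7
 *
 * check_stack_read_fixed_off() copies the spilled register state into R7.
 * A narrower fill (e.g. BPF_W) of a spilled pointer is rejected with
 * "invalid size of register fill", while a narrower fill of a spilled
 * scalar merely yields an unknown SCALAR_VALUE.
 */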
   3356
   3357enum bpf_access_src {
   3358	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
   3359	ACCESS_HELPER = 2,  /* the access is performed by a helper */
   3360};
   3361
   3362static int check_stack_range_initialized(struct bpf_verifier_env *env,
   3363					 int regno, int off, int access_size,
   3364					 bool zero_size_allowed,
   3365					 enum bpf_access_src type,
   3366					 struct bpf_call_arg_meta *meta);
   3367
   3368static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
   3369{
   3370	return cur_regs(env) + regno;
   3371}
   3372
   3373/* Read the stack at 'ptr_regno + off' and put the result into the register
   3374 * 'dst_regno'.
    3375 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
   3376 * but not its variable offset.
   3377 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
   3378 *
   3379 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
   3380 * filling registers (i.e. reads of spilled register cannot be detected when
   3381 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
   3382 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
   3383 * offset; for a fixed offset check_stack_read_fixed_off should be used
   3384 * instead.
   3385 */
   3386static int check_stack_read_var_off(struct bpf_verifier_env *env,
   3387				    int ptr_regno, int off, int size, int dst_regno)
   3388{
   3389	/* The state of the source register. */
   3390	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
   3391	struct bpf_func_state *ptr_state = func(env, reg);
   3392	int err;
   3393	int min_off, max_off;
   3394
   3395	/* Note that we pass a NULL meta, so raw access will not be permitted.
   3396	 */
   3397	err = check_stack_range_initialized(env, ptr_regno, off, size,
   3398					    false, ACCESS_DIRECT, NULL);
   3399	if (err)
   3400		return err;
   3401
   3402	min_off = reg->smin_value + off;
   3403	max_off = reg->smax_value + off;
   3404	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
   3405	return 0;
   3406}
   3407
   3408/* check_stack_read dispatches to check_stack_read_fixed_off or
   3409 * check_stack_read_var_off.
   3410 *
   3411 * The caller must ensure that the offset falls within the allocated stack
   3412 * bounds.
   3413 *
   3414 * 'dst_regno' is a register which will receive the value from the stack. It
   3415 * can be -1, meaning that the read value is not going to a register.
   3416 */
   3417static int check_stack_read(struct bpf_verifier_env *env,
   3418			    int ptr_regno, int off, int size,
   3419			    int dst_regno)
   3420{
   3421	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
   3422	struct bpf_func_state *state = func(env, reg);
   3423	int err;
   3424	/* Some accesses are only permitted with a static offset. */
   3425	bool var_off = !tnum_is_const(reg->var_off);
   3426
   3427	/* The offset is required to be static when reads don't go to a
   3428	 * register, in order to not leak pointers (see
   3429	 * check_stack_read_fixed_off).
   3430	 */
   3431	if (dst_regno < 0 && var_off) {
   3432		char tn_buf[48];
   3433
   3434		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   3435		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
   3436			tn_buf, off, size);
   3437		return -EACCES;
   3438	}
   3439	/* Variable offset is prohibited for unprivileged mode for simplicity
   3440	 * since it requires corresponding support in Spectre masking for stack
   3441	 * ALU. See also retrieve_ptr_limit().
   3442	 */
   3443	if (!env->bypass_spec_v1 && var_off) {
   3444		char tn_buf[48];
   3445
   3446		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   3447		verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
   3448				ptr_regno, tn_buf);
   3449		return -EACCES;
   3450	}
   3451
   3452	if (!var_off) {
   3453		off += reg->var_off.value;
   3454		err = check_stack_read_fixed_off(env, state, off, size,
   3455						 dst_regno);
   3456	} else {
   3457		/* Variable offset stack reads need more conservative handling
   3458		 * than fixed offset ones. Note that dst_regno >= 0 on this
   3459		 * branch.
   3460		 */
   3461		err = check_stack_read_var_off(env, ptr_regno, off, size,
   3462					       dst_regno);
   3463	}
   3464	return err;
   3465}
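
/* Illustrative sketch, not part of the original source: for an unprivileged
 * program (!env->bypass_spec_v1), a read through a stack pointer with a
 * variable offset, e.g.
 *
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),   R2: bounded scalar
 *    BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
 *
 * is rejected by check_stack_read() with "variable offset stack access
 * prohibited for !root", no matter how tightly R2 is bounded.
 */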
   3466
   3467
   3468/* check_stack_write dispatches to check_stack_write_fixed_off or
   3469 * check_stack_write_var_off.
   3470 *
   3471 * 'ptr_regno' is the register used as a pointer into the stack.
   3472 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
   3473 * 'value_regno' is the register whose value we're writing to the stack. It can
   3474 * be -1, meaning that we're not writing from a register.
   3475 *
   3476 * The caller must ensure that the offset falls within the maximum stack size.
   3477 */
   3478static int check_stack_write(struct bpf_verifier_env *env,
   3479			     int ptr_regno, int off, int size,
   3480			     int value_regno, int insn_idx)
   3481{
   3482	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
   3483	struct bpf_func_state *state = func(env, reg);
   3484	int err;
   3485
   3486	if (tnum_is_const(reg->var_off)) {
   3487		off += reg->var_off.value;
   3488		err = check_stack_write_fixed_off(env, state, off, size,
   3489						  value_regno, insn_idx);
   3490	} else {
    3491		/* Variable offset stack writes need more conservative handling
   3492		 * than fixed offset ones.
   3493		 */
   3494		err = check_stack_write_var_off(env, state,
   3495						ptr_regno, off, size,
   3496						value_regno, insn_idx);
   3497	}
   3498	return err;
   3499}
   3500
   3501static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
   3502				 int off, int size, enum bpf_access_type type)
   3503{
   3504	struct bpf_reg_state *regs = cur_regs(env);
   3505	struct bpf_map *map = regs[regno].map_ptr;
   3506	u32 cap = bpf_map_flags_to_cap(map);
   3507
   3508	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
   3509		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
   3510			map->value_size, off, size);
   3511		return -EACCES;
   3512	}
   3513
   3514	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
   3515		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
   3516			map->value_size, off, size);
   3517		return -EACCES;
   3518	}
   3519
   3520	return 0;
   3521}
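
/* Illustrative sketch, not part of the original source: a map created with
 * BPF_F_RDONLY_PROG lacks BPF_MAP_CAN_WRITE, so a store through its value
 * pointer, e.g.
 *
 *    BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 1),   R0 = map value pointer
 *
 * fails check_map_access_type() with "write into map forbidden".
 */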
   3522
   3523/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
   3524static int __check_mem_access(struct bpf_verifier_env *env, int regno,
   3525			      int off, int size, u32 mem_size,
   3526			      bool zero_size_allowed)
   3527{
   3528	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
   3529	struct bpf_reg_state *reg;
   3530
   3531	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
   3532		return 0;
   3533
   3534	reg = &cur_regs(env)[regno];
   3535	switch (reg->type) {
   3536	case PTR_TO_MAP_KEY:
   3537		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
   3538			mem_size, off, size);
   3539		break;
   3540	case PTR_TO_MAP_VALUE:
   3541		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
   3542			mem_size, off, size);
   3543		break;
   3544	case PTR_TO_PACKET:
   3545	case PTR_TO_PACKET_META:
   3546	case PTR_TO_PACKET_END:
   3547		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
   3548			off, size, regno, reg->id, off, mem_size);
   3549		break;
   3550	case PTR_TO_MEM:
   3551	default:
   3552		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
   3553			mem_size, off, size);
   3554	}
   3555
   3556	return -EACCES;
   3557}
   3558
   3559/* check read/write into a memory region with possible variable offset */
   3560static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
   3561				   int off, int size, u32 mem_size,
   3562				   bool zero_size_allowed)
   3563{
   3564	struct bpf_verifier_state *vstate = env->cur_state;
   3565	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   3566	struct bpf_reg_state *reg = &state->regs[regno];
   3567	int err;
   3568
   3569	/* We may have adjusted the register pointing to memory region, so we
   3570	 * need to try adding each of min_value and max_value to off
   3571	 * to make sure our theoretical access will be safe.
   3572	 *
   3573	 * The minimum value is only important with signed
   3574	 * comparisons where we can't assume the floor of a
   3575	 * value is 0.  If we are using signed variables for our
    3576	 * indexes we need to make sure that whatever we use
   3577	 * will have a set floor within our range.
   3578	 */
   3579	if (reg->smin_value < 0 &&
   3580	    (reg->smin_value == S64_MIN ||
   3581	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
   3582	      reg->smin_value + off < 0)) {
   3583		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
   3584			regno);
   3585		return -EACCES;
   3586	}
   3587	err = __check_mem_access(env, regno, reg->smin_value + off, size,
   3588				 mem_size, zero_size_allowed);
   3589	if (err) {
   3590		verbose(env, "R%d min value is outside of the allowed memory range\n",
   3591			regno);
   3592		return err;
   3593	}
   3594
   3595	/* If we haven't set a max value then we need to bail since we can't be
   3596	 * sure we won't do bad things.
   3597	 * If reg->umax_value + off could overflow, treat that as unbounded too.
   3598	 */
   3599	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
   3600		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
   3601			regno);
   3602		return -EACCES;
   3603	}
   3604	err = __check_mem_access(env, regno, reg->umax_value + off, size,
   3605				 mem_size, zero_size_allowed);
   3606	if (err) {
   3607		verbose(env, "R%d max value is outside of the allowed memory range\n",
   3608			regno);
   3609		return err;
   3610	}
   3611
   3612	return 0;
   3613}
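
/* Illustrative sketch, not part of the original source: for a register with
 * smin_value = 0 and umax_value = 40, a 4-byte access at off = 16 into a
 * 64-byte region passes both calls above (16 + 4 <= 64 and 40 + 16 + 4 <= 64).
 * With umax_value = 48 the second call sees 48 + 16 + 4 = 68 > 64 and the
 * access is rejected as "max value is outside of the allowed memory range".
 */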
   3614
   3615static int __check_ptr_off_reg(struct bpf_verifier_env *env,
   3616			       const struct bpf_reg_state *reg, int regno,
   3617			       bool fixed_off_ok)
   3618{
   3619	/* Access to this pointer-typed register or passing it to a helper
   3620	 * is only allowed in its original, unmodified form.
   3621	 */
   3622
   3623	if (reg->off < 0) {
   3624		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
   3625			reg_type_str(env, reg->type), regno, reg->off);
   3626		return -EACCES;
   3627	}
   3628
   3629	if (!fixed_off_ok && reg->off) {
   3630		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
   3631			reg_type_str(env, reg->type), regno, reg->off);
   3632		return -EACCES;
   3633	}
   3634
   3635	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
   3636		char tn_buf[48];
   3637
   3638		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   3639		verbose(env, "variable %s access var_off=%s disallowed\n",
   3640			reg_type_str(env, reg->type), tn_buf);
   3641		return -EACCES;
   3642	}
   3643
   3644	return 0;
   3645}
   3646
   3647int check_ptr_off_reg(struct bpf_verifier_env *env,
   3648		      const struct bpf_reg_state *reg, int regno)
   3649{
   3650	return __check_ptr_off_reg(env, reg, regno, false);
   3651}
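
/* Illustrative sketch, not part of the original source: context pointers may
 * only be dereferenced in their unmodified form, so
 *
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),           modify the ctx pointer
 *    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),    then dereference it
 *
 * fails __check_ptr_off_reg() with "dereference of modified ctx ptr".
 */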
   3652
   3653static int map_kptr_match_type(struct bpf_verifier_env *env,
   3654			       struct bpf_map_value_off_desc *off_desc,
   3655			       struct bpf_reg_state *reg, u32 regno)
   3656{
   3657	const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id);
   3658	int perm_flags = PTR_MAYBE_NULL;
   3659	const char *reg_name = "";
   3660
   3661	/* Only unreferenced case accepts untrusted pointers */
   3662	if (off_desc->type == BPF_KPTR_UNREF)
   3663		perm_flags |= PTR_UNTRUSTED;
   3664
   3665	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
   3666		goto bad_type;
   3667
   3668	if (!btf_is_kernel(reg->btf)) {
   3669		verbose(env, "R%d must point to kernel BTF\n", regno);
   3670		return -EINVAL;
   3671	}
   3672	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
   3673	reg_name = kernel_type_name(reg->btf, reg->btf_id);
   3674
   3675	/* For ref_ptr case, release function check should ensure we get one
   3676	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
   3677	 * normal store of unreferenced kptr, we must ensure var_off is zero.
   3678	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
   3679	 * reg->off and reg->ref_obj_id are not needed here.
   3680	 */
   3681	if (__check_ptr_off_reg(env, reg, regno, true))
   3682		return -EACCES;
   3683
   3684	/* A full type match is needed, as BTF can be vmlinux or module BTF, and
   3685	 * we also need to take into account the reg->off.
   3686	 *
   3687	 * We want to support cases like:
   3688	 *
   3689	 * struct foo {
   3690	 *         struct bar br;
   3691	 *         struct baz bz;
   3692	 * };
   3693	 *
   3694	 * struct foo *v;
   3695	 * v = func();	      // PTR_TO_BTF_ID
   3696	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
   3697	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
   3698	 *                    // first member type of struct after comparison fails
   3699	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
   3700	 *                    // to match type
   3701	 *
   3702	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
   3703	 * is zero. We must also ensure that btf_struct_ids_match does not walk
   3704	 * the struct to match type against first member of struct, i.e. reject
   3705	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
   3706	 * strict mode to true for type match.
   3707	 */
   3708	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
   3709				  off_desc->kptr.btf, off_desc->kptr.btf_id,
   3710				  off_desc->type == BPF_KPTR_REF))
   3711		goto bad_type;
   3712	return 0;
   3713bad_type:
   3714	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
   3715		reg_type_str(env, reg->type), reg_name);
   3716	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
   3717	if (off_desc->type == BPF_KPTR_UNREF)
   3718		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
   3719			targ_name);
   3720	else
   3721		verbose(env, "\n");
   3722	return -EINVAL;
   3723}
   3724
   3725static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
   3726				 int value_regno, int insn_idx,
   3727				 struct bpf_map_value_off_desc *off_desc)
   3728{
   3729	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
   3730	int class = BPF_CLASS(insn->code);
   3731	struct bpf_reg_state *val_reg;
   3732
   3733	/* Things we already checked for in check_map_access and caller:
   3734	 *  - Reject cases where variable offset may touch kptr
   3735	 *  - size of access (must be BPF_DW)
   3736	 *  - tnum_is_const(reg->var_off)
   3737	 *  - off_desc->offset == off + reg->var_off.value
   3738	 */
   3739	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
   3740	if (BPF_MODE(insn->code) != BPF_MEM) {
   3741		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
   3742		return -EACCES;
   3743	}
   3744
   3745	/* We only allow loading referenced kptr, since it will be marked as
   3746	 * untrusted, similar to unreferenced kptr.
   3747	 */
   3748	if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
   3749		verbose(env, "store to referenced kptr disallowed\n");
   3750		return -EACCES;
   3751	}
   3752
   3753	if (class == BPF_LDX) {
   3754		val_reg = reg_state(env, value_regno);
   3755		/* We can simply mark the value_regno receiving the pointer
   3756		 * value from map as PTR_TO_BTF_ID, with the correct type.
   3757		 */
   3758		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
   3759				off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
   3760		/* For mark_ptr_or_null_reg */
   3761		val_reg->id = ++env->id_gen;
   3762	} else if (class == BPF_STX) {
   3763		val_reg = reg_state(env, value_regno);
   3764		if (!register_is_null(val_reg) &&
   3765		    map_kptr_match_type(env, off_desc, val_reg, value_regno))
   3766			return -EACCES;
   3767	} else if (class == BPF_ST) {
   3768		if (insn->imm) {
   3769			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
   3770				off_desc->offset);
   3771			return -EACCES;
   3772		}
   3773	} else {
   3774		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
   3775		return -EACCES;
   3776	}
   3777	return 0;
   3778}
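
/* Illustrative sketch, not part of the original source: assuming a map value
 * with a kptr field at offset 0, a load of the field must be a full BPF_DW
 * through a constant offset, e.g.
 *
 *    BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),   R0 = map value pointer
 *
 * which marks R1 as PTR_TO_BTF_ID | PTR_MAYBE_NULL | PTR_UNTRUSTED. Stores
 * to a referenced kptr (BPF_KPTR_REF) are rejected; an unreferenced kptr may
 * be overwritten with NULL or a type-matching pointer.
 */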
   3779
   3780/* check read/write into a map element with possible variable offset */
   3781static int check_map_access(struct bpf_verifier_env *env, u32 regno,
   3782			    int off, int size, bool zero_size_allowed,
   3783			    enum bpf_access_src src)
   3784{
   3785	struct bpf_verifier_state *vstate = env->cur_state;
   3786	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   3787	struct bpf_reg_state *reg = &state->regs[regno];
   3788	struct bpf_map *map = reg->map_ptr;
   3789	int err;
   3790
   3791	err = check_mem_region_access(env, regno, off, size, map->value_size,
   3792				      zero_size_allowed);
   3793	if (err)
   3794		return err;
   3795
   3796	if (map_value_has_spin_lock(map)) {
   3797		u32 lock = map->spin_lock_off;
   3798
   3799		/* if any part of struct bpf_spin_lock can be touched by
   3800		 * load/store reject this program.
   3801		 * To check that [x1, x2) overlaps with [y1, y2)
   3802		 * it is sufficient to check x1 < y2 && y1 < x2.
   3803		 */
   3804		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
   3805		     lock < reg->umax_value + off + size) {
   3806			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
   3807			return -EACCES;
   3808		}
   3809	}
   3810	if (map_value_has_timer(map)) {
   3811		u32 t = map->timer_off;
   3812
   3813		if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
   3814		     t < reg->umax_value + off + size) {
   3815			verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
   3816			return -EACCES;
   3817		}
   3818	}
   3819	if (map_value_has_kptrs(map)) {
   3820		struct bpf_map_value_off *tab = map->kptr_off_tab;
   3821		int i;
   3822
   3823		for (i = 0; i < tab->nr_off; i++) {
   3824			u32 p = tab->off[i].offset;
   3825
   3826			if (reg->smin_value + off < p + sizeof(u64) &&
   3827			    p < reg->umax_value + off + size) {
   3828				if (src != ACCESS_DIRECT) {
   3829					verbose(env, "kptr cannot be accessed indirectly by helper\n");
   3830					return -EACCES;
   3831				}
   3832				if (!tnum_is_const(reg->var_off)) {
   3833					verbose(env, "kptr access cannot have variable offset\n");
   3834					return -EACCES;
   3835				}
   3836				if (p != off + reg->var_off.value) {
   3837					verbose(env, "kptr access misaligned expected=%u off=%llu\n",
   3838						p, off + reg->var_off.value);
   3839					return -EACCES;
   3840				}
   3841				if (size != bpf_size_to_bytes(BPF_DW)) {
   3842					verbose(env, "kptr access size must be BPF_DW\n");
   3843					return -EACCES;
   3844				}
   3845				break;
   3846			}
   3847		}
   3848	}
   3849	return err;
   3850}
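
/* Illustrative sketch, not part of the original source: the overlap tests
 * above are plain interval intersection. Assuming a constant-offset value
 * pointer, spin_lock_off = 16 and sizeof(struct bpf_spin_lock) = 4, an
 * 8-byte access at off = 12 satisfies 12 < 20 && 16 < 20 and is rejected,
 * while the same access at off = 20 fails the first comparison and is
 * allowed.
 */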
   3851
   3852#define MAX_PACKET_OFF 0xffff
   3853
   3854static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
   3855				       const struct bpf_call_arg_meta *meta,
   3856				       enum bpf_access_type t)
   3857{
   3858	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
   3859
   3860	switch (prog_type) {
    3861	/* Program types with only direct read access go here! */
   3862	case BPF_PROG_TYPE_LWT_IN:
   3863	case BPF_PROG_TYPE_LWT_OUT:
   3864	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
   3865	case BPF_PROG_TYPE_SK_REUSEPORT:
   3866	case BPF_PROG_TYPE_FLOW_DISSECTOR:
   3867	case BPF_PROG_TYPE_CGROUP_SKB:
   3868		if (t == BPF_WRITE)
   3869			return false;
   3870		fallthrough;
   3871
   3872	/* Program types with direct read + write access go here! */
   3873	case BPF_PROG_TYPE_SCHED_CLS:
   3874	case BPF_PROG_TYPE_SCHED_ACT:
   3875	case BPF_PROG_TYPE_XDP:
   3876	case BPF_PROG_TYPE_LWT_XMIT:
   3877	case BPF_PROG_TYPE_SK_SKB:
   3878	case BPF_PROG_TYPE_SK_MSG:
   3879		if (meta)
   3880			return meta->pkt_access;
   3881
   3882		env->seen_direct_write = true;
   3883		return true;
   3884
   3885	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
   3886		if (t == BPF_WRITE)
   3887			env->seen_direct_write = true;
   3888
   3889		return true;
   3890
   3891	default:
   3892		return false;
   3893	}
   3894}
   3895
   3896static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
   3897			       int size, bool zero_size_allowed)
   3898{
   3899	struct bpf_reg_state *regs = cur_regs(env);
   3900	struct bpf_reg_state *reg = &regs[regno];
   3901	int err;
   3902
   3903	/* We may have added a variable offset to the packet pointer; but any
   3904	 * reg->range we have comes after that.  We are only checking the fixed
   3905	 * offset.
   3906	 */
   3907
   3908	/* We don't allow negative numbers, because we aren't tracking enough
   3909	 * detail to prove they're safe.
   3910	 */
   3911	if (reg->smin_value < 0) {
   3912		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
   3913			regno);
   3914		return -EACCES;
   3915	}
   3916
   3917	err = reg->range < 0 ? -EINVAL :
   3918	      __check_mem_access(env, regno, off, size, reg->range,
   3919				 zero_size_allowed);
   3920	if (err) {
   3921		verbose(env, "R%d offset is outside of the packet\n", regno);
   3922		return err;
   3923	}
   3924
   3925	/* __check_mem_access has made sure "off + size - 1" is within u16.
   3926	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
    3927	 * otherwise find_good_pkt_pointers would have refused to set range info,
    3928	 * in which case __check_mem_access would have rejected this pkt access.
   3929	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
   3930	 */
   3931	env->prog->aux->max_pkt_offset =
   3932		max_t(u32, env->prog->aux->max_pkt_offset,
   3933		      off + reg->umax_value + size - 1);
   3934
   3935	return err;
   3936}
   3937
   3938/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
   3939static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
   3940			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
   3941			    struct btf **btf, u32 *btf_id)
   3942{
   3943	struct bpf_insn_access_aux info = {
   3944		.reg_type = *reg_type,
   3945		.log = &env->log,
   3946	};
   3947
   3948	if (env->ops->is_valid_access &&
   3949	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
   3950		/* A non zero info.ctx_field_size indicates that this field is a
   3951		 * candidate for later verifier transformation to load the whole
   3952		 * field and then apply a mask when accessed with a narrower
   3953		 * access than actual ctx access size. A zero info.ctx_field_size
   3954		 * will only allow for whole field access and rejects any other
   3955		 * type of narrower access.
   3956		 */
   3957		*reg_type = info.reg_type;
   3958
   3959		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
   3960			*btf = info.btf;
   3961			*btf_id = info.btf_id;
   3962		} else {
   3963			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
   3964		}
   3965		/* remember the offset of last byte accessed in ctx */
   3966		if (env->prog->aux->max_ctx_offset < off + size)
   3967			env->prog->aux->max_ctx_offset = off + size;
   3968		return 0;
   3969	}
   3970
   3971	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
   3972	return -EACCES;
   3973}
   3974
   3975static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
   3976				  int size)
   3977{
   3978	if (size < 0 || off < 0 ||
   3979	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
   3980		verbose(env, "invalid access to flow keys off=%d size=%d\n",
   3981			off, size);
   3982		return -EACCES;
   3983	}
   3984	return 0;
   3985}
   3986
   3987static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
   3988			     u32 regno, int off, int size,
   3989			     enum bpf_access_type t)
   3990{
   3991	struct bpf_reg_state *regs = cur_regs(env);
   3992	struct bpf_reg_state *reg = &regs[regno];
   3993	struct bpf_insn_access_aux info = {};
   3994	bool valid;
   3995
   3996	if (reg->smin_value < 0) {
   3997		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
   3998			regno);
   3999		return -EACCES;
   4000	}
   4001
   4002	switch (reg->type) {
   4003	case PTR_TO_SOCK_COMMON:
   4004		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
   4005		break;
   4006	case PTR_TO_SOCKET:
   4007		valid = bpf_sock_is_valid_access(off, size, t, &info);
   4008		break;
   4009	case PTR_TO_TCP_SOCK:
   4010		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
   4011		break;
   4012	case PTR_TO_XDP_SOCK:
   4013		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
   4014		break;
   4015	default:
   4016		valid = false;
   4017	}
   4018
   4019
   4020	if (valid) {
   4021		env->insn_aux_data[insn_idx].ctx_field_size =
   4022			info.ctx_field_size;
   4023		return 0;
   4024	}
   4025
   4026	verbose(env, "R%d invalid %s access off=%d size=%d\n",
   4027		regno, reg_type_str(env, reg->type), off, size);
   4028
   4029	return -EACCES;
   4030}
   4031
   4032static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
   4033{
   4034	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
   4035}
   4036
   4037static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
   4038{
   4039	const struct bpf_reg_state *reg = reg_state(env, regno);
   4040
   4041	return reg->type == PTR_TO_CTX;
   4042}
   4043
   4044static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
   4045{
   4046	const struct bpf_reg_state *reg = reg_state(env, regno);
   4047
   4048	return type_is_sk_pointer(reg->type);
   4049}
   4050
   4051static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
   4052{
   4053	const struct bpf_reg_state *reg = reg_state(env, regno);
   4054
   4055	return type_is_pkt_pointer(reg->type);
   4056}
   4057
   4058static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
   4059{
   4060	const struct bpf_reg_state *reg = reg_state(env, regno);
   4061
    4062	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
   4063	return reg->type == PTR_TO_FLOW_KEYS;
   4064}
   4065
   4066static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
   4067				   const struct bpf_reg_state *reg,
   4068				   int off, int size, bool strict)
   4069{
   4070	struct tnum reg_off;
   4071	int ip_align;
   4072
   4073	/* Byte size accesses are always allowed. */
   4074	if (!strict || size == 1)
   4075		return 0;
   4076
   4077	/* For platforms that do not have a Kconfig enabling
   4078	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
   4079	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
   4080	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
   4081	 * to this code only in strict mode where we want to emulate
   4082	 * the NET_IP_ALIGN==2 checking.  Therefore use an
   4083	 * unconditional IP align value of '2'.
   4084	 */
   4085	ip_align = 2;
   4086
   4087	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
   4088	if (!tnum_is_aligned(reg_off, size)) {
   4089		char tn_buf[48];
   4090
   4091		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   4092		verbose(env,
   4093			"misaligned packet access off %d+%s+%d+%d size %d\n",
   4094			ip_align, tn_buf, reg->off, off, size);
   4095		return -EACCES;
   4096	}
   4097
   4098	return 0;
   4099}
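
/* Illustrative sketch, not part of the original source: in strict mode a
 * 4-byte load at packet offset 0 computes reg_off = 2 + 0 + 0 (the implied
 * NET_IP_ALIGN of 2), which is not 4-byte aligned and is rejected; the same
 * load at offset 2 yields reg_off = 4 and passes.
 */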
   4100
   4101static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
   4102				       const struct bpf_reg_state *reg,
   4103				       const char *pointer_desc,
   4104				       int off, int size, bool strict)
   4105{
   4106	struct tnum reg_off;
   4107
   4108	/* Byte size accesses are always allowed. */
   4109	if (!strict || size == 1)
   4110		return 0;
   4111
   4112	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
   4113	if (!tnum_is_aligned(reg_off, size)) {
   4114		char tn_buf[48];
   4115
   4116		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   4117		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
   4118			pointer_desc, tn_buf, reg->off, off, size);
   4119		return -EACCES;
   4120	}
   4121
   4122	return 0;
   4123}
   4124
   4125static int check_ptr_alignment(struct bpf_verifier_env *env,
   4126			       const struct bpf_reg_state *reg, int off,
   4127			       int size, bool strict_alignment_once)
   4128{
   4129	bool strict = env->strict_alignment || strict_alignment_once;
   4130	const char *pointer_desc = "";
   4131
   4132	switch (reg->type) {
   4133	case PTR_TO_PACKET:
   4134	case PTR_TO_PACKET_META:
   4135		/* Special case, because of NET_IP_ALIGN. Given metadata sits
   4136		 * right in front, treat it the very same way.
   4137		 */
   4138		return check_pkt_ptr_alignment(env, reg, off, size, strict);
   4139	case PTR_TO_FLOW_KEYS:
   4140		pointer_desc = "flow keys ";
   4141		break;
   4142	case PTR_TO_MAP_KEY:
   4143		pointer_desc = "key ";
   4144		break;
   4145	case PTR_TO_MAP_VALUE:
   4146		pointer_desc = "value ";
   4147		break;
   4148	case PTR_TO_CTX:
   4149		pointer_desc = "context ";
   4150		break;
   4151	case PTR_TO_STACK:
   4152		pointer_desc = "stack ";
   4153		/* The stack spill tracking logic in check_stack_write_fixed_off()
   4154		 * and check_stack_read_fixed_off() relies on stack accesses being
   4155		 * aligned.
   4156		 */
   4157		strict = true;
   4158		break;
   4159	case PTR_TO_SOCKET:
   4160		pointer_desc = "sock ";
   4161		break;
   4162	case PTR_TO_SOCK_COMMON:
   4163		pointer_desc = "sock_common ";
   4164		break;
   4165	case PTR_TO_TCP_SOCK:
   4166		pointer_desc = "tcp_sock ";
   4167		break;
   4168	case PTR_TO_XDP_SOCK:
   4169		pointer_desc = "xdp_sock ";
   4170		break;
   4171	default:
   4172		break;
   4173	}
   4174	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
   4175					   strict);
   4176}
   4177
   4178static int update_stack_depth(struct bpf_verifier_env *env,
   4179			      const struct bpf_func_state *func,
   4180			      int off)
   4181{
   4182	u16 stack = env->subprog_info[func->subprogno].stack_depth;
   4183
   4184	if (stack >= -off)
   4185		return 0;
   4186
   4187	/* update known max for given subprogram */
   4188	env->subprog_info[func->subprogno].stack_depth = -off;
   4189	return 0;
   4190}
   4191
   4192/* starting from main bpf function walk all instructions of the function
   4193 * and recursively walk all callees that given function can call.
   4194 * Ignore jump and exit insns.
   4195 * Since recursion is prevented by check_cfg() this algorithm
   4196 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
   4197 */
   4198static int check_max_stack_depth(struct bpf_verifier_env *env)
   4199{
   4200	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
   4201	struct bpf_subprog_info *subprog = env->subprog_info;
   4202	struct bpf_insn *insn = env->prog->insnsi;
   4203	bool tail_call_reachable = false;
   4204	int ret_insn[MAX_CALL_FRAMES];
   4205	int ret_prog[MAX_CALL_FRAMES];
   4206	int j;
   4207
   4208process_func:
   4209	/* protect against potential stack overflow that might happen when
   4210	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
   4211	 * depth for such case down to 256 so that the worst case scenario
   4212	 * would result in 8k stack size (32 which is tailcall limit * 256 =
    4213	 * would result in 8k stack size (tail call limit of 32 * 256 =
   4214	 *
   4215	 * To get the idea what might happen, see an example:
   4216	 * func1 -> sub rsp, 128
   4217	 *  subfunc1 -> sub rsp, 256
   4218	 *  tailcall1 -> add rsp, 256
   4219	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
   4220	 *   subfunc2 -> sub rsp, 64
   4221	 *   subfunc22 -> sub rsp, 128
   4222	 *   tailcall2 -> add rsp, 128
   4223	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
   4224	 *
   4225	 * tailcall will unwind the current stack frame but it will not get rid
   4226	 * of caller's stack as shown on the example above.
   4227	 */
   4228	if (idx && subprog[idx].has_tail_call && depth >= 256) {
   4229		verbose(env,
   4230			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
   4231			depth);
   4232		return -EACCES;
   4233	}
    4234	/* round up to 32 bytes, since this is the granularity
    4235	 * of the interpreter stack size
   4236	 */
   4237	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
   4238	if (depth > MAX_BPF_STACK) {
   4239		verbose(env, "combined stack size of %d calls is %d. Too large\n",
   4240			frame + 1, depth);
   4241		return -EACCES;
   4242	}
   4243continue_func:
   4244	subprog_end = subprog[idx + 1].start;
   4245	for (; i < subprog_end; i++) {
   4246		int next_insn;
   4247
   4248		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
   4249			continue;
   4250		/* remember insn and function to return to */
   4251		ret_insn[frame] = i + 1;
   4252		ret_prog[frame] = idx;
   4253
   4254		/* find the callee */
   4255		next_insn = i + insn[i].imm + 1;
   4256		idx = find_subprog(env, next_insn);
   4257		if (idx < 0) {
   4258			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
   4259				  next_insn);
   4260			return -EFAULT;
   4261		}
   4262		if (subprog[idx].is_async_cb) {
   4263			if (subprog[idx].has_tail_call) {
   4264				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
   4265				return -EFAULT;
   4266			}
   4267			 /* async callbacks don't increase bpf prog stack size */
   4268			continue;
   4269		}
   4270		i = next_insn;
   4271
   4272		if (subprog[idx].has_tail_call)
   4273			tail_call_reachable = true;
   4274
   4275		frame++;
   4276		if (frame >= MAX_CALL_FRAMES) {
   4277			verbose(env, "the call stack of %d frames is too deep !\n",
   4278				frame);
   4279			return -E2BIG;
   4280		}
   4281		goto process_func;
   4282	}
   4283	/* if tail call got detected across bpf2bpf calls then mark each of the
   4284	 * currently present subprog frames as tail call reachable subprogs;
   4285	 * this info will be utilized by JIT so that we will be preserving the
   4286	 * tail call counter throughout bpf2bpf calls combined with tailcalls
   4287	 */
   4288	if (tail_call_reachable)
   4289		for (j = 0; j < frame; j++)
   4290			subprog[ret_prog[j]].tail_call_reachable = true;
   4291	if (subprog[0].tail_call_reachable)
   4292		env->prog->aux->tail_call_reachable = true;
   4293
   4294	/* end of for() loop means the last insn of the 'subprog'
   4295	 * was reached. Doesn't matter whether it was JA or EXIT
   4296	 */
   4297	if (frame == 0)
   4298		return 0;
   4299	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
   4300	frame--;
   4301	i = ret_insn[frame];
   4302	idx = ret_prog[frame];
   4303	goto continue_func;
   4304}
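
/* Illustrative sketch, not part of the original source: because of the
 * 32-byte round-up above, a call chain of subprogs with stack_depth 40, 100
 * and 8 contributes 64 + 128 + 32 = 224 bytes towards MAX_BPF_STACK, not
 * 148.
 */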
   4305
   4306#ifndef CONFIG_BPF_JIT_ALWAYS_ON
   4307static int get_callee_stack_depth(struct bpf_verifier_env *env,
   4308				  const struct bpf_insn *insn, int idx)
   4309{
   4310	int start = idx + insn->imm + 1, subprog;
   4311
   4312	subprog = find_subprog(env, start);
   4313	if (subprog < 0) {
   4314		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
   4315			  start);
   4316		return -EFAULT;
   4317	}
   4318	return env->subprog_info[subprog].stack_depth;
   4319}
   4320#endif
   4321
   4322static int __check_buffer_access(struct bpf_verifier_env *env,
   4323				 const char *buf_info,
   4324				 const struct bpf_reg_state *reg,
   4325				 int regno, int off, int size)
   4326{
   4327	if (off < 0) {
   4328		verbose(env,
   4329			"R%d invalid %s buffer access: off=%d, size=%d\n",
   4330			regno, buf_info, off, size);
   4331		return -EACCES;
   4332	}
   4333	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
   4334		char tn_buf[48];
   4335
   4336		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   4337		verbose(env,
   4338			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
   4339			regno, off, tn_buf);
   4340		return -EACCES;
   4341	}
   4342
   4343	return 0;
   4344}
   4345
   4346static int check_tp_buffer_access(struct bpf_verifier_env *env,
   4347				  const struct bpf_reg_state *reg,
   4348				  int regno, int off, int size)
   4349{
   4350	int err;
   4351
   4352	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
   4353	if (err)
   4354		return err;
   4355
   4356	if (off + size > env->prog->aux->max_tp_access)
   4357		env->prog->aux->max_tp_access = off + size;
   4358
   4359	return 0;
   4360}
   4361
   4362static int check_buffer_access(struct bpf_verifier_env *env,
   4363			       const struct bpf_reg_state *reg,
   4364			       int regno, int off, int size,
   4365			       bool zero_size_allowed,
   4366			       u32 *max_access)
   4367{
   4368	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
   4369	int err;
   4370
   4371	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
   4372	if (err)
   4373		return err;
   4374
   4375	if (off + size > *max_access)
   4376		*max_access = off + size;
   4377
   4378	return 0;
   4379}
   4380
    4381/* BPF architecture zero extends alu32 ops into 64-bit registers */
   4382static void zext_32_to_64(struct bpf_reg_state *reg)
   4383{
   4384	reg->var_off = tnum_subreg(reg->var_off);
   4385	__reg_assign_32_into_64(reg);
   4386}
   4387
   4388/* truncate register to smaller size (in bytes)
   4389 * must be called with size < BPF_REG_SIZE
   4390 */
   4391static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
   4392{
   4393	u64 mask;
   4394
   4395	/* clear high bits in bit representation */
   4396	reg->var_off = tnum_cast(reg->var_off, size);
   4397
   4398	/* fix arithmetic bounds */
   4399	mask = ((u64)1 << (size * 8)) - 1;
   4400	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
   4401		reg->umin_value &= mask;
   4402		reg->umax_value &= mask;
   4403	} else {
   4404		reg->umin_value = 0;
   4405		reg->umax_value = mask;
   4406	}
   4407	reg->smin_value = reg->umin_value;
   4408	reg->smax_value = reg->umax_value;
   4409
    4410	/* If the size is smaller than the 32-bit register, the 32-bit register
    4411	 * values are truncated as well, so push the 64-bit bounds into the
    4412	 * 32-bit bounds. Sizes below 32 bits were already truncated above.
   4413	 */
   4414	if (size >= 4)
   4415		return;
   4416	__reg_combine_64_into_32(reg);
   4417}
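
/* Illustrative sketch, not part of the original source: truncating a
 * register with bounds [0x10005, 0x100f0] to one byte keeps precise bounds
 * [0x05, 0xf0], since the bits above the low byte agree on both ends; with
 * bounds [0x10005, 0x10105] they disagree and the result is the conservative
 * [0x00, 0xff].
 */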
   4418
   4419static bool bpf_map_is_rdonly(const struct bpf_map *map)
   4420{
    4421	/* A map is considered read-only if the following conditions are true:
   4422	 *
   4423	 * 1) BPF program side cannot change any of the map content. The
    4424	 *    BPF_F_RDONLY_PROG flag was set at map creation time and stays
    4425	 *    in effect throughout the lifetime of the map.
   4426	 * 2) The map value(s) have been initialized from user space by a
   4427	 *    loader and then "frozen", such that no new map update/delete
   4428	 *    operations from syscall side are possible for the rest of
   4429	 *    the map's lifetime from that point onwards.
   4430	 * 3) Any parallel/pending map update/delete operations from syscall
   4431	 *    side have been completed. Only after that point, it's safe to
   4432	 *    assume that map value(s) are immutable.
   4433	 */
   4434	return (map->map_flags & BPF_F_RDONLY_PROG) &&
   4435	       READ_ONCE(map->frozen) &&
   4436	       !bpf_map_write_active(map);
   4437}
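
/* Illustrative sketch, not part of the original source: user space typically
 * produces such a map by creating it with BPF_F_RDONLY_PROG, populating it,
 * and then issuing the BPF_MAP_FREEZE command, which sets map->frozen and
 * lets check_mem_access() below track the contents as known scalars.
 */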
   4438
   4439static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
   4440{
   4441	void *ptr;
   4442	u64 addr;
   4443	int err;
   4444
   4445	err = map->ops->map_direct_value_addr(map, &addr, off);
   4446	if (err)
   4447		return err;
   4448	ptr = (void *)(long)addr + off;
   4449
   4450	switch (size) {
   4451	case sizeof(u8):
   4452		*val = (u64)*(u8 *)ptr;
   4453		break;
   4454	case sizeof(u16):
   4455		*val = (u64)*(u16 *)ptr;
   4456		break;
   4457	case sizeof(u32):
   4458		*val = (u64)*(u32 *)ptr;
   4459		break;
   4460	case sizeof(u64):
   4461		*val = *(u64 *)ptr;
   4462		break;
   4463	default:
   4464		return -EINVAL;
   4465	}
   4466	return 0;
   4467}
   4468
   4469static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
   4470				   struct bpf_reg_state *regs,
   4471				   int regno, int off, int size,
   4472				   enum bpf_access_type atype,
   4473				   int value_regno)
   4474{
   4475	struct bpf_reg_state *reg = regs + regno;
   4476	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
   4477	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
   4478	enum bpf_type_flag flag = 0;
   4479	u32 btf_id;
   4480	int ret;
   4481
   4482	if (off < 0) {
   4483		verbose(env,
   4484			"R%d is ptr_%s invalid negative access: off=%d\n",
   4485			regno, tname, off);
   4486		return -EACCES;
   4487	}
   4488	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
   4489		char tn_buf[48];
   4490
   4491		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   4492		verbose(env,
   4493			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
   4494			regno, tname, off, tn_buf);
   4495		return -EACCES;
   4496	}
   4497
   4498	if (reg->type & MEM_USER) {
   4499		verbose(env,
   4500			"R%d is ptr_%s access user memory: off=%d\n",
   4501			regno, tname, off);
   4502		return -EACCES;
   4503	}
   4504
   4505	if (reg->type & MEM_PERCPU) {
   4506		verbose(env,
   4507			"R%d is ptr_%s access percpu memory: off=%d\n",
   4508			regno, tname, off);
   4509		return -EACCES;
   4510	}
   4511
   4512	if (env->ops->btf_struct_access) {
   4513		ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
   4514						  off, size, atype, &btf_id, &flag);
   4515	} else {
   4516		if (atype != BPF_READ) {
   4517			verbose(env, "only read is supported\n");
   4518			return -EACCES;
   4519		}
   4520
   4521		ret = btf_struct_access(&env->log, reg->btf, t, off, size,
   4522					atype, &btf_id, &flag);
   4523	}
   4524
   4525	if (ret < 0)
   4526		return ret;
   4527
   4528	/* If this is an untrusted pointer, all pointers formed by walking it
   4529	 * also inherit the untrusted flag.
   4530	 */
   4531	if (type_flag(reg->type) & PTR_UNTRUSTED)
   4532		flag |= PTR_UNTRUSTED;
   4533
   4534	if (atype == BPF_READ && value_regno >= 0)
   4535		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
   4536
   4537	return 0;
   4538}
   4539
   4540static int check_ptr_to_map_access(struct bpf_verifier_env *env,
   4541				   struct bpf_reg_state *regs,
   4542				   int regno, int off, int size,
   4543				   enum bpf_access_type atype,
   4544				   int value_regno)
   4545{
   4546	struct bpf_reg_state *reg = regs + regno;
   4547	struct bpf_map *map = reg->map_ptr;
   4548	enum bpf_type_flag flag = 0;
   4549	const struct btf_type *t;
   4550	const char *tname;
   4551	u32 btf_id;
   4552	int ret;
   4553
   4554	if (!btf_vmlinux) {
   4555		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
   4556		return -ENOTSUPP;
   4557	}
   4558
   4559	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
   4560		verbose(env, "map_ptr access not supported for map type %d\n",
   4561			map->map_type);
   4562		return -ENOTSUPP;
   4563	}
   4564
   4565	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
   4566	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
   4567
   4568	if (!env->allow_ptr_to_map_access) {
   4569		verbose(env,
   4570			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
   4571			tname);
   4572		return -EPERM;
   4573	}
   4574
   4575	if (off < 0) {
   4576		verbose(env, "R%d is %s invalid negative access: off=%d\n",
   4577			regno, tname, off);
   4578		return -EACCES;
   4579	}
   4580
   4581	if (atype != BPF_READ) {
   4582		verbose(env, "only read from %s is supported\n", tname);
   4583		return -EACCES;
   4584	}
   4585
   4586	ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
   4587	if (ret < 0)
   4588		return ret;
   4589
   4590	if (value_regno >= 0)
   4591		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
   4592
   4593	return 0;
   4594}
   4595
   4596/* Check that the stack access at the given offset is within bounds. The
   4597 * maximum valid offset is -1.
   4598 *
   4599 * The minimum valid offset is -MAX_BPF_STACK for writes, and
   4600 * -state->allocated_stack for reads.
   4601 */
   4602static int check_stack_slot_within_bounds(int off,
   4603					  struct bpf_func_state *state,
   4604					  enum bpf_access_type t)
   4605{
   4606	int min_valid_off;
   4607
   4608	if (t == BPF_WRITE)
   4609		min_valid_off = -MAX_BPF_STACK;
   4610	else
   4611		min_valid_off = -state->allocated_stack;
   4612
   4613	if (off < min_valid_off || off > -1)
   4614		return -EACCES;
   4615	return 0;
   4616}
   4617
   4618/* Check that the stack access at 'regno + off' falls within the maximum stack
   4619 * bounds.
   4620 *
   4621 * 'off' includes `regno->offset`, but not its dynamic part (if any).
   4622 */
   4623static int check_stack_access_within_bounds(
   4624		struct bpf_verifier_env *env,
   4625		int regno, int off, int access_size,
   4626		enum bpf_access_src src, enum bpf_access_type type)
   4627{
   4628	struct bpf_reg_state *regs = cur_regs(env);
   4629	struct bpf_reg_state *reg = regs + regno;
   4630	struct bpf_func_state *state = func(env, reg);
   4631	int min_off, max_off;
   4632	int err;
   4633	char *err_extra;
   4634
   4635	if (src == ACCESS_HELPER)
   4636		/* We don't know if helpers are reading or writing (or both). */
   4637		err_extra = " indirect access to";
   4638	else if (type == BPF_READ)
   4639		err_extra = " read from";
   4640	else
   4641		err_extra = " write to";
   4642
   4643	if (tnum_is_const(reg->var_off)) {
   4644		min_off = reg->var_off.value + off;
   4645		if (access_size > 0)
   4646			max_off = min_off + access_size - 1;
   4647		else
   4648			max_off = min_off;
   4649	} else {
   4650		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
   4651		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
   4652			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
   4653				err_extra, regno);
   4654			return -EACCES;
   4655		}
   4656		min_off = reg->smin_value + off;
   4657		if (access_size > 0)
   4658			max_off = reg->smax_value + off + access_size - 1;
   4659		else
   4660			max_off = min_off;
   4661	}
   4662
   4663	err = check_stack_slot_within_bounds(min_off, state, type);
   4664	if (!err)
   4665		err = check_stack_slot_within_bounds(max_off, state, type);
   4666
   4667	if (err) {
   4668		if (tnum_is_const(reg->var_off)) {
   4669			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
   4670				err_extra, regno, off, access_size);
   4671		} else {
   4672			char tn_buf[48];
   4673
   4674			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   4675			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
   4676				err_extra, regno, tn_buf, access_size);
   4677		}
   4678	}
   4679	return err;
   4680}
   4681
   4682/* check whether memory at (regno + off) is accessible for t = (read | write)
   4683 * if t==write, value_regno is a register which value is stored into memory
   4684 * if t==read, value_regno is a register which will receive the value from memory
   4685 * if t==write && value_regno==-1, some unknown value is stored into memory
   4686 * if t==read && value_regno==-1, don't care what we read from memory
   4687 */
   4688static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
   4689			    int off, int bpf_size, enum bpf_access_type t,
   4690			    int value_regno, bool strict_alignment_once)
   4691{
   4692	struct bpf_reg_state *regs = cur_regs(env);
   4693	struct bpf_reg_state *reg = regs + regno;
   4694	struct bpf_func_state *state;
   4695	int size, err = 0;
   4696
   4697	size = bpf_size_to_bytes(bpf_size);
   4698	if (size < 0)
   4699		return size;
   4700
   4701	/* alignment checks will add in reg->off themselves */
   4702	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
   4703	if (err)
   4704		return err;
   4705
   4706	/* for access checks, reg->off is just part of off */
   4707	off += reg->off;
   4708
   4709	if (reg->type == PTR_TO_MAP_KEY) {
   4710		if (t == BPF_WRITE) {
   4711			verbose(env, "write to change key R%d not allowed\n", regno);
   4712			return -EACCES;
   4713		}
   4714
   4715		err = check_mem_region_access(env, regno, off, size,
   4716					      reg->map_ptr->key_size, false);
   4717		if (err)
   4718			return err;
   4719		if (value_regno >= 0)
   4720			mark_reg_unknown(env, regs, value_regno);
   4721	} else if (reg->type == PTR_TO_MAP_VALUE) {
   4722		struct bpf_map_value_off_desc *kptr_off_desc = NULL;
   4723
   4724		if (t == BPF_WRITE && value_regno >= 0 &&
   4725		    is_pointer_value(env, value_regno)) {
   4726			verbose(env, "R%d leaks addr into map\n", value_regno);
   4727			return -EACCES;
   4728		}
   4729		err = check_map_access_type(env, regno, off, size, t);
   4730		if (err)
   4731			return err;
   4732		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
   4733		if (err)
   4734			return err;
   4735		if (tnum_is_const(reg->var_off))
   4736			kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
   4737								  off + reg->var_off.value);
   4738		if (kptr_off_desc) {
   4739			err = check_map_kptr_access(env, regno, value_regno, insn_idx,
   4740						    kptr_off_desc);
   4741		} else if (t == BPF_READ && value_regno >= 0) {
   4742			struct bpf_map *map = reg->map_ptr;
   4743
   4744			/* if map is read-only, track its contents as scalars */
   4745			if (tnum_is_const(reg->var_off) &&
   4746			    bpf_map_is_rdonly(map) &&
   4747			    map->ops->map_direct_value_addr) {
   4748				int map_off = off + reg->var_off.value;
   4749				u64 val = 0;
   4750
   4751				err = bpf_map_direct_read(map, map_off, size,
   4752							  &val);
   4753				if (err)
   4754					return err;
   4755
   4756				regs[value_regno].type = SCALAR_VALUE;
   4757				__mark_reg_known(&regs[value_regno], val);
   4758			} else {
   4759				mark_reg_unknown(env, regs, value_regno);
   4760			}
   4761		}
   4762	} else if (base_type(reg->type) == PTR_TO_MEM) {
   4763		bool rdonly_mem = type_is_rdonly_mem(reg->type);
   4764
   4765		if (type_may_be_null(reg->type)) {
   4766			verbose(env, "R%d invalid mem access '%s'\n", regno,
   4767				reg_type_str(env, reg->type));
   4768			return -EACCES;
   4769		}
   4770
   4771		if (t == BPF_WRITE && rdonly_mem) {
   4772			verbose(env, "R%d cannot write into %s\n",
   4773				regno, reg_type_str(env, reg->type));
   4774			return -EACCES;
   4775		}
   4776
   4777		if (t == BPF_WRITE && value_regno >= 0 &&
   4778		    is_pointer_value(env, value_regno)) {
   4779			verbose(env, "R%d leaks addr into mem\n", value_regno);
   4780			return -EACCES;
   4781		}
   4782
   4783		err = check_mem_region_access(env, regno, off, size,
   4784					      reg->mem_size, false);
   4785		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
   4786			mark_reg_unknown(env, regs, value_regno);
   4787	} else if (reg->type == PTR_TO_CTX) {
   4788		enum bpf_reg_type reg_type = SCALAR_VALUE;
   4789		struct btf *btf = NULL;
   4790		u32 btf_id = 0;
   4791
   4792		if (t == BPF_WRITE && value_regno >= 0 &&
   4793		    is_pointer_value(env, value_regno)) {
   4794			verbose(env, "R%d leaks addr into ctx\n", value_regno);
   4795			return -EACCES;
   4796		}
   4797
   4798		err = check_ptr_off_reg(env, reg, regno);
   4799		if (err < 0)
   4800			return err;
   4801
   4802		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
   4803				       &btf_id);
   4804		if (err)
   4805			verbose_linfo(env, insn_idx, "; ");
   4806		if (!err && t == BPF_READ && value_regno >= 0) {
   4807			/* ctx access returns either a scalar, or a
   4808			 * PTR_TO_PACKET[_META,_END]. In the latter
   4809			 * case, we know the offset is zero.
   4810			 */
   4811			if (reg_type == SCALAR_VALUE) {
   4812				mark_reg_unknown(env, regs, value_regno);
   4813			} else {
   4814				mark_reg_known_zero(env, regs,
   4815						    value_regno);
   4816				if (type_may_be_null(reg_type))
   4817					regs[value_regno].id = ++env->id_gen;
   4818				/* A load of ctx field could have different
   4819				 * actual load size with the one encoded in the
   4820				 * insn. When the dst is PTR, it is for sure not
   4821				 * a sub-register.
   4822				 */
   4823				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
   4824				if (base_type(reg_type) == PTR_TO_BTF_ID) {
   4825					regs[value_regno].btf = btf;
   4826					regs[value_regno].btf_id = btf_id;
   4827				}
   4828			}
   4829			regs[value_regno].type = reg_type;
   4830		}
   4831
   4832	} else if (reg->type == PTR_TO_STACK) {
   4833		/* Basic bounds checks. */
   4834		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
   4835		if (err)
   4836			return err;
   4837
   4838		state = func(env, reg);
   4839		err = update_stack_depth(env, state, off);
   4840		if (err)
   4841			return err;
   4842
   4843		if (t == BPF_READ)
   4844			err = check_stack_read(env, regno, off, size,
   4845					       value_regno);
   4846		else
   4847			err = check_stack_write(env, regno, off, size,
   4848						value_regno, insn_idx);
   4849	} else if (reg_is_pkt_pointer(reg)) {
   4850		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
   4851			verbose(env, "cannot write into packet\n");
   4852			return -EACCES;
   4853		}
   4854		if (t == BPF_WRITE && value_regno >= 0 &&
   4855		    is_pointer_value(env, value_regno)) {
   4856			verbose(env, "R%d leaks addr into packet\n",
   4857				value_regno);
   4858			return -EACCES;
   4859		}
   4860		err = check_packet_access(env, regno, off, size, false);
   4861		if (!err && t == BPF_READ && value_regno >= 0)
   4862			mark_reg_unknown(env, regs, value_regno);
   4863	} else if (reg->type == PTR_TO_FLOW_KEYS) {
   4864		if (t == BPF_WRITE && value_regno >= 0 &&
   4865		    is_pointer_value(env, value_regno)) {
   4866			verbose(env, "R%d leaks addr into flow keys\n",
   4867				value_regno);
   4868			return -EACCES;
   4869		}
   4870
   4871		err = check_flow_keys_access(env, off, size);
   4872		if (!err && t == BPF_READ && value_regno >= 0)
   4873			mark_reg_unknown(env, regs, value_regno);
   4874	} else if (type_is_sk_pointer(reg->type)) {
   4875		if (t == BPF_WRITE) {
   4876			verbose(env, "R%d cannot write into %s\n",
   4877				regno, reg_type_str(env, reg->type));
   4878			return -EACCES;
   4879		}
   4880		err = check_sock_access(env, insn_idx, regno, off, size, t);
   4881		if (!err && value_regno >= 0)
   4882			mark_reg_unknown(env, regs, value_regno);
   4883	} else if (reg->type == PTR_TO_TP_BUFFER) {
   4884		err = check_tp_buffer_access(env, reg, regno, off, size);
   4885		if (!err && t == BPF_READ && value_regno >= 0)
   4886			mark_reg_unknown(env, regs, value_regno);
   4887	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
   4888		   !type_may_be_null(reg->type)) {
   4889		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
   4890					      value_regno);
   4891	} else if (reg->type == CONST_PTR_TO_MAP) {
   4892		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
   4893					      value_regno);
   4894	} else if (base_type(reg->type) == PTR_TO_BUF) {
   4895		bool rdonly_mem = type_is_rdonly_mem(reg->type);
   4896		u32 *max_access;
   4897
   4898		if (rdonly_mem) {
   4899			if (t == BPF_WRITE) {
   4900				verbose(env, "R%d cannot write into %s\n",
   4901					regno, reg_type_str(env, reg->type));
   4902				return -EACCES;
   4903			}
   4904			max_access = &env->prog->aux->max_rdonly_access;
   4905		} else {
   4906			max_access = &env->prog->aux->max_rdwr_access;
   4907		}
   4908
   4909		err = check_buffer_access(env, reg, regno, off, size, false,
   4910					  max_access);
   4911
   4912		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
   4913			mark_reg_unknown(env, regs, value_regno);
   4914	} else {
   4915		verbose(env, "R%d invalid mem access '%s'\n", regno,
   4916			reg_type_str(env, reg->type));
   4917		return -EACCES;
   4918	}
   4919
   4920	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
   4921	    regs[value_regno].type == SCALAR_VALUE) {
   4922		/* b/h/w load zero-extends, mark upper bits as known 0 */
   4923		coerce_reg_to_size(&regs[value_regno], size);
   4924	}
   4925	return err;
   4926}
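
        /* Editorial sketch (not part of the original source): a 4-byte ctx load
         * that goes through the PTR_TO_CTX branch above.  For a socket filter
         * program check_ctx_access() validates the field, the destination
         * register becomes SCALAR_VALUE, and because the load is narrower than
         * BPF_REG_SIZE, coerce_reg_to_size() marks the upper 32 bits of R0 as
         * known zero:
         *
         *   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, len)),
         *   BPF_EXIT_INSN(),
         */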
   4927
   4928static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
   4929{
   4930	int load_reg;
   4931	int err;
   4932
   4933	switch (insn->imm) {
   4934	case BPF_ADD:
   4935	case BPF_ADD | BPF_FETCH:
   4936	case BPF_AND:
   4937	case BPF_AND | BPF_FETCH:
   4938	case BPF_OR:
   4939	case BPF_OR | BPF_FETCH:
   4940	case BPF_XOR:
   4941	case BPF_XOR | BPF_FETCH:
   4942	case BPF_XCHG:
   4943	case BPF_CMPXCHG:
   4944		break;
   4945	default:
   4946		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
   4947		return -EINVAL;
   4948	}
   4949
   4950	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
   4951		verbose(env, "invalid atomic operand size\n");
   4952		return -EINVAL;
   4953	}
   4954
   4955	/* check src1 operand */
   4956	err = check_reg_arg(env, insn->src_reg, SRC_OP);
   4957	if (err)
   4958		return err;
   4959
   4960	/* check src2 operand */
   4961	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   4962	if (err)
   4963		return err;
   4964
   4965	if (insn->imm == BPF_CMPXCHG) {
   4966		/* Check comparison of R0 with memory location */
   4967		const u32 aux_reg = BPF_REG_0;
   4968
   4969		err = check_reg_arg(env, aux_reg, SRC_OP);
   4970		if (err)
   4971			return err;
   4972
   4973		if (is_pointer_value(env, aux_reg)) {
   4974			verbose(env, "R%d leaks addr into mem\n", aux_reg);
   4975			return -EACCES;
   4976		}
   4977	}
   4978
   4979	if (is_pointer_value(env, insn->src_reg)) {
   4980		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
   4981		return -EACCES;
   4982	}
   4983
   4984	if (is_ctx_reg(env, insn->dst_reg) ||
   4985	    is_pkt_reg(env, insn->dst_reg) ||
   4986	    is_flow_key_reg(env, insn->dst_reg) ||
   4987	    is_sk_reg(env, insn->dst_reg)) {
   4988		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
   4989			insn->dst_reg,
   4990			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
   4991		return -EACCES;
   4992	}
   4993
   4994	if (insn->imm & BPF_FETCH) {
   4995		if (insn->imm == BPF_CMPXCHG)
   4996			load_reg = BPF_REG_0;
   4997		else
   4998			load_reg = insn->src_reg;
   4999
   5000		/* check and record load of old value */
   5001		err = check_reg_arg(env, load_reg, DST_OP);
   5002		if (err)
   5003			return err;
   5004	} else {
   5005		/* This instruction accesses a memory location but doesn't
   5006		 * actually load it into a register.
   5007		 */
   5008		load_reg = -1;
   5009	}
   5010
   5011	/* Check whether we can read the memory, with second call for fetch
   5012	 * case to simulate the register fill.
   5013	 */
   5014	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
   5015			       BPF_SIZE(insn->code), BPF_READ, -1, true);
   5016	if (!err && load_reg >= 0)
   5017		err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
   5018				       BPF_SIZE(insn->code), BPF_READ, load_reg,
   5019				       true);
   5020	if (err)
   5021		return err;
   5022
   5023	/* Check whether we can write into the same memory. */
   5024	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
   5025			       BPF_SIZE(insn->code), BPF_WRITE, -1, true);
   5026	if (err)
   5027		return err;
   5028
   5029	return 0;
   5030}
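
        /* Editorial sketch (not part of the original source): an atomic
         * fetch-add on the stack that passes the checks above.  BPF_ATOMIC_OP()
         * is assumed to be the insn macro from include/linux/filter.h with the
         * argument order (SIZE, OP, DST, SRC, OFF).  The slot is initialized
         * first so the simulated BPF_READ of the old value succeeds; with
         * BPF_FETCH the old value lands in the src register (R1 here), or in R0
         * for BPF_CMPXCHG:
         *
         *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
         *   BPF_MOV64_IMM(BPF_REG_1, 1),
         *   BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
         */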
   5031
   5032/* When register 'regno' is used to read the stack (either directly or through
   5033 * a helper function) make sure that it's within stack boundary and, depending
   5034 * on the access type, that all elements of the stack are initialized.
   5035 *
   5036 * 'off' includes 'regno->off', but not its dynamic part (if any).
   5037 *
   5038 * All registers that have been spilled on the stack in the slots within the
   5039 * read offsets are marked as read.
   5040 */
   5041static int check_stack_range_initialized(
   5042		struct bpf_verifier_env *env, int regno, int off,
   5043		int access_size, bool zero_size_allowed,
   5044		enum bpf_access_src type, struct bpf_call_arg_meta *meta)
   5045{
   5046	struct bpf_reg_state *reg = reg_state(env, regno);
   5047	struct bpf_func_state *state = func(env, reg);
   5048	int err, min_off, max_off, i, j, slot, spi;
   5049	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
   5050	enum bpf_access_type bounds_check_type;
   5051	/* Some accesses can write anything into the stack, others are
   5052	 * read-only.
   5053	 */
   5054	bool clobber = false;
   5055
   5056	if (access_size == 0 && !zero_size_allowed) {
   5057		verbose(env, "invalid zero-sized read\n");
   5058		return -EACCES;
   5059	}
   5060
   5061	if (type == ACCESS_HELPER) {
   5062		/* The bounds checks for writes are more permissive than for
   5063		 * reads. However, if raw_mode is not set, we'll do extra
   5064		 * checks below.
   5065		 */
   5066		bounds_check_type = BPF_WRITE;
   5067		clobber = true;
   5068	} else {
   5069		bounds_check_type = BPF_READ;
   5070	}
   5071	err = check_stack_access_within_bounds(env, regno, off, access_size,
   5072					       type, bounds_check_type);
   5073	if (err)
   5074		return err;
   5075
   5076
   5077	if (tnum_is_const(reg->var_off)) {
   5078		min_off = max_off = reg->var_off.value + off;
   5079	} else {
   5080		/* Variable offset is prohibited for unprivileged mode for
   5081		 * simplicity since it requires corresponding support in
   5082		 * Spectre masking for stack ALU.
   5083		 * See also retrieve_ptr_limit().
   5084		 */
   5085		if (!env->bypass_spec_v1) {
   5086			char tn_buf[48];
   5087
   5088			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   5089			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
   5090				regno, err_extra, tn_buf);
   5091			return -EACCES;
   5092		}
    5093		/* Only an initialized buffer on the stack may be accessed with
    5094		 * a variable offset. With an uninitialized buffer it's hard to
    5095		 * guarantee that the whole memory is marked as initialized on
    5096		 * helper return, since the exact bounds are unknown, which may
    5097		 * cause uninitialized stack data to leak.
    5098		 */
   5099		if (meta && meta->raw_mode)
   5100			meta = NULL;
   5101
   5102		min_off = reg->smin_value + off;
   5103		max_off = reg->smax_value + off;
   5104	}
   5105
   5106	if (meta && meta->raw_mode) {
   5107		meta->access_size = access_size;
   5108		meta->regno = regno;
   5109		return 0;
   5110	}
   5111
   5112	for (i = min_off; i < max_off + access_size; i++) {
   5113		u8 *stype;
   5114
   5115		slot = -i - 1;
   5116		spi = slot / BPF_REG_SIZE;
   5117		if (state->allocated_stack <= slot)
   5118			goto err;
   5119		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
   5120		if (*stype == STACK_MISC)
   5121			goto mark;
   5122		if (*stype == STACK_ZERO) {
   5123			if (clobber) {
   5124				/* helper can write anything into the stack */
   5125				*stype = STACK_MISC;
   5126			}
   5127			goto mark;
   5128		}
   5129
   5130		if (is_spilled_reg(&state->stack[spi]) &&
   5131		    base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
   5132			goto mark;
   5133
   5134		if (is_spilled_reg(&state->stack[spi]) &&
   5135		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
   5136		     env->allow_ptr_leaks)) {
   5137			if (clobber) {
   5138				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
   5139				for (j = 0; j < BPF_REG_SIZE; j++)
   5140					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
   5141			}
   5142			goto mark;
   5143		}
   5144
   5145err:
   5146		if (tnum_is_const(reg->var_off)) {
   5147			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
   5148				err_extra, regno, min_off, i - min_off, access_size);
   5149		} else {
   5150			char tn_buf[48];
   5151
   5152			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   5153			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
   5154				err_extra, regno, tn_buf, i - min_off, access_size);
   5155		}
   5156		return -EACCES;
   5157mark:
   5158		/* reading any byte out of 8-byte 'spill_slot' will cause
   5159		 * the whole slot to be marked as 'read'
   5160		 */
   5161		mark_reg_read(env, &state->stack[spi].spilled_ptr,
   5162			      state->stack[spi].spilled_ptr.parent,
   5163			      REG_LIVE_READ64);
   5164	}
   5165	return update_stack_depth(env, state, min_off);
   5166}
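
        /* Editorial sketch (not part of the original source): a helper-induced
         * (ACCESS_HELPER) stack read that this function verifies.  The
         * BPF_ST_MEM initializes fp-8..fp-1; without it the slot stays
         * uninitialized and the walk above reports an "invalid indirect read
         * from stack" error.  bpf_trace_printk() is used only as an example of
         * a helper whose first argument is a readable memory region:
         *
         *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
         *   BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
         *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
         *   BPF_MOV64_IMM(BPF_REG_2, 8),
         *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
         */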
   5167
   5168static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
   5169				   int access_size, bool zero_size_allowed,
   5170				   struct bpf_call_arg_meta *meta)
   5171{
   5172	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   5173	u32 *max_access;
   5174
   5175	switch (base_type(reg->type)) {
   5176	case PTR_TO_PACKET:
   5177	case PTR_TO_PACKET_META:
   5178		return check_packet_access(env, regno, reg->off, access_size,
   5179					   zero_size_allowed);
   5180	case PTR_TO_MAP_KEY:
   5181		if (meta && meta->raw_mode) {
   5182			verbose(env, "R%d cannot write into %s\n", regno,
   5183				reg_type_str(env, reg->type));
   5184			return -EACCES;
   5185		}
   5186		return check_mem_region_access(env, regno, reg->off, access_size,
   5187					       reg->map_ptr->key_size, false);
   5188	case PTR_TO_MAP_VALUE:
   5189		if (check_map_access_type(env, regno, reg->off, access_size,
   5190					  meta && meta->raw_mode ? BPF_WRITE :
   5191					  BPF_READ))
   5192			return -EACCES;
   5193		return check_map_access(env, regno, reg->off, access_size,
   5194					zero_size_allowed, ACCESS_HELPER);
   5195	case PTR_TO_MEM:
   5196		if (type_is_rdonly_mem(reg->type)) {
   5197			if (meta && meta->raw_mode) {
   5198				verbose(env, "R%d cannot write into %s\n", regno,
   5199					reg_type_str(env, reg->type));
   5200				return -EACCES;
   5201			}
   5202		}
   5203		return check_mem_region_access(env, regno, reg->off,
   5204					       access_size, reg->mem_size,
   5205					       zero_size_allowed);
   5206	case PTR_TO_BUF:
   5207		if (type_is_rdonly_mem(reg->type)) {
   5208			if (meta && meta->raw_mode) {
   5209				verbose(env, "R%d cannot write into %s\n", regno,
   5210					reg_type_str(env, reg->type));
   5211				return -EACCES;
   5212			}
   5213
   5214			max_access = &env->prog->aux->max_rdonly_access;
   5215		} else {
   5216			max_access = &env->prog->aux->max_rdwr_access;
   5217		}
   5218		return check_buffer_access(env, reg, regno, reg->off,
   5219					   access_size, zero_size_allowed,
   5220					   max_access);
   5221	case PTR_TO_STACK:
   5222		return check_stack_range_initialized(
   5223				env,
   5224				regno, reg->off, access_size,
   5225				zero_size_allowed, ACCESS_HELPER, meta);
   5226	default: /* scalar_value or invalid ptr */
   5227		/* Allow zero-byte read from NULL, regardless of pointer type */
   5228		if (zero_size_allowed && access_size == 0 &&
   5229		    register_is_null(reg))
   5230			return 0;
   5231
   5232		verbose(env, "R%d type=%s ", regno,
   5233			reg_type_str(env, reg->type));
   5234		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
   5235		return -EACCES;
   5236	}
   5237}
   5238
   5239static int check_mem_size_reg(struct bpf_verifier_env *env,
   5240			      struct bpf_reg_state *reg, u32 regno,
   5241			      bool zero_size_allowed,
   5242			      struct bpf_call_arg_meta *meta)
   5243{
   5244	int err;
   5245
    5246	/* This is used to refine r0 return value bounds for helpers
    5247	 * that enforce this value as an upper bound on return values.
    5248	 * See do_refine_retval_range() for helpers that can refine
    5249	 * the return value. The C type of the size argument is u32, so we
    5250	 * pull the register bound from umax_value; if it can be negative,
    5251	 * the verifier errors out below. Only upper bounds can be learned
    5252	 * because retval is an int type and negative retvals are allowed.
    5253	 */
   5254	meta->msize_max_value = reg->umax_value;
   5255
   5256	/* The register is SCALAR_VALUE; the access check
   5257	 * happens using its boundaries.
   5258	 */
   5259	if (!tnum_is_const(reg->var_off))
   5260		/* For unprivileged variable accesses, disable raw
   5261		 * mode so that the program is required to
   5262		 * initialize all the memory that the helper could
   5263		 * just partially fill up.
   5264		 */
   5265		meta = NULL;
   5266
   5267	if (reg->smin_value < 0) {
   5268		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
   5269			regno);
   5270		return -EACCES;
   5271	}
   5272
   5273	if (reg->umin_value == 0) {
   5274		err = check_helper_mem_access(env, regno - 1, 0,
   5275					      zero_size_allowed,
   5276					      meta);
   5277		if (err)
   5278			return err;
   5279	}
   5280
   5281	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
   5282		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
   5283			regno);
   5284		return -EACCES;
   5285	}
   5286	err = check_helper_mem_access(env, regno - 1,
   5287				      reg->umax_value,
   5288				      zero_size_allowed, meta);
   5289	if (!err)
   5290		err = mark_chain_precision(env, regno);
   5291	return err;
   5292}
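
        /* Editorial sketch (not part of the original source): how a program
         * typically satisfies this check for a variable size.  Before the size
         * register is passed as an ARG_CONST_SIZE argument its smin must be
         * non-negative and its umax bounded, which is what the 'var &= const'
         * hint in the messages above establishes:
         *
         *   // R2 = some unbounded scalar (e.g. loaded from a map value)
         *   BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 0xff),   // now 0 <= R2 <= 255
         *   // R1 must then point to at least 255 (R2's umax) accessible bytes
         */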
   5293
   5294int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
   5295		   u32 regno, u32 mem_size)
   5296{
   5297	bool may_be_null = type_may_be_null(reg->type);
   5298	struct bpf_reg_state saved_reg;
   5299	struct bpf_call_arg_meta meta;
   5300	int err;
   5301
   5302	if (register_is_null(reg))
   5303		return 0;
   5304
   5305	memset(&meta, 0, sizeof(meta));
    5306	/* Assuming that the register contains a value, check if the memory
   5307	 * access is safe. Temporarily save and restore the register's state as
   5308	 * the conversion shouldn't be visible to a caller.
   5309	 */
   5310	if (may_be_null) {
   5311		saved_reg = *reg;
   5312		mark_ptr_not_null_reg(reg);
   5313	}
   5314
   5315	err = check_helper_mem_access(env, regno, mem_size, true, &meta);
   5316	/* Check access for BPF_WRITE */
   5317	meta.raw_mode = true;
   5318	err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
   5319
   5320	if (may_be_null)
   5321		*reg = saved_reg;
   5322
   5323	return err;
   5324}
   5325
   5326int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
   5327			     u32 regno)
   5328{
   5329	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
   5330	bool may_be_null = type_may_be_null(mem_reg->type);
   5331	struct bpf_reg_state saved_reg;
   5332	struct bpf_call_arg_meta meta;
   5333	int err;
   5334
   5335	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
   5336
   5337	memset(&meta, 0, sizeof(meta));
   5338
   5339	if (may_be_null) {
   5340		saved_reg = *mem_reg;
   5341		mark_ptr_not_null_reg(mem_reg);
   5342	}
   5343
   5344	err = check_mem_size_reg(env, reg, regno, true, &meta);
   5345	/* Check access for BPF_WRITE */
   5346	meta.raw_mode = true;
   5347	err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
   5348
   5349	if (may_be_null)
   5350		*mem_reg = saved_reg;
   5351	return err;
   5352}
   5353
   5354/* Implementation details:
   5355 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
   5356 * Two bpf_map_lookups (even with the same key) will have different reg->id.
   5357 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
   5358 * value_or_null->value transition, since the verifier only cares about
   5359 * the range of access to valid map value pointer and doesn't care about actual
   5360 * address of the map element.
   5361 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
   5362 * reg->id > 0 after value_or_null->value transition. By doing so
   5363 * two bpf_map_lookups will be considered two different pointers that
   5364 * point to different bpf_spin_locks.
   5365 * The verifier allows taking only one bpf_spin_lock at a time to avoid
   5366 * dead-locks.
   5367 * Since only one bpf_spin_lock is allowed the checks are simpler than
   5368 * reg_is_refcounted() logic. The verifier needs to remember only
   5369 * one spin_lock instead of array of acquired_refs.
   5370 * cur_state->active_spin_lock remembers which map value element got locked
   5371 * and clears it after bpf_spin_unlock.
   5372 */
   5373static int process_spin_lock(struct bpf_verifier_env *env, int regno,
   5374			     bool is_lock)
   5375{
   5376	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   5377	struct bpf_verifier_state *cur = env->cur_state;
   5378	bool is_const = tnum_is_const(reg->var_off);
   5379	struct bpf_map *map = reg->map_ptr;
   5380	u64 val = reg->var_off.value;
   5381
   5382	if (!is_const) {
   5383		verbose(env,
   5384			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
   5385			regno);
   5386		return -EINVAL;
   5387	}
   5388	if (!map->btf) {
   5389		verbose(env,
   5390			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
   5391			map->name);
   5392		return -EINVAL;
   5393	}
   5394	if (!map_value_has_spin_lock(map)) {
   5395		if (map->spin_lock_off == -E2BIG)
   5396			verbose(env,
   5397				"map '%s' has more than one 'struct bpf_spin_lock'\n",
   5398				map->name);
   5399		else if (map->spin_lock_off == -ENOENT)
   5400			verbose(env,
   5401				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
   5402				map->name);
   5403		else
   5404			verbose(env,
   5405				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
   5406				map->name);
   5407		return -EINVAL;
   5408	}
   5409	if (map->spin_lock_off != val + reg->off) {
   5410		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
   5411			val + reg->off);
   5412		return -EINVAL;
   5413	}
   5414	if (is_lock) {
   5415		if (cur->active_spin_lock) {
   5416			verbose(env,
   5417				"Locking two bpf_spin_locks are not allowed\n");
   5418			return -EINVAL;
   5419		}
   5420		cur->active_spin_lock = reg->id;
   5421	} else {
   5422		if (!cur->active_spin_lock) {
   5423			verbose(env, "bpf_spin_unlock without taking a lock\n");
   5424			return -EINVAL;
   5425		}
   5426		if (cur->active_spin_lock != reg->id) {
   5427			verbose(env, "bpf_spin_unlock of different lock\n");
   5428			return -EINVAL;
   5429		}
   5430		cur->active_spin_lock = 0;
   5431	}
   5432	return 0;
   5433}
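
        /* Editorial sketch (not part of the original source): the C-level
         * pattern the comment above process_spin_lock() describes.  The lock
         * sits at a fixed, BTF-described offset inside the map value, and only
         * one lock may be held at a time:
         *
         *   struct elem { int cnt; struct bpf_spin_lock lock; };
         *
         *   u32 key = 0;
         *   struct elem *val = bpf_map_lookup_elem(&array, &key);
         *   if (val) {
         *           bpf_spin_lock(&val->lock);
         *           val->cnt++;
         *           bpf_spin_unlock(&val->lock);
         *   }
         */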
   5434
   5435static int process_timer_func(struct bpf_verifier_env *env, int regno,
   5436			      struct bpf_call_arg_meta *meta)
   5437{
   5438	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   5439	bool is_const = tnum_is_const(reg->var_off);
   5440	struct bpf_map *map = reg->map_ptr;
   5441	u64 val = reg->var_off.value;
   5442
   5443	if (!is_const) {
   5444		verbose(env,
   5445			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
   5446			regno);
   5447		return -EINVAL;
   5448	}
   5449	if (!map->btf) {
   5450		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
   5451			map->name);
   5452		return -EINVAL;
   5453	}
   5454	if (!map_value_has_timer(map)) {
   5455		if (map->timer_off == -E2BIG)
   5456			verbose(env,
   5457				"map '%s' has more than one 'struct bpf_timer'\n",
   5458				map->name);
   5459		else if (map->timer_off == -ENOENT)
   5460			verbose(env,
   5461				"map '%s' doesn't have 'struct bpf_timer'\n",
   5462				map->name);
   5463		else
   5464			verbose(env,
   5465				"map '%s' is not a struct type or bpf_timer is mangled\n",
   5466				map->name);
   5467		return -EINVAL;
   5468	}
   5469	if (map->timer_off != val + reg->off) {
   5470		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
   5471			val + reg->off, map->timer_off);
   5472		return -EINVAL;
   5473	}
   5474	if (meta->map_ptr) {
   5475		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
   5476		return -EFAULT;
   5477	}
   5478	meta->map_uid = reg->map_uid;
   5479	meta->map_ptr = map;
   5480	return 0;
   5481}
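
        /* Editorial sketch (not part of the original source): the map-value
         * layout and call this function validates.  The struct bpf_timer must
         * sit at the BTF-described offset (map->timer_off), and the map passed
         * to bpf_timer_init() must be the map the value was looked up from:
         *
         *   struct elem { struct bpf_timer t; };
         *
         *   u32 key = 0;
         *   struct elem *val = bpf_map_lookup_elem(&timer_map, &key);
         *   if (val)
         *           bpf_timer_init(&val->t, &timer_map, CLOCK_MONOTONIC);
         */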
   5482
   5483static int process_kptr_func(struct bpf_verifier_env *env, int regno,
   5484			     struct bpf_call_arg_meta *meta)
   5485{
   5486	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   5487	struct bpf_map_value_off_desc *off_desc;
   5488	struct bpf_map *map_ptr = reg->map_ptr;
   5489	u32 kptr_off;
   5490	int ret;
   5491
   5492	if (!tnum_is_const(reg->var_off)) {
   5493		verbose(env,
   5494			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
   5495			regno);
   5496		return -EINVAL;
   5497	}
   5498	if (!map_ptr->btf) {
   5499		verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
   5500			map_ptr->name);
   5501		return -EINVAL;
   5502	}
   5503	if (!map_value_has_kptrs(map_ptr)) {
   5504		ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab);
   5505		if (ret == -E2BIG)
   5506			verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
   5507				BPF_MAP_VALUE_OFF_MAX);
   5508		else if (ret == -EEXIST)
   5509			verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
   5510		else
   5511			verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
   5512		return -EINVAL;
   5513	}
   5514
   5515	meta->map_ptr = map_ptr;
   5516	kptr_off = reg->off + reg->var_off.value;
   5517	off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
   5518	if (!off_desc) {
   5519		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
   5520		return -EACCES;
   5521	}
   5522	if (off_desc->type != BPF_KPTR_REF) {
   5523		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
   5524		return -EACCES;
   5525	}
   5526	meta->kptr_off_desc = off_desc;
   5527	return 0;
   5528}
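
        /* Editorial sketch (not part of the original source): the kind of map
         * value this function expects for bpf_kptr_xchg().  In the selftests of
         * this kernel generation the referenced kptr field is declared with a
         * "kptr_ref" BTF type tag (the exact tag spelling is an assumption
         * here); the helper then atomically swaps the stored pointer:
         *
         *   #define __kptr_ref __attribute__((btf_type_tag("kptr_ref")))
         *
         *   struct map_value { struct prog_test_ref_kfunc __kptr_ref *ptr; };
         *
         *   old = bpf_kptr_xchg(&v->ptr, new);   // returns the previous kptr or NULL
         */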
   5529
   5530static bool arg_type_is_mem_size(enum bpf_arg_type type)
   5531{
   5532	return type == ARG_CONST_SIZE ||
   5533	       type == ARG_CONST_SIZE_OR_ZERO;
   5534}
   5535
   5536static bool arg_type_is_alloc_size(enum bpf_arg_type type)
   5537{
   5538	return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
   5539}
   5540
   5541static bool arg_type_is_int_ptr(enum bpf_arg_type type)
   5542{
   5543	return type == ARG_PTR_TO_INT ||
   5544	       type == ARG_PTR_TO_LONG;
   5545}
   5546
   5547static bool arg_type_is_release(enum bpf_arg_type type)
   5548{
   5549	return type & OBJ_RELEASE;
   5550}
   5551
   5552static bool arg_type_is_dynptr(enum bpf_arg_type type)
   5553{
   5554	return base_type(type) == ARG_PTR_TO_DYNPTR;
   5555}
   5556
   5557static int int_ptr_type_to_size(enum bpf_arg_type type)
   5558{
   5559	if (type == ARG_PTR_TO_INT)
   5560		return sizeof(u32);
   5561	else if (type == ARG_PTR_TO_LONG)
   5562		return sizeof(u64);
   5563
   5564	return -EINVAL;
   5565}
   5566
   5567static int resolve_map_arg_type(struct bpf_verifier_env *env,
   5568				 const struct bpf_call_arg_meta *meta,
   5569				 enum bpf_arg_type *arg_type)
   5570{
   5571	if (!meta->map_ptr) {
   5572		/* kernel subsystem misconfigured verifier */
   5573		verbose(env, "invalid map_ptr to access map->type\n");
   5574		return -EACCES;
   5575	}
   5576
   5577	switch (meta->map_ptr->map_type) {
   5578	case BPF_MAP_TYPE_SOCKMAP:
   5579	case BPF_MAP_TYPE_SOCKHASH:
   5580		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
   5581			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
   5582		} else {
   5583			verbose(env, "invalid arg_type for sockmap/sockhash\n");
   5584			return -EINVAL;
   5585		}
   5586		break;
   5587	case BPF_MAP_TYPE_BLOOM_FILTER:
   5588		if (meta->func_id == BPF_FUNC_map_peek_elem)
   5589			*arg_type = ARG_PTR_TO_MAP_VALUE;
   5590		break;
   5591	default:
   5592		break;
   5593	}
   5594	return 0;
   5595}
   5596
   5597struct bpf_reg_types {
   5598	const enum bpf_reg_type types[10];
   5599	u32 *btf_id;
   5600};
   5601
   5602static const struct bpf_reg_types map_key_value_types = {
   5603	.types = {
   5604		PTR_TO_STACK,
   5605		PTR_TO_PACKET,
   5606		PTR_TO_PACKET_META,
   5607		PTR_TO_MAP_KEY,
   5608		PTR_TO_MAP_VALUE,
   5609	},
   5610};
   5611
   5612static const struct bpf_reg_types sock_types = {
   5613	.types = {
   5614		PTR_TO_SOCK_COMMON,
   5615		PTR_TO_SOCKET,
   5616		PTR_TO_TCP_SOCK,
   5617		PTR_TO_XDP_SOCK,
   5618	},
   5619};
   5620
   5621#ifdef CONFIG_NET
   5622static const struct bpf_reg_types btf_id_sock_common_types = {
   5623	.types = {
   5624		PTR_TO_SOCK_COMMON,
   5625		PTR_TO_SOCKET,
   5626		PTR_TO_TCP_SOCK,
   5627		PTR_TO_XDP_SOCK,
   5628		PTR_TO_BTF_ID,
   5629	},
   5630	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
   5631};
   5632#endif
   5633
   5634static const struct bpf_reg_types mem_types = {
   5635	.types = {
   5636		PTR_TO_STACK,
   5637		PTR_TO_PACKET,
   5638		PTR_TO_PACKET_META,
   5639		PTR_TO_MAP_KEY,
   5640		PTR_TO_MAP_VALUE,
   5641		PTR_TO_MEM,
   5642		PTR_TO_MEM | MEM_ALLOC,
   5643		PTR_TO_BUF,
   5644	},
   5645};
   5646
   5647static const struct bpf_reg_types int_ptr_types = {
   5648	.types = {
   5649		PTR_TO_STACK,
   5650		PTR_TO_PACKET,
   5651		PTR_TO_PACKET_META,
   5652		PTR_TO_MAP_KEY,
   5653		PTR_TO_MAP_VALUE,
   5654	},
   5655};
   5656
   5657static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
   5658static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
   5659static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
   5660static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
   5661static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
   5662static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
   5663static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
   5664static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
   5665static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
   5666static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
   5667static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
   5668static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
   5669static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
   5670
   5671static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
   5672	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
   5673	[ARG_PTR_TO_MAP_VALUE]		= &map_key_value_types,
   5674	[ARG_CONST_SIZE]		= &scalar_types,
   5675	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
   5676	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
   5677	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
   5678	[ARG_PTR_TO_CTX]		= &context_types,
   5679	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
   5680#ifdef CONFIG_NET
   5681	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
   5682#endif
   5683	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
   5684	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
   5685	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
   5686	[ARG_PTR_TO_MEM]		= &mem_types,
   5687	[ARG_PTR_TO_ALLOC_MEM]		= &alloc_mem_types,
   5688	[ARG_PTR_TO_INT]		= &int_ptr_types,
   5689	[ARG_PTR_TO_LONG]		= &int_ptr_types,
   5690	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
   5691	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
   5692	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
   5693	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
   5694	[ARG_PTR_TO_TIMER]		= &timer_types,
   5695	[ARG_PTR_TO_KPTR]		= &kptr_types,
   5696	[ARG_PTR_TO_DYNPTR]		= &stack_ptr_types,
   5697};
   5698
   5699static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
   5700			  enum bpf_arg_type arg_type,
   5701			  const u32 *arg_btf_id,
   5702			  struct bpf_call_arg_meta *meta)
   5703{
   5704	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   5705	enum bpf_reg_type expected, type = reg->type;
   5706	const struct bpf_reg_types *compatible;
   5707	int i, j;
   5708
   5709	compatible = compatible_reg_types[base_type(arg_type)];
   5710	if (!compatible) {
   5711		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
   5712		return -EFAULT;
   5713	}
   5714
   5715	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
   5716	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
   5717	 *
   5718	 * Same for MAYBE_NULL:
   5719	 *
   5720	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
   5721	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
   5722	 *
   5723	 * Therefore we fold these flags depending on the arg_type before comparison.
   5724	 */
   5725	if (arg_type & MEM_RDONLY)
   5726		type &= ~MEM_RDONLY;
   5727	if (arg_type & PTR_MAYBE_NULL)
   5728		type &= ~PTR_MAYBE_NULL;
   5729
   5730	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
   5731		expected = compatible->types[i];
   5732		if (expected == NOT_INIT)
   5733			break;
   5734
   5735		if (type == expected)
   5736			goto found;
   5737	}
   5738
   5739	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
   5740	for (j = 0; j + 1 < i; j++)
   5741		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
   5742	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
   5743	return -EACCES;
   5744
   5745found:
   5746	if (reg->type == PTR_TO_BTF_ID) {
   5747		/* For bpf_sk_release, it needs to match against first member
   5748		 * 'struct sock_common', hence make an exception for it. This
   5749		 * allows bpf_sk_release to work for multiple socket types.
   5750		 */
   5751		bool strict_type_match = arg_type_is_release(arg_type) &&
   5752					 meta->func_id != BPF_FUNC_sk_release;
   5753
   5754		if (!arg_btf_id) {
   5755			if (!compatible->btf_id) {
   5756				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
   5757				return -EFAULT;
   5758			}
   5759			arg_btf_id = compatible->btf_id;
   5760		}
   5761
   5762		if (meta->func_id == BPF_FUNC_kptr_xchg) {
   5763			if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
   5764				return -EACCES;
   5765		} else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
   5766						 btf_vmlinux, *arg_btf_id,
   5767						 strict_type_match)) {
   5768			verbose(env, "R%d is of type %s but %s is expected\n",
   5769				regno, kernel_type_name(reg->btf, reg->btf_id),
   5770				kernel_type_name(btf_vmlinux, *arg_btf_id));
   5771			return -EACCES;
   5772		}
   5773	}
   5774
   5775	return 0;
   5776}
   5777
   5778int check_func_arg_reg_off(struct bpf_verifier_env *env,
   5779			   const struct bpf_reg_state *reg, int regno,
   5780			   enum bpf_arg_type arg_type)
   5781{
   5782	enum bpf_reg_type type = reg->type;
   5783	bool fixed_off_ok = false;
   5784
   5785	switch ((u32)type) {
   5786	/* Pointer types where reg offset is explicitly allowed: */
   5787	case PTR_TO_STACK:
   5788		if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
   5789			verbose(env, "cannot pass in dynptr at an offset\n");
   5790			return -EINVAL;
   5791		}
   5792		fallthrough;
   5793	case PTR_TO_PACKET:
   5794	case PTR_TO_PACKET_META:
   5795	case PTR_TO_MAP_KEY:
   5796	case PTR_TO_MAP_VALUE:
   5797	case PTR_TO_MEM:
   5798	case PTR_TO_MEM | MEM_RDONLY:
   5799	case PTR_TO_MEM | MEM_ALLOC:
   5800	case PTR_TO_BUF:
   5801	case PTR_TO_BUF | MEM_RDONLY:
   5802	case SCALAR_VALUE:
   5803		/* Some of the argument types nevertheless require a
   5804		 * zero register offset.
   5805		 */
   5806		if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
   5807			return 0;
   5808		break;
   5809	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
   5810	 * fixed offset.
   5811	 */
   5812	case PTR_TO_BTF_ID:
   5813		/* When referenced PTR_TO_BTF_ID is passed to release function,
    5814		 * its fixed offset must be 0. In the other cases, fixed offset
   5815		 * can be non-zero.
   5816		 */
   5817		if (arg_type_is_release(arg_type) && reg->off) {
   5818			verbose(env, "R%d must have zero offset when passed to release func\n",
   5819				regno);
   5820			return -EINVAL;
   5821		}
    5822		/* When the arg is a release pointer, fixed_off_ok must be false,
    5823		 * but we already checked and rejected reg->off != 0 above, so set
    5824		 * it to true to allow a fixed offset for all other cases.
   5825		 */
   5826		fixed_off_ok = true;
   5827		break;
   5828	default:
   5829		break;
   5830	}
   5831	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
   5832}
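
        /* Editorial sketch (not part of the original source): release arguments
         * must be handed back exactly as acquired, with no offset added.  A
         * typical acquire/release pair using helpers from the UAPI list:
         *
         *   struct bpf_sock_tuple tuple = { ... };   // filled in by the program
         *   struct bpf_sock *sk;
         *
         *   sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
         *                          BPF_F_CURRENT_NETNS, 0);
         *   if (sk)
         *           bpf_sk_release(sk);   // passing sk plus an offset is rejected
         */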
   5833
   5834static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
   5835{
   5836	struct bpf_func_state *state = func(env, reg);
   5837	int spi = get_spi(reg->off);
   5838
   5839	return state->stack[spi].spilled_ptr.id;
   5840}
   5841
   5842static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
   5843			  struct bpf_call_arg_meta *meta,
   5844			  const struct bpf_func_proto *fn)
   5845{
   5846	u32 regno = BPF_REG_1 + arg;
   5847	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
   5848	enum bpf_arg_type arg_type = fn->arg_type[arg];
   5849	enum bpf_reg_type type = reg->type;
   5850	int err = 0;
   5851
   5852	if (arg_type == ARG_DONTCARE)
   5853		return 0;
   5854
   5855	err = check_reg_arg(env, regno, SRC_OP);
   5856	if (err)
   5857		return err;
   5858
   5859	if (arg_type == ARG_ANYTHING) {
   5860		if (is_pointer_value(env, regno)) {
   5861			verbose(env, "R%d leaks addr into helper function\n",
   5862				regno);
   5863			return -EACCES;
   5864		}
   5865		return 0;
   5866	}
   5867
   5868	if (type_is_pkt_pointer(type) &&
   5869	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
   5870		verbose(env, "helper access to the packet is not allowed\n");
   5871		return -EACCES;
   5872	}
   5873
   5874	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
   5875		err = resolve_map_arg_type(env, meta, &arg_type);
   5876		if (err)
   5877			return err;
   5878	}
   5879
   5880	if (register_is_null(reg) && type_may_be_null(arg_type))
   5881		/* A NULL register has a SCALAR_VALUE type, so skip
   5882		 * type checking.
   5883		 */
   5884		goto skip_type_check;
   5885
   5886	err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
   5887	if (err)
   5888		return err;
   5889
   5890	err = check_func_arg_reg_off(env, reg, regno, arg_type);
   5891	if (err)
   5892		return err;
   5893
   5894skip_type_check:
   5895	if (arg_type_is_release(arg_type)) {
   5896		if (arg_type_is_dynptr(arg_type)) {
   5897			struct bpf_func_state *state = func(env, reg);
   5898			int spi = get_spi(reg->off);
   5899
   5900			if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
   5901			    !state->stack[spi].spilled_ptr.id) {
   5902				verbose(env, "arg %d is an unacquired reference\n", regno);
   5903				return -EINVAL;
   5904			}
   5905		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
   5906			verbose(env, "R%d must be referenced when passed to release function\n",
   5907				regno);
   5908			return -EINVAL;
   5909		}
   5910		if (meta->release_regno) {
   5911			verbose(env, "verifier internal error: more than one release argument\n");
   5912			return -EFAULT;
   5913		}
   5914		meta->release_regno = regno;
   5915	}
   5916
   5917	if (reg->ref_obj_id) {
   5918		if (meta->ref_obj_id) {
   5919			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
   5920				regno, reg->ref_obj_id,
   5921				meta->ref_obj_id);
   5922			return -EFAULT;
   5923		}
   5924		meta->ref_obj_id = reg->ref_obj_id;
   5925	}
   5926
   5927	if (arg_type == ARG_CONST_MAP_PTR) {
   5928		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
   5929		if (meta->map_ptr) {
   5930			/* Use map_uid (which is unique id of inner map) to reject:
   5931			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
   5932			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
   5933			 * if (inner_map1 && inner_map2) {
   5934			 *     timer = bpf_map_lookup_elem(inner_map1);
   5935			 *     if (timer)
   5936			 *         // mismatch would have been allowed
   5937			 *         bpf_timer_init(timer, inner_map2);
   5938			 * }
   5939			 *
    5940			 * Comparing map_uid is enough to distinguish normal and outer maps.
   5941			 */
   5942			if (meta->map_ptr != reg->map_ptr ||
   5943			    meta->map_uid != reg->map_uid) {
   5944				verbose(env,
   5945					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
   5946					meta->map_uid, reg->map_uid);
   5947				return -EINVAL;
   5948			}
   5949		}
   5950		meta->map_ptr = reg->map_ptr;
   5951		meta->map_uid = reg->map_uid;
   5952	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
   5953		/* bpf_map_xxx(..., map_ptr, ..., key) call:
   5954		 * check that [key, key + map->key_size) are within
   5955		 * stack limits and initialized
   5956		 */
   5957		if (!meta->map_ptr) {
   5958			/* in function declaration map_ptr must come before
   5959			 * map_key, so that it's verified and known before
   5960			 * we have to check map_key here. Otherwise it means
   5961			 * that kernel subsystem misconfigured verifier
   5962			 */
   5963			verbose(env, "invalid map_ptr to access map->key\n");
   5964			return -EACCES;
   5965		}
   5966		err = check_helper_mem_access(env, regno,
   5967					      meta->map_ptr->key_size, false,
   5968					      NULL);
   5969	} else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
   5970		if (type_may_be_null(arg_type) && register_is_null(reg))
   5971			return 0;
   5972
   5973		/* bpf_map_xxx(..., map_ptr, ..., value) call:
   5974		 * check [value, value + map->value_size) validity
   5975		 */
   5976		if (!meta->map_ptr) {
   5977			/* kernel subsystem misconfigured verifier */
   5978			verbose(env, "invalid map_ptr to access map->value\n");
   5979			return -EACCES;
   5980		}
   5981		meta->raw_mode = arg_type & MEM_UNINIT;
   5982		err = check_helper_mem_access(env, regno,
   5983					      meta->map_ptr->value_size, false,
   5984					      meta);
   5985	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
   5986		if (!reg->btf_id) {
   5987			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
   5988			return -EACCES;
   5989		}
   5990		meta->ret_btf = reg->btf;
   5991		meta->ret_btf_id = reg->btf_id;
   5992	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
   5993		if (meta->func_id == BPF_FUNC_spin_lock) {
   5994			if (process_spin_lock(env, regno, true))
   5995				return -EACCES;
   5996		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
   5997			if (process_spin_lock(env, regno, false))
   5998				return -EACCES;
   5999		} else {
   6000			verbose(env, "verifier internal error\n");
   6001			return -EFAULT;
   6002		}
   6003	} else if (arg_type == ARG_PTR_TO_TIMER) {
   6004		if (process_timer_func(env, regno, meta))
   6005			return -EACCES;
   6006	} else if (arg_type == ARG_PTR_TO_FUNC) {
   6007		meta->subprogno = reg->subprogno;
   6008	} else if (base_type(arg_type) == ARG_PTR_TO_MEM) {
   6009		/* The access to this pointer is only checked when we hit the
   6010		 * next is_mem_size argument below.
   6011		 */
   6012		meta->raw_mode = arg_type & MEM_UNINIT;
   6013	} else if (arg_type_is_mem_size(arg_type)) {
   6014		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
   6015
   6016		err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
   6017	} else if (arg_type_is_dynptr(arg_type)) {
   6018		if (arg_type & MEM_UNINIT) {
   6019			if (!is_dynptr_reg_valid_uninit(env, reg)) {
   6020				verbose(env, "Dynptr has to be an uninitialized dynptr\n");
   6021				return -EINVAL;
   6022			}
   6023
   6024			/* We only support one dynptr being uninitialized at the moment,
   6025			 * which is sufficient for the helper functions we have right now.
   6026			 */
   6027			if (meta->uninit_dynptr_regno) {
   6028				verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
   6029				return -EFAULT;
   6030			}
   6031
   6032			meta->uninit_dynptr_regno = regno;
   6033		} else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) {
   6034			const char *err_extra = "";
   6035
   6036			switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
   6037			case DYNPTR_TYPE_LOCAL:
   6038				err_extra = "local ";
   6039				break;
   6040			case DYNPTR_TYPE_RINGBUF:
   6041				err_extra = "ringbuf ";
   6042				break;
   6043			default:
   6044				break;
   6045			}
   6046
   6047			verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
   6048				err_extra, arg + 1);
   6049			return -EINVAL;
   6050		}
   6051	} else if (arg_type_is_alloc_size(arg_type)) {
   6052		if (!tnum_is_const(reg->var_off)) {
    6053			verbose(env, "R%d is not a known constant\n",
   6054				regno);
   6055			return -EACCES;
   6056		}
   6057		meta->mem_size = reg->var_off.value;
   6058	} else if (arg_type_is_int_ptr(arg_type)) {
   6059		int size = int_ptr_type_to_size(arg_type);
   6060
   6061		err = check_helper_mem_access(env, regno, size, false, meta);
   6062		if (err)
   6063			return err;
   6064		err = check_ptr_alignment(env, reg, 0, size, true);
   6065	} else if (arg_type == ARG_PTR_TO_CONST_STR) {
   6066		struct bpf_map *map = reg->map_ptr;
   6067		int map_off;
   6068		u64 map_addr;
   6069		char *str_ptr;
   6070
   6071		if (!bpf_map_is_rdonly(map)) {
    6072			verbose(env, "R%d does not point to a readonly map\n", regno);
   6073			return -EACCES;
   6074		}
   6075
   6076		if (!tnum_is_const(reg->var_off)) {
    6077			verbose(env, "R%d is not a constant address\n", regno);
   6078			return -EACCES;
   6079		}
   6080
   6081		if (!map->ops->map_direct_value_addr) {
   6082			verbose(env, "no direct value access support for this map type\n");
   6083			return -EACCES;
   6084		}
   6085
   6086		err = check_map_access(env, regno, reg->off,
   6087				       map->value_size - reg->off, false,
   6088				       ACCESS_HELPER);
   6089		if (err)
   6090			return err;
   6091
   6092		map_off = reg->off + reg->var_off.value;
   6093		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
   6094		if (err) {
   6095			verbose(env, "direct value access on string failed\n");
   6096			return err;
   6097		}
   6098
   6099		str_ptr = (char *)(long)(map_addr);
   6100		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
   6101			verbose(env, "string is not zero-terminated\n");
   6102			return -EINVAL;
   6103		}
   6104	} else if (arg_type == ARG_PTR_TO_KPTR) {
   6105		if (process_kptr_func(env, regno, meta))
   6106			return -EACCES;
   6107	}
   6108
   6109	return err;
   6110}
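
        /* Editorial sketch (not part of the original source): the
         * ARG_PTR_TO_CONST_STR handling above in C-level terms.  The format
         * string must live in a read-only map (a 'const' global ends up in the
         * frozen .rodata array map), be addressed at a constant offset and be
         * NUL-terminated, e.g. for bpf_snprintf():
         *
         *   const char fmt[] = "cpu=%u";
         *   char buf[16];
         *   u64 args[] = { bpf_get_smp_processor_id() };
         *
         *   bpf_snprintf(buf, sizeof(buf), fmt, args, sizeof(args));
         */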
   6111
   6112static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
   6113{
   6114	enum bpf_attach_type eatype = env->prog->expected_attach_type;
   6115	enum bpf_prog_type type = resolve_prog_type(env->prog);
   6116
   6117	if (func_id != BPF_FUNC_map_update_elem)
   6118		return false;
   6119
   6120	/* It's not possible to get access to a locked struct sock in these
   6121	 * contexts, so updating is safe.
   6122	 */
   6123	switch (type) {
   6124	case BPF_PROG_TYPE_TRACING:
   6125		if (eatype == BPF_TRACE_ITER)
   6126			return true;
   6127		break;
   6128	case BPF_PROG_TYPE_SOCKET_FILTER:
   6129	case BPF_PROG_TYPE_SCHED_CLS:
   6130	case BPF_PROG_TYPE_SCHED_ACT:
   6131	case BPF_PROG_TYPE_XDP:
   6132	case BPF_PROG_TYPE_SK_REUSEPORT:
   6133	case BPF_PROG_TYPE_FLOW_DISSECTOR:
   6134	case BPF_PROG_TYPE_SK_LOOKUP:
   6135		return true;
   6136	default:
   6137		break;
   6138	}
   6139
   6140	verbose(env, "cannot update sockmap in this context\n");
   6141	return false;
   6142}
   6143
   6144static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
   6145{
   6146	return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
   6147}
   6148
   6149static int check_map_func_compatibility(struct bpf_verifier_env *env,
   6150					struct bpf_map *map, int func_id)
   6151{
   6152	if (!map)
   6153		return 0;
   6154
   6155	/* We need a two way check, first is from map perspective ... */
   6156	switch (map->map_type) {
   6157	case BPF_MAP_TYPE_PROG_ARRAY:
   6158		if (func_id != BPF_FUNC_tail_call)
   6159			goto error;
   6160		break;
   6161	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
   6162		if (func_id != BPF_FUNC_perf_event_read &&
   6163		    func_id != BPF_FUNC_perf_event_output &&
   6164		    func_id != BPF_FUNC_skb_output &&
   6165		    func_id != BPF_FUNC_perf_event_read_value &&
   6166		    func_id != BPF_FUNC_xdp_output)
   6167			goto error;
   6168		break;
   6169	case BPF_MAP_TYPE_RINGBUF:
   6170		if (func_id != BPF_FUNC_ringbuf_output &&
   6171		    func_id != BPF_FUNC_ringbuf_reserve &&
   6172		    func_id != BPF_FUNC_ringbuf_query &&
   6173		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
   6174		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
   6175		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
   6176			goto error;
   6177		break;
   6178	case BPF_MAP_TYPE_STACK_TRACE:
   6179		if (func_id != BPF_FUNC_get_stackid)
   6180			goto error;
   6181		break;
   6182	case BPF_MAP_TYPE_CGROUP_ARRAY:
   6183		if (func_id != BPF_FUNC_skb_under_cgroup &&
   6184		    func_id != BPF_FUNC_current_task_under_cgroup)
   6185			goto error;
   6186		break;
   6187	case BPF_MAP_TYPE_CGROUP_STORAGE:
   6188	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
   6189		if (func_id != BPF_FUNC_get_local_storage)
   6190			goto error;
   6191		break;
   6192	case BPF_MAP_TYPE_DEVMAP:
   6193	case BPF_MAP_TYPE_DEVMAP_HASH:
   6194		if (func_id != BPF_FUNC_redirect_map &&
   6195		    func_id != BPF_FUNC_map_lookup_elem)
   6196			goto error;
   6197		break;
   6198	/* Restrict bpf side of cpumap and xskmap, open when use-cases
   6199	 * appear.
   6200	 */
   6201	case BPF_MAP_TYPE_CPUMAP:
   6202		if (func_id != BPF_FUNC_redirect_map)
   6203			goto error;
   6204		break;
   6205	case BPF_MAP_TYPE_XSKMAP:
   6206		if (func_id != BPF_FUNC_redirect_map &&
   6207		    func_id != BPF_FUNC_map_lookup_elem)
   6208			goto error;
   6209		break;
   6210	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
   6211	case BPF_MAP_TYPE_HASH_OF_MAPS:
   6212		if (func_id != BPF_FUNC_map_lookup_elem)
   6213			goto error;
   6214		break;
   6215	case BPF_MAP_TYPE_SOCKMAP:
   6216		if (func_id != BPF_FUNC_sk_redirect_map &&
   6217		    func_id != BPF_FUNC_sock_map_update &&
   6218		    func_id != BPF_FUNC_map_delete_elem &&
   6219		    func_id != BPF_FUNC_msg_redirect_map &&
   6220		    func_id != BPF_FUNC_sk_select_reuseport &&
   6221		    func_id != BPF_FUNC_map_lookup_elem &&
   6222		    !may_update_sockmap(env, func_id))
   6223			goto error;
   6224		break;
   6225	case BPF_MAP_TYPE_SOCKHASH:
   6226		if (func_id != BPF_FUNC_sk_redirect_hash &&
   6227		    func_id != BPF_FUNC_sock_hash_update &&
   6228		    func_id != BPF_FUNC_map_delete_elem &&
   6229		    func_id != BPF_FUNC_msg_redirect_hash &&
   6230		    func_id != BPF_FUNC_sk_select_reuseport &&
   6231		    func_id != BPF_FUNC_map_lookup_elem &&
   6232		    !may_update_sockmap(env, func_id))
   6233			goto error;
   6234		break;
   6235	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
   6236		if (func_id != BPF_FUNC_sk_select_reuseport)
   6237			goto error;
   6238		break;
   6239	case BPF_MAP_TYPE_QUEUE:
   6240	case BPF_MAP_TYPE_STACK:
   6241		if (func_id != BPF_FUNC_map_peek_elem &&
   6242		    func_id != BPF_FUNC_map_pop_elem &&
   6243		    func_id != BPF_FUNC_map_push_elem)
   6244			goto error;
   6245		break;
   6246	case BPF_MAP_TYPE_SK_STORAGE:
   6247		if (func_id != BPF_FUNC_sk_storage_get &&
   6248		    func_id != BPF_FUNC_sk_storage_delete)
   6249			goto error;
   6250		break;
   6251	case BPF_MAP_TYPE_INODE_STORAGE:
   6252		if (func_id != BPF_FUNC_inode_storage_get &&
   6253		    func_id != BPF_FUNC_inode_storage_delete)
   6254			goto error;
   6255		break;
   6256	case BPF_MAP_TYPE_TASK_STORAGE:
   6257		if (func_id != BPF_FUNC_task_storage_get &&
   6258		    func_id != BPF_FUNC_task_storage_delete)
   6259			goto error;
   6260		break;
   6261	case BPF_MAP_TYPE_BLOOM_FILTER:
   6262		if (func_id != BPF_FUNC_map_peek_elem &&
   6263		    func_id != BPF_FUNC_map_push_elem)
   6264			goto error;
   6265		break;
   6266	default:
   6267		break;
   6268	}
   6269
   6270	/* ... and second from the function itself. */
   6271	switch (func_id) {
   6272	case BPF_FUNC_tail_call:
   6273		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
   6274			goto error;
   6275		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
   6276			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
   6277			return -EINVAL;
   6278		}
   6279		break;
   6280	case BPF_FUNC_perf_event_read:
   6281	case BPF_FUNC_perf_event_output:
   6282	case BPF_FUNC_perf_event_read_value:
   6283	case BPF_FUNC_skb_output:
   6284	case BPF_FUNC_xdp_output:
   6285		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
   6286			goto error;
   6287		break;
   6288	case BPF_FUNC_ringbuf_output:
   6289	case BPF_FUNC_ringbuf_reserve:
   6290	case BPF_FUNC_ringbuf_query:
   6291	case BPF_FUNC_ringbuf_reserve_dynptr:
   6292	case BPF_FUNC_ringbuf_submit_dynptr:
   6293	case BPF_FUNC_ringbuf_discard_dynptr:
   6294		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
   6295			goto error;
   6296		break;
   6297	case BPF_FUNC_get_stackid:
   6298		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
   6299			goto error;
   6300		break;
   6301	case BPF_FUNC_current_task_under_cgroup:
   6302	case BPF_FUNC_skb_under_cgroup:
   6303		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
   6304			goto error;
   6305		break;
   6306	case BPF_FUNC_redirect_map:
   6307		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
   6308		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
   6309		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
   6310		    map->map_type != BPF_MAP_TYPE_XSKMAP)
   6311			goto error;
   6312		break;
   6313	case BPF_FUNC_sk_redirect_map:
   6314	case BPF_FUNC_msg_redirect_map:
   6315	case BPF_FUNC_sock_map_update:
   6316		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
   6317			goto error;
   6318		break;
   6319	case BPF_FUNC_sk_redirect_hash:
   6320	case BPF_FUNC_msg_redirect_hash:
   6321	case BPF_FUNC_sock_hash_update:
   6322		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
   6323			goto error;
   6324		break;
   6325	case BPF_FUNC_get_local_storage:
   6326		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
   6327		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
   6328			goto error;
   6329		break;
   6330	case BPF_FUNC_sk_select_reuseport:
   6331		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
   6332		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
   6333		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
   6334			goto error;
   6335		break;
   6336	case BPF_FUNC_map_pop_elem:
   6337		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
   6338		    map->map_type != BPF_MAP_TYPE_STACK)
   6339			goto error;
   6340		break;
   6341	case BPF_FUNC_map_peek_elem:
   6342	case BPF_FUNC_map_push_elem:
   6343		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
   6344		    map->map_type != BPF_MAP_TYPE_STACK &&
   6345		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
   6346			goto error;
   6347		break;
   6348	case BPF_FUNC_map_lookup_percpu_elem:
   6349		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
   6350		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
   6351		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
   6352			goto error;
   6353		break;
   6354	case BPF_FUNC_sk_storage_get:
   6355	case BPF_FUNC_sk_storage_delete:
   6356		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
   6357			goto error;
   6358		break;
   6359	case BPF_FUNC_inode_storage_get:
   6360	case BPF_FUNC_inode_storage_delete:
   6361		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
   6362			goto error;
   6363		break;
   6364	case BPF_FUNC_task_storage_get:
   6365	case BPF_FUNC_task_storage_delete:
   6366		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
   6367			goto error;
   6368		break;
   6369	default:
   6370		break;
   6371	}
   6372
   6373	return 0;
   6374error:
   6375	verbose(env, "cannot pass map_type %d into func %s#%d\n",
   6376		map->map_type, func_id_name(func_id), func_id);
   6377	return -EINVAL;
   6378}
   6379
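        /* The next few predicates validate a helper's bpf_func_proto itself
         * (not a particular call site): check_raw_mode_ok() allows at most one
         * ARG_PTR_TO_UNINIT_MEM argument, check_arg_pair_ok() requires every
         * memory pointer argument to be followed by its size argument,
         * check_btf_id_ok() requires a BTF id exactly for ARG_PTR_TO_BTF_ID
         * arguments, and check_refcount_ok() allows at most one refcounted
         * argument. check_func_proto() combines them below.
         */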
   6380static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
   6381{
   6382	int count = 0;
   6383
   6384	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
   6385		count++;
   6386	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
   6387		count++;
   6388	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
   6389		count++;
   6390	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
   6391		count++;
   6392	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
   6393		count++;
   6394
   6395	/* We only support one arg being in raw mode at the moment,
   6396	 * which is sufficient for the helper functions we have
   6397	 * right now.
   6398	 */
   6399	return count <= 1;
   6400}
   6401
   6402static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
   6403				    enum bpf_arg_type arg_next)
   6404{
   6405	return (base_type(arg_curr) == ARG_PTR_TO_MEM) !=
   6406		arg_type_is_mem_size(arg_next);
   6407}
   6408
   6409static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
   6410{
   6411	/* bpf_xxx(..., buf, len) call will access 'len'
   6412	 * bytes from memory 'buf'. Both arg types need
   6413	 * to be paired, so make sure there's no buggy
   6414	 * helper function specification.
   6415	 */
   6416	if (arg_type_is_mem_size(fn->arg1_type) ||
   6417	    base_type(fn->arg5_type) == ARG_PTR_TO_MEM ||
   6418	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
   6419	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
   6420	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
   6421	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
   6422		return false;
   6423
   6424	return true;
   6425}
   6426
   6427static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
   6428{
   6429	int count = 0;
   6430
   6431	if (arg_type_may_be_refcounted(fn->arg1_type))
   6432		count++;
   6433	if (arg_type_may_be_refcounted(fn->arg2_type))
   6434		count++;
   6435	if (arg_type_may_be_refcounted(fn->arg3_type))
   6436		count++;
   6437	if (arg_type_may_be_refcounted(fn->arg4_type))
   6438		count++;
   6439	if (arg_type_may_be_refcounted(fn->arg5_type))
   6440		count++;
   6441
   6442	/* A reference acquiring function cannot acquire
   6443	 * another refcounted ptr.
   6444	 */
   6445	if (may_be_acquire_function(func_id) && count)
   6446		return false;
   6447
   6448	/* We only support one arg being unreferenced at the moment,
   6449	 * which is sufficient for the helper functions we have right now.
   6450	 */
   6451	return count <= 1;
   6452}
   6453
   6454static bool check_btf_id_ok(const struct bpf_func_proto *fn)
   6455{
   6456	int i;
   6457
   6458	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
   6459		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
   6460			return false;
   6461
   6462		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
   6463			return false;
   6464	}
   6465
   6466	return true;
   6467}
   6468
   6469static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
   6470			    struct bpf_call_arg_meta *meta)
   6471{
   6472	return check_raw_mode_ok(fn) &&
   6473	       check_arg_pair_ok(fn) &&
   6474	       check_btf_id_ok(fn) &&
   6475	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
   6476}
   6477
   6478/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
   6479 * are now invalid, so turn them into unknown SCALAR_VALUE.
   6480 */
   6481static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
   6482				     struct bpf_func_state *state)
   6483{
   6484	struct bpf_reg_state *regs = state->regs, *reg;
   6485	int i;
   6486
   6487	for (i = 0; i < MAX_BPF_REG; i++)
   6488		if (reg_is_pkt_pointer_any(&regs[i]))
   6489			mark_reg_unknown(env, regs, i);
   6490
   6491	bpf_for_each_spilled_reg(i, state, reg) {
   6492		if (!reg)
   6493			continue;
   6494		if (reg_is_pkt_pointer_any(reg))
   6495			__mark_reg_unknown(env, reg);
   6496	}
   6497}
   6498
   6499static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
   6500{
   6501	struct bpf_verifier_state *vstate = env->cur_state;
   6502	int i;
   6503
   6504	for (i = 0; i <= vstate->curframe; i++)
   6505		__clear_all_pkt_pointers(env, vstate->frame[i]);
   6506}
   6507
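        /* Sentinel values stored in reg->range by mark_pkt_end() below for a
         * packet pointer that a conditional jump proved to be exactly at
         * (pkt >= pkt_end) or strictly beyond (pkt > pkt_end) the packet end.
         */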
   6508enum {
   6509	AT_PKT_END = -1,
   6510	BEYOND_PKT_END = -2,
   6511};
   6512
   6513static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
   6514{
   6515	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   6516	struct bpf_reg_state *reg = &state->regs[regn];
   6517
   6518	if (reg->type != PTR_TO_PACKET)
   6519		/* PTR_TO_PACKET_META is not supported yet */
   6520		return;
   6521
   6522	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
   6523	 * How far beyond pkt_end it goes is unknown.
   6524	 * if (!range_open) it's the case of pkt >= pkt_end
   6525	 * if (range_open) it's the case of pkt > pkt_end
   6526	 * hence this pointer is at least 1 byte bigger than pkt_end
   6527	 */
   6528	if (range_open)
   6529		reg->range = BEYOND_PKT_END;
   6530	else
   6531		reg->range = AT_PKT_END;
   6532}
   6533
   6534static void release_reg_references(struct bpf_verifier_env *env,
   6535				   struct bpf_func_state *state,
   6536				   int ref_obj_id)
   6537{
   6538	struct bpf_reg_state *regs = state->regs, *reg;
   6539	int i;
   6540
   6541	for (i = 0; i < MAX_BPF_REG; i++)
   6542		if (regs[i].ref_obj_id == ref_obj_id)
   6543			mark_reg_unknown(env, regs, i);
   6544
   6545	bpf_for_each_spilled_reg(i, state, reg) {
   6546		if (!reg)
   6547			continue;
   6548		if (reg->ref_obj_id == ref_obj_id)
   6549			__mark_reg_unknown(env, reg);
   6550	}
   6551}
   6552
   6553/* The pointer with the specified id has released its reference to kernel
   6554 * resources. Identify all copies of the same pointer and clear the reference.
   6555 */
   6556static int release_reference(struct bpf_verifier_env *env,
   6557			     int ref_obj_id)
   6558{
   6559	struct bpf_verifier_state *vstate = env->cur_state;
   6560	int err;
   6561	int i;
   6562
   6563	err = release_reference_state(cur_func(env), ref_obj_id);
   6564	if (err)
   6565		return err;
   6566
   6567	for (i = 0; i <= vstate->curframe; i++)
   6568		release_reg_references(env, vstate->frame[i], ref_obj_id);
   6569
   6570	return 0;
   6571}
   6572
   6573static void clear_caller_saved_regs(struct bpf_verifier_env *env,
   6574				    struct bpf_reg_state *regs)
   6575{
   6576	int i;
   6577
   6578	/* after the call registers r0 - r5 were scratched */
   6579	for (i = 0; i < CALLER_SAVED_REGS; i++) {
   6580		mark_reg_not_init(env, regs, caller_saved[i]);
   6581		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
   6582	}
   6583}
   6584
   6585typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
   6586				   struct bpf_func_state *caller,
   6587				   struct bpf_func_state *callee,
   6588				   int insn_idx);
   6589
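        /* Common handling of a pseudo call into another BPF function, used for
         * bpf-to-bpf calls as well as for helpers that take a callback. Three
         * cases are handled below:
         *  - global functions are verified independently against their BTF
         *    signature, so the body is skipped and r0-r5 are just scratched;
         *  - bpf_timer_set_callback() callbacks run asynchronously, so they are
         *    queued as a separate verification state via push_async_cb();
         *  - everything else gets a fresh bpf_func_state frame and verification
         *    continues at the callee's first instruction.
         * set_callee_state_cb() seeds the callee's r1-r5 for the specific call.
         */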
   6590static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
   6591			     int *insn_idx, int subprog,
   6592			     set_callee_state_fn set_callee_state_cb)
   6593{
   6594	struct bpf_verifier_state *state = env->cur_state;
   6595	struct bpf_func_info_aux *func_info_aux;
   6596	struct bpf_func_state *caller, *callee;
   6597	int err;
   6598	bool is_global = false;
   6599
   6600	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
   6601		verbose(env, "the call stack of %d frames is too deep\n",
   6602			state->curframe + 2);
   6603		return -E2BIG;
   6604	}
   6605
   6606	caller = state->frame[state->curframe];
   6607	if (state->frame[state->curframe + 1]) {
   6608		verbose(env, "verifier bug. Frame %d already allocated\n",
   6609			state->curframe + 1);
   6610		return -EFAULT;
   6611	}
   6612
   6613	func_info_aux = env->prog->aux->func_info_aux;
   6614	if (func_info_aux)
   6615		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
   6616	err = btf_check_subprog_arg_match(env, subprog, caller->regs);
   6617	if (err == -EFAULT)
   6618		return err;
   6619	if (is_global) {
   6620		if (err) {
   6621			verbose(env, "Caller passes invalid args into func#%d\n",
   6622				subprog);
   6623			return err;
   6624		} else {
   6625			if (env->log.level & BPF_LOG_LEVEL)
   6626				verbose(env,
   6627					"Func#%d is global and valid. Skipping.\n",
   6628					subprog);
   6629			clear_caller_saved_regs(env, caller->regs);
   6630
   6631			/* All global functions return a 64-bit SCALAR_VALUE */
   6632			mark_reg_unknown(env, caller->regs, BPF_REG_0);
   6633			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
   6634
   6635			/* continue with next insn after call */
   6636			return 0;
   6637		}
   6638	}
   6639
   6640	if (insn->code == (BPF_JMP | BPF_CALL) &&
   6641	    insn->src_reg == 0 &&
   6642	    insn->imm == BPF_FUNC_timer_set_callback) {
   6643		struct bpf_verifier_state *async_cb;
   6644
   6645		/* there is no real recursion here. timer callbacks are async */
   6646		env->subprog_info[subprog].is_async_cb = true;
   6647		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
   6648					 *insn_idx, subprog);
   6649		if (!async_cb)
   6650			return -EFAULT;
   6651		callee = async_cb->frame[0];
   6652		callee->async_entry_cnt = caller->async_entry_cnt + 1;
   6653
   6654		/* Convert bpf_timer_set_callback() args into timer callback args */
   6655		err = set_callee_state_cb(env, caller, callee, *insn_idx);
   6656		if (err)
   6657			return err;
   6658
   6659		clear_caller_saved_regs(env, caller->regs);
   6660		mark_reg_unknown(env, caller->regs, BPF_REG_0);
   6661		caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
   6662		/* continue with next insn after call */
   6663		return 0;
   6664	}
   6665
   6666	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
   6667	if (!callee)
   6668		return -ENOMEM;
   6669	state->frame[state->curframe + 1] = callee;
   6670
   6671	/* callee cannot access r0, r6 - r9 for reading and has to write
   6672	 * into its own stack before reading from it.
   6673	 * callee can read/write into caller's stack
   6674	 */
   6675	init_func_state(env, callee,
   6676			/* remember the callsite, it will be used by bpf_exit */
   6677			*insn_idx /* callsite */,
   6678			state->curframe + 1 /* frameno within this callchain */,
   6679			subprog /* subprog number within this prog */);
   6680
   6681	/* Transfer references to the callee */
   6682	err = copy_reference_state(callee, caller);
   6683	if (err)
   6684		return err;
   6685
   6686	err = set_callee_state_cb(env, caller, callee, *insn_idx);
   6687	if (err)
   6688		return err;
   6689
   6690	clear_caller_saved_regs(env, caller->regs);
   6691
   6692	/* only increment it after check_reg_arg() finished */
   6693	state->curframe++;
   6694
   6695	/* and go analyze first insn of the callee */
   6696	*insn_idx = env->subprog_info[subprog].start - 1;
   6697
   6698	if (env->log.level & BPF_LOG_LEVEL) {
   6699		verbose(env, "caller:\n");
   6700		print_verifier_state(env, caller, true);
   6701		verbose(env, "callee:\n");
   6702		print_verifier_state(env, callee, true);
   6703	}
   6704	return 0;
   6705}
   6706
   6707int map_set_for_each_callback_args(struct bpf_verifier_env *env,
   6708				   struct bpf_func_state *caller,
   6709				   struct bpf_func_state *callee)
   6710{
   6711	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
   6712	 *      void *callback_ctx, u64 flags);
   6713	 * callback_fn(struct bpf_map *map, void *key, void *value,
   6714	 *      void *callback_ctx);
   6715	 */
   6716	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
   6717
   6718	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
   6719	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
   6720	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
   6721
   6722	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
   6723	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
   6724	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
   6725
   6726	/* pointer to stack or null */
   6727	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
   6728
   6729	/* unused */
   6730	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
   6731	return 0;
   6732}
   6733
   6734static int set_callee_state(struct bpf_verifier_env *env,
   6735			    struct bpf_func_state *caller,
   6736			    struct bpf_func_state *callee, int insn_idx)
   6737{
   6738	int i;
   6739
   6740	/* copy r1 - r5 args that callee can access.  The copy includes parent
   6741	 * pointers, which connects us up to the liveness chain
   6742	 */
   6743	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
   6744		callee->regs[i] = caller->regs[i];
   6745	return 0;
   6746}
   6747
   6748static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
   6749			   int *insn_idx)
   6750{
   6751	int subprog, target_insn;
   6752
   6753	target_insn = *insn_idx + insn->imm + 1;
   6754	subprog = find_subprog(env, target_insn);
   6755	if (subprog < 0) {
   6756		verbose(env, "verifier bug. No program starts at insn %d\n",
   6757			target_insn);
   6758		return -EFAULT;
   6759	}
   6760
   6761	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
   6762}
   6763
   6764static int set_map_elem_callback_state(struct bpf_verifier_env *env,
   6765				       struct bpf_func_state *caller,
   6766				       struct bpf_func_state *callee,
   6767				       int insn_idx)
   6768{
   6769	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
   6770	struct bpf_map *map;
   6771	int err;
   6772
   6773	if (bpf_map_ptr_poisoned(insn_aux)) {
   6774		verbose(env, "tail_call abusing map_ptr\n");
   6775		return -EINVAL;
   6776	}
   6777
   6778	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
   6779	if (!map->ops->map_set_for_each_callback_args ||
   6780	    !map->ops->map_for_each_callback) {
   6781		verbose(env, "callback function not allowed for map\n");
   6782		return -ENOTSUPP;
   6783	}
   6784
   6785	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
   6786	if (err)
   6787		return err;
   6788
   6789	callee->in_callback_fn = true;
   6790	return 0;
   6791}
   6792
   6793static int set_loop_callback_state(struct bpf_verifier_env *env,
   6794				   struct bpf_func_state *caller,
   6795				   struct bpf_func_state *callee,
   6796				   int insn_idx)
   6797{
   6798	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
   6799	 *	    u64 flags);
   6800	 * callback_fn(u32 index, void *callback_ctx);
   6801	 */
   6802	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
   6803	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
   6804
   6805	/* unused */
   6806	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
   6807	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
   6808	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
   6809
   6810	callee->in_callback_fn = true;
   6811	return 0;
   6812}
   6813
   6814static int set_timer_callback_state(struct bpf_verifier_env *env,
   6815				    struct bpf_func_state *caller,
   6816				    struct bpf_func_state *callee,
   6817				    int insn_idx)
   6818{
   6819	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
   6820
   6821	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
   6822	 * callback_fn(struct bpf_map *map, void *key, void *value);
   6823	 */
   6824	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
   6825	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
   6826	callee->regs[BPF_REG_1].map_ptr = map_ptr;
   6827
   6828	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
   6829	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
   6830	callee->regs[BPF_REG_2].map_ptr = map_ptr;
   6831
   6832	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
   6833	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
   6834	callee->regs[BPF_REG_3].map_ptr = map_ptr;
   6835
   6836	/* unused */
   6837	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
   6838	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
   6839	callee->in_async_callback_fn = true;
   6840	return 0;
   6841}
   6842
   6843static int set_find_vma_callback_state(struct bpf_verifier_env *env,
   6844				       struct bpf_func_state *caller,
   6845				       struct bpf_func_state *callee,
   6846				       int insn_idx)
   6847{
   6848	/* bpf_find_vma(struct task_struct *task, u64 addr,
   6849	 *               void *callback_fn, void *callback_ctx, u64 flags)
   6850	 * (callback_fn)(struct task_struct *task,
   6851	 *               struct vm_area_struct *vma, void *callback_ctx);
   6852	 */
   6853	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
   6854
   6855	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
   6856	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
   6857	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
    6858		callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
   6859
   6860	/* pointer to stack or null */
   6861	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
   6862
   6863	/* unused */
   6864	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
   6865	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
   6866	callee->in_callback_fn = true;
   6867	return 0;
   6868}
   6869
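        /* Reached on BPF_EXIT inside a callee frame: pop the frame, refuse to
         * return a pointer into the soon-to-be-gone callee stack, enforce the
         * [0, 1] return range for callback functions, propagate r0 and acquired
         * references back to the caller and resume at the insn after the call.
         */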
   6870static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
   6871{
   6872	struct bpf_verifier_state *state = env->cur_state;
   6873	struct bpf_func_state *caller, *callee;
   6874	struct bpf_reg_state *r0;
   6875	int err;
   6876
   6877	callee = state->frame[state->curframe];
   6878	r0 = &callee->regs[BPF_REG_0];
   6879	if (r0->type == PTR_TO_STACK) {
   6880		/* technically it's ok to return caller's stack pointer
   6881		 * (or caller's caller's pointer) back to the caller,
   6882		 * since these pointers are valid. Only current stack
   6883		 * pointer will be invalid as soon as function exits,
   6884		 * but let's be conservative
   6885		 */
   6886		verbose(env, "cannot return stack pointer to the caller\n");
   6887		return -EINVAL;
   6888	}
   6889
   6890	state->curframe--;
   6891	caller = state->frame[state->curframe];
   6892	if (callee->in_callback_fn) {
   6893		/* enforce R0 return value range [0, 1]. */
   6894		struct tnum range = tnum_range(0, 1);
   6895
   6896		if (r0->type != SCALAR_VALUE) {
   6897			verbose(env, "R0 not a scalar value\n");
   6898			return -EACCES;
   6899		}
   6900		if (!tnum_in(range, r0->var_off)) {
   6901			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
   6902			return -EINVAL;
   6903		}
   6904	} else {
   6905		/* return to the caller whatever r0 had in the callee */
   6906		caller->regs[BPF_REG_0] = *r0;
   6907	}
   6908
   6909	/* Transfer references to the caller */
   6910	err = copy_reference_state(caller, callee);
   6911	if (err)
   6912		return err;
   6913
   6914	*insn_idx = callee->callsite + 1;
   6915	if (env->log.level & BPF_LOG_LEVEL) {
   6916		verbose(env, "returning from callee:\n");
   6917		print_verifier_state(env, callee, true);
   6918		verbose(env, "to caller at %d:\n", *insn_idx);
   6919		print_verifier_state(env, caller, true);
   6920	}
   6921	/* clear everything in the callee */
   6922	free_func_state(callee);
   6923	state->frame[state->curframe + 1] = NULL;
   6924	return 0;
   6925}
   6926
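        /* Helpers like bpf_get_stack() and the probe_read_*_str() variants
         * return a negative errno or the number of bytes written, which cannot
         * exceed the supplied buffer size, so R0 can be tightened to the range
         * [-MAX_ERRNO, msize_max_value].
         */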
   6927static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
   6928				   int func_id,
   6929				   struct bpf_call_arg_meta *meta)
   6930{
   6931	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
   6932
   6933	if (ret_type != RET_INTEGER ||
   6934	    (func_id != BPF_FUNC_get_stack &&
   6935	     func_id != BPF_FUNC_get_task_stack &&
   6936	     func_id != BPF_FUNC_probe_read_str &&
   6937	     func_id != BPF_FUNC_probe_read_kernel_str &&
   6938	     func_id != BPF_FUNC_probe_read_user_str))
   6939		return;
   6940
   6941	ret_reg->smax_value = meta->msize_max_value;
   6942	ret_reg->s32_max_value = meta->msize_max_value;
   6943	ret_reg->smin_value = -MAX_ERRNO;
   6944	ret_reg->s32_min_value = -MAX_ERRNO;
   6945	reg_bounds_sync(ret_reg);
   6946}
   6947
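        /* Remember which map a map helper operates on in insn_aux_data so the
         * post-verification fixup passes can specialize the call for that map.
         * If different maps can reach the same call site, the stored pointer is
         * poisoned and no per-map optimization is applied. Read-only
         * (BPF_F_RDONLY_PROG) maps additionally reject all mutating helpers.
         */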
   6948static int
   6949record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
   6950		int func_id, int insn_idx)
   6951{
   6952	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
   6953	struct bpf_map *map = meta->map_ptr;
   6954
   6955	if (func_id != BPF_FUNC_tail_call &&
   6956	    func_id != BPF_FUNC_map_lookup_elem &&
   6957	    func_id != BPF_FUNC_map_update_elem &&
   6958	    func_id != BPF_FUNC_map_delete_elem &&
   6959	    func_id != BPF_FUNC_map_push_elem &&
   6960	    func_id != BPF_FUNC_map_pop_elem &&
   6961	    func_id != BPF_FUNC_map_peek_elem &&
   6962	    func_id != BPF_FUNC_for_each_map_elem &&
   6963	    func_id != BPF_FUNC_redirect_map &&
   6964	    func_id != BPF_FUNC_map_lookup_percpu_elem)
   6965		return 0;
   6966
   6967	if (map == NULL) {
   6968		verbose(env, "kernel subsystem misconfigured verifier\n");
   6969		return -EINVAL;
   6970	}
   6971
   6972	/* In case of read-only, some additional restrictions
   6973	 * need to be applied in order to prevent altering the
   6974	 * state of the map from program side.
   6975	 */
   6976	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
   6977	    (func_id == BPF_FUNC_map_delete_elem ||
   6978	     func_id == BPF_FUNC_map_update_elem ||
   6979	     func_id == BPF_FUNC_map_push_elem ||
   6980	     func_id == BPF_FUNC_map_pop_elem)) {
   6981		verbose(env, "write into map forbidden\n");
   6982		return -EACCES;
   6983	}
   6984
   6985	if (!BPF_MAP_PTR(aux->map_ptr_state))
   6986		bpf_map_ptr_store(aux, meta->map_ptr,
   6987				  !meta->map_ptr->bypass_spec_v1);
   6988	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
   6989		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
   6990				  !meta->map_ptr->bypass_spec_v1);
   6991	return 0;
   6992}
   6993
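        /* For bpf_tail_call(), record the index (R3) in insn_aux_data when it
         * is a known constant within the prog_array bounds; a varying or
         * out-of-range index poisons the key. A constant key lets later fixups
         * patch the tail call into a direct jump.
         */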
   6994static int
   6995record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
   6996		int func_id, int insn_idx)
   6997{
   6998	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
   6999	struct bpf_reg_state *regs = cur_regs(env), *reg;
   7000	struct bpf_map *map = meta->map_ptr;
   7001	struct tnum range;
   7002	u64 val;
   7003	int err;
   7004
   7005	if (func_id != BPF_FUNC_tail_call)
   7006		return 0;
   7007	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
   7008		verbose(env, "kernel subsystem misconfigured verifier\n");
   7009		return -EINVAL;
   7010	}
   7011
   7012	range = tnum_range(0, map->max_entries - 1);
   7013	reg = &regs[BPF_REG_3];
   7014
   7015	if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
   7016		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
   7017		return 0;
   7018	}
   7019
   7020	err = mark_chain_precision(env, BPF_REG_3);
   7021	if (err)
   7022		return err;
   7023
   7024	val = reg->var_off.value;
   7025	if (bpf_map_key_unseen(aux))
   7026		bpf_map_key_store(aux, val);
   7027	else if (!bpf_map_key_poisoned(aux) &&
   7028		  bpf_map_key_immediate(aux) != val)
   7029		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
   7030	return 0;
   7031}
   7032
   7033static int check_reference_leak(struct bpf_verifier_env *env)
   7034{
   7035	struct bpf_func_state *state = cur_func(env);
   7036	int i;
   7037
   7038	for (i = 0; i < state->acquired_refs; i++) {
   7039		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
   7040			state->refs[i].id, state->refs[i].insn_idx);
   7041	}
   7042	return state->acquired_refs ? -EINVAL : 0;
   7043}
   7044
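        /* bpf_snprintf() specific checks: the variadic data area must be an
         * array of u64 (data_len a multiple of 8), and the format string, which
         * ARG_PTR_TO_CONST_STR guarantees sits at a constant offset in a map
         * with map_direct_value_addr, is validated by bpf_bprintf_prepare().
         */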
   7045static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
   7046				   struct bpf_reg_state *regs)
   7047{
   7048	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
   7049	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
   7050	struct bpf_map *fmt_map = fmt_reg->map_ptr;
   7051	int err, fmt_map_off, num_args;
   7052	u64 fmt_addr;
   7053	char *fmt;
   7054
   7055	/* data must be an array of u64 */
   7056	if (data_len_reg->var_off.value % 8)
   7057		return -EINVAL;
   7058	num_args = data_len_reg->var_off.value / 8;
   7059
   7060	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
   7061	 * and map_direct_value_addr is set.
   7062	 */
   7063	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
   7064	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
   7065						  fmt_map_off);
   7066	if (err) {
   7067		verbose(env, "verifier bug\n");
   7068		return -EFAULT;
   7069	}
   7070	fmt = (char *)(long)fmt_addr + fmt_map_off;
   7071
   7072	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
   7073	 * can focus on validating the format specifiers.
   7074	 */
   7075	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
   7076	if (err < 0)
   7077		verbose(env, "Invalid format string\n");
   7078
   7079	return err;
   7080}
   7081
   7082static int check_get_func_ip(struct bpf_verifier_env *env)
   7083{
   7084	enum bpf_prog_type type = resolve_prog_type(env->prog);
   7085	int func_id = BPF_FUNC_get_func_ip;
   7086
   7087	if (type == BPF_PROG_TYPE_TRACING) {
   7088		if (!bpf_prog_has_trampoline(env->prog)) {
   7089			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
   7090				func_id_name(func_id), func_id);
   7091			return -ENOTSUPP;
   7092		}
   7093		return 0;
   7094	} else if (type == BPF_PROG_TYPE_KPROBE) {
   7095		return 0;
   7096	}
   7097
   7098	verbose(env, "func %s#%d not supported for program type %d\n",
   7099		func_id_name(func_id), func_id, type);
   7100	return -ENOTSUPP;
   7101}
   7102
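        /* Verify a call to a BPF helper: resolve the prototype for this program
         * type, enforce GPL and availability restrictions, check each argument
         * against the prototype, run helper-specific checks (tail calls,
         * callbacks, snprintf, dynptrs, ...), then scratch the caller-saved
         * registers and derive R0's type from the declared return type.
         */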
   7103static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
   7104			     int *insn_idx_p)
   7105{
   7106	const struct bpf_func_proto *fn = NULL;
   7107	enum bpf_return_type ret_type;
   7108	enum bpf_type_flag ret_flag;
   7109	struct bpf_reg_state *regs;
   7110	struct bpf_call_arg_meta meta;
   7111	int insn_idx = *insn_idx_p;
   7112	bool changes_data;
   7113	int i, err, func_id;
   7114
   7115	/* find function prototype */
   7116	func_id = insn->imm;
   7117	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
   7118		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
   7119			func_id);
   7120		return -EINVAL;
   7121	}
   7122
   7123	if (env->ops->get_func_proto)
   7124		fn = env->ops->get_func_proto(func_id, env->prog);
   7125	if (!fn) {
   7126		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
   7127			func_id);
   7128		return -EINVAL;
   7129	}
   7130
   7131	/* eBPF programs must be GPL compatible to use GPL-ed functions */
   7132	if (!env->prog->gpl_compatible && fn->gpl_only) {
   7133		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
   7134		return -EINVAL;
   7135	}
   7136
   7137	if (fn->allowed && !fn->allowed(env->prog)) {
   7138		verbose(env, "helper call is not allowed in probe\n");
   7139		return -EINVAL;
   7140	}
   7141
   7142	/* With LD_ABS/IND some JITs save/restore skb from r1. */
   7143	changes_data = bpf_helper_changes_pkt_data(fn->func);
   7144	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
   7145		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
   7146			func_id_name(func_id), func_id);
   7147		return -EINVAL;
   7148	}
   7149
   7150	memset(&meta, 0, sizeof(meta));
   7151	meta.pkt_access = fn->pkt_access;
   7152
   7153	err = check_func_proto(fn, func_id, &meta);
   7154	if (err) {
   7155		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
   7156			func_id_name(func_id), func_id);
   7157		return err;
   7158	}
   7159
   7160	meta.func_id = func_id;
   7161	/* check args */
   7162	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
   7163		err = check_func_arg(env, i, &meta, fn);
   7164		if (err)
   7165			return err;
   7166	}
   7167
   7168	err = record_func_map(env, &meta, func_id, insn_idx);
   7169	if (err)
   7170		return err;
   7171
   7172	err = record_func_key(env, &meta, func_id, insn_idx);
   7173	if (err)
   7174		return err;
   7175
   7176	/* Mark slots with STACK_MISC in case of raw mode, stack offset
   7177	 * is inferred from register state.
   7178	 */
   7179	for (i = 0; i < meta.access_size; i++) {
   7180		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
   7181				       BPF_WRITE, -1, false);
   7182		if (err)
   7183			return err;
   7184	}
   7185
   7186	regs = cur_regs(env);
   7187
   7188	if (meta.uninit_dynptr_regno) {
   7189		/* we write BPF_DW bits (8 bytes) at a time */
   7190		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
   7191			err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
   7192					       i, BPF_DW, BPF_WRITE, -1, false);
   7193			if (err)
   7194				return err;
   7195		}
   7196
   7197		err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
   7198					      fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
   7199					      insn_idx);
   7200		if (err)
   7201			return err;
   7202	}
   7203
   7204	if (meta.release_regno) {
   7205		err = -EINVAL;
   7206		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
   7207			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
   7208		else if (meta.ref_obj_id)
   7209			err = release_reference(env, meta.ref_obj_id);
   7210		/* meta.ref_obj_id can only be 0 if register that is meant to be
   7211		 * released is NULL, which must be > R0.
   7212		 */
   7213		else if (register_is_null(&regs[meta.release_regno]))
   7214			err = 0;
   7215		if (err) {
   7216			verbose(env, "func %s#%d reference has not been acquired before\n",
   7217				func_id_name(func_id), func_id);
   7218			return err;
   7219		}
   7220	}
   7221
   7222	switch (func_id) {
   7223	case BPF_FUNC_tail_call:
   7224		err = check_reference_leak(env);
   7225		if (err) {
   7226			verbose(env, "tail_call would lead to reference leak\n");
   7227			return err;
   7228		}
   7229		break;
   7230	case BPF_FUNC_get_local_storage:
   7231		/* check that flags argument in get_local_storage(map, flags) is 0,
   7232		 * this is required because get_local_storage() can't return an error.
   7233		 */
   7234		if (!register_is_null(&regs[BPF_REG_2])) {
   7235			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
   7236			return -EINVAL;
   7237		}
   7238		break;
   7239	case BPF_FUNC_for_each_map_elem:
   7240		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
   7241					set_map_elem_callback_state);
   7242		break;
   7243	case BPF_FUNC_timer_set_callback:
   7244		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
   7245					set_timer_callback_state);
   7246		break;
   7247	case BPF_FUNC_find_vma:
   7248		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
   7249					set_find_vma_callback_state);
   7250		break;
   7251	case BPF_FUNC_snprintf:
   7252		err = check_bpf_snprintf_call(env, regs);
   7253		break;
   7254	case BPF_FUNC_loop:
   7255		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
   7256					set_loop_callback_state);
   7257		break;
   7258	case BPF_FUNC_dynptr_from_mem:
   7259		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
   7260			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
   7261				reg_type_str(env, regs[BPF_REG_1].type));
   7262			return -EACCES;
   7263		}
   7264	}
   7265
   7266	if (err)
   7267		return err;
   7268
   7269	/* reset caller saved regs */
   7270	for (i = 0; i < CALLER_SAVED_REGS; i++) {
   7271		mark_reg_not_init(env, regs, caller_saved[i]);
   7272		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
   7273	}
   7274
   7275	/* helper call returns 64-bit value. */
   7276	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
   7277
   7278	/* update return register (already marked as written above) */
   7279	ret_type = fn->ret_type;
   7280	ret_flag = type_flag(fn->ret_type);
   7281	if (ret_type == RET_INTEGER) {
   7282		/* sets type to SCALAR_VALUE */
   7283		mark_reg_unknown(env, regs, BPF_REG_0);
   7284	} else if (ret_type == RET_VOID) {
   7285		regs[BPF_REG_0].type = NOT_INIT;
   7286	} else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
   7287		/* There is no offset yet applied, variable or fixed */
   7288		mark_reg_known_zero(env, regs, BPF_REG_0);
   7289		/* remember map_ptr, so that check_map_access()
   7290		 * can check 'value_size' boundary of memory access
   7291		 * to map element returned from bpf_map_lookup_elem()
   7292		 */
   7293		if (meta.map_ptr == NULL) {
   7294			verbose(env,
   7295				"kernel subsystem misconfigured verifier\n");
   7296			return -EINVAL;
   7297		}
   7298		regs[BPF_REG_0].map_ptr = meta.map_ptr;
   7299		regs[BPF_REG_0].map_uid = meta.map_uid;
   7300		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
   7301		if (!type_may_be_null(ret_type) &&
   7302		    map_value_has_spin_lock(meta.map_ptr)) {
   7303			regs[BPF_REG_0].id = ++env->id_gen;
   7304		}
   7305	} else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
   7306		mark_reg_known_zero(env, regs, BPF_REG_0);
   7307		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
   7308	} else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
   7309		mark_reg_known_zero(env, regs, BPF_REG_0);
   7310		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
   7311	} else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
   7312		mark_reg_known_zero(env, regs, BPF_REG_0);
   7313		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
   7314	} else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
   7315		mark_reg_known_zero(env, regs, BPF_REG_0);
   7316		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
   7317		regs[BPF_REG_0].mem_size = meta.mem_size;
   7318	} else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
   7319		const struct btf_type *t;
   7320
   7321		mark_reg_known_zero(env, regs, BPF_REG_0);
   7322		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
   7323		if (!btf_type_is_struct(t)) {
   7324			u32 tsize;
   7325			const struct btf_type *ret;
   7326			const char *tname;
   7327
   7328			/* resolve the type size of ksym. */
   7329			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
   7330			if (IS_ERR(ret)) {
   7331				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
   7332				verbose(env, "unable to resolve the size of type '%s': %ld\n",
   7333					tname, PTR_ERR(ret));
   7334				return -EINVAL;
   7335			}
   7336			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
   7337			regs[BPF_REG_0].mem_size = tsize;
   7338		} else {
   7339			/* MEM_RDONLY may be carried from ret_flag, but it
   7340			 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
   7341			 * it will confuse the check of PTR_TO_BTF_ID in
   7342			 * check_mem_access().
   7343			 */
   7344			ret_flag &= ~MEM_RDONLY;
   7345
   7346			regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
   7347			regs[BPF_REG_0].btf = meta.ret_btf;
   7348			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
   7349		}
   7350	} else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
   7351		struct btf *ret_btf;
   7352		int ret_btf_id;
   7353
   7354		mark_reg_known_zero(env, regs, BPF_REG_0);
   7355		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
   7356		if (func_id == BPF_FUNC_kptr_xchg) {
   7357			ret_btf = meta.kptr_off_desc->kptr.btf;
   7358			ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
   7359		} else {
   7360			ret_btf = btf_vmlinux;
   7361			ret_btf_id = *fn->ret_btf_id;
   7362		}
   7363		if (ret_btf_id == 0) {
   7364			verbose(env, "invalid return type %u of func %s#%d\n",
   7365				base_type(ret_type), func_id_name(func_id),
   7366				func_id);
   7367			return -EINVAL;
   7368		}
   7369		regs[BPF_REG_0].btf = ret_btf;
   7370		regs[BPF_REG_0].btf_id = ret_btf_id;
   7371	} else {
   7372		verbose(env, "unknown return type %u of func %s#%d\n",
   7373			base_type(ret_type), func_id_name(func_id), func_id);
   7374		return -EINVAL;
   7375	}
   7376
   7377	if (type_may_be_null(regs[BPF_REG_0].type))
   7378		regs[BPF_REG_0].id = ++env->id_gen;
   7379
   7380	if (is_ptr_cast_function(func_id)) {
   7381		/* For release_reference() */
   7382		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
   7383	} else if (is_acquire_function(func_id, meta.map_ptr)) {
   7384		int id = acquire_reference_state(env, insn_idx);
   7385
   7386		if (id < 0)
   7387			return id;
   7388		/* For mark_ptr_or_null_reg() */
   7389		regs[BPF_REG_0].id = id;
   7390		/* For release_reference() */
   7391		regs[BPF_REG_0].ref_obj_id = id;
   7392	} else if (func_id == BPF_FUNC_dynptr_data) {
   7393		int dynptr_id = 0, i;
   7394
   7395		/* Find the id of the dynptr we're acquiring a reference to */
   7396		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
   7397			if (arg_type_is_dynptr(fn->arg_type[i])) {
   7398				if (dynptr_id) {
   7399					verbose(env, "verifier internal error: multiple dynptr args in func\n");
   7400					return -EFAULT;
   7401				}
   7402				dynptr_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
   7403			}
   7404		}
   7405		/* For release_reference() */
   7406		regs[BPF_REG_0].ref_obj_id = dynptr_id;
   7407	}
   7408
   7409	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
   7410
   7411	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
   7412	if (err)
   7413		return err;
   7414
   7415	if ((func_id == BPF_FUNC_get_stack ||
   7416	     func_id == BPF_FUNC_get_task_stack) &&
   7417	    !env->prog->has_callchain_buf) {
   7418		const char *err_str;
   7419
   7420#ifdef CONFIG_PERF_EVENTS
   7421		err = get_callchain_buffers(sysctl_perf_event_max_stack);
   7422		err_str = "cannot get callchain buffer for func %s#%d\n";
   7423#else
   7424		err = -ENOTSUPP;
   7425		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
   7426#endif
   7427		if (err) {
   7428			verbose(env, err_str, func_id_name(func_id), func_id);
   7429			return err;
   7430		}
   7431
   7432		env->prog->has_callchain_buf = true;
   7433	}
   7434
   7435	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
   7436		env->prog->call_get_stack = true;
   7437
   7438	if (func_id == BPF_FUNC_get_func_ip) {
   7439		if (check_get_func_ip(env))
   7440			return -ENOTSUPP;
   7441		env->prog->call_get_func_ip = true;
   7442	}
   7443
   7444	if (changes_data)
   7445		clear_all_pkt_pointers(env);
   7446	return 0;
   7447}
   7448
   7449/* mark_btf_func_reg_size() is used when the reg size is determined by
    7450	 * the BTF func_proto's return value or argument size.
   7451 */
   7452static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
   7453				   size_t reg_size)
   7454{
   7455	struct bpf_reg_state *reg = &cur_regs(env)[regno];
   7456
   7457	if (regno == BPF_REG_0) {
   7458		/* Function return value */
   7459		reg->live |= REG_LIVE_WRITTEN;
   7460		reg->subreg_def = reg_size == sizeof(u64) ?
   7461			DEF_NOT_SUBREG : env->insn_idx + 1;
   7462	} else {
   7463		/* Function argument */
   7464		if (reg_size == sizeof(u64)) {
   7465			mark_insn_zext(env, reg);
   7466			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
   7467		} else {
   7468			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
   7469		}
   7470	}
   7471}
   7472
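        /* Verify a call to a kernel function (kfunc): look up its BTF prototype
         * via insn->off/imm, check that the kfunc is allowed for this program
         * type, type-check the arguments, handle acquire/release semantics
         * according to the kfunc id sets, and set R0 from the BTF return type
         * (a scalar, or PTR_TO_BTF_ID optionally marked PTR_MAYBE_NULL).
         */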
   7473static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
   7474			    int *insn_idx_p)
   7475{
   7476	const struct btf_type *t, *func, *func_proto, *ptr_type;
   7477	struct bpf_reg_state *regs = cur_regs(env);
   7478	const char *func_name, *ptr_type_name;
   7479	u32 i, nargs, func_id, ptr_type_id;
   7480	int err, insn_idx = *insn_idx_p;
   7481	const struct btf_param *args;
   7482	struct btf *desc_btf;
   7483	bool acq;
   7484
   7485	/* skip for now, but return error when we find this in fixup_kfunc_call */
   7486	if (!insn->imm)
   7487		return 0;
   7488
   7489	desc_btf = find_kfunc_desc_btf(env, insn->off);
   7490	if (IS_ERR(desc_btf))
   7491		return PTR_ERR(desc_btf);
   7492
   7493	func_id = insn->imm;
   7494	func = btf_type_by_id(desc_btf, func_id);
   7495	func_name = btf_name_by_offset(desc_btf, func->name_off);
   7496	func_proto = btf_type_by_id(desc_btf, func->type);
   7497
   7498	if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
   7499				      BTF_KFUNC_TYPE_CHECK, func_id)) {
   7500		verbose(env, "calling kernel function %s is not allowed\n",
   7501			func_name);
   7502		return -EACCES;
   7503	}
   7504
   7505	acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
   7506					BTF_KFUNC_TYPE_ACQUIRE, func_id);
   7507
   7508	/* Check the arguments */
   7509	err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
   7510	if (err < 0)
   7511		return err;
   7512	/* In case of release function, we get register number of refcounted
   7513	 * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
   7514	 */
   7515	if (err) {
   7516		err = release_reference(env, regs[err].ref_obj_id);
   7517		if (err) {
   7518			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
   7519				func_name, func_id);
   7520			return err;
   7521		}
   7522	}
   7523
   7524	for (i = 0; i < CALLER_SAVED_REGS; i++)
   7525		mark_reg_not_init(env, regs, caller_saved[i]);
   7526
   7527	/* Check return type */
   7528	t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
   7529
   7530	if (acq && !btf_type_is_ptr(t)) {
   7531		verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
   7532		return -EINVAL;
   7533	}
   7534
   7535	if (btf_type_is_scalar(t)) {
   7536		mark_reg_unknown(env, regs, BPF_REG_0);
   7537		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
   7538	} else if (btf_type_is_ptr(t)) {
   7539		ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
   7540						   &ptr_type_id);
   7541		if (!btf_type_is_struct(ptr_type)) {
   7542			ptr_type_name = btf_name_by_offset(desc_btf,
   7543							   ptr_type->name_off);
   7544			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
   7545				func_name, btf_type_str(ptr_type),
   7546				ptr_type_name);
   7547			return -EINVAL;
   7548		}
   7549		mark_reg_known_zero(env, regs, BPF_REG_0);
   7550		regs[BPF_REG_0].btf = desc_btf;
   7551		regs[BPF_REG_0].type = PTR_TO_BTF_ID;
   7552		regs[BPF_REG_0].btf_id = ptr_type_id;
   7553		if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
   7554					      BTF_KFUNC_TYPE_RET_NULL, func_id)) {
   7555			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
   7556			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
   7557			regs[BPF_REG_0].id = ++env->id_gen;
   7558		}
   7559		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
   7560		if (acq) {
   7561			int id = acquire_reference_state(env, insn_idx);
   7562
   7563			if (id < 0)
   7564				return id;
   7565			regs[BPF_REG_0].id = id;
   7566			regs[BPF_REG_0].ref_obj_id = id;
   7567		}
   7568	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
   7569
   7570	nargs = btf_type_vlen(func_proto);
   7571	args = (const struct btf_param *)(func_proto + 1);
   7572	for (i = 0; i < nargs; i++) {
   7573		u32 regno = i + 1;
   7574
   7575		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
   7576		if (btf_type_is_ptr(t))
   7577			mark_btf_func_reg_size(env, regno, sizeof(void *));
   7578		else
   7579			/* scalar. ensured by btf_check_kfunc_arg_match() */
   7580			mark_btf_func_reg_size(env, regno, t->size);
   7581	}
   7582
   7583	return 0;
   7584}
   7585
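        /* Signed overflow detectors used by the scalar/pointer ALU tracking:
         * the arithmetic is done in the unsigned domain, where wrap-around is
         * well defined, and overflow is inferred from the direction the result
         * moved relative to the first operand. For example, a = S64_MAX, b = 1
         * wraps to S64_MIN, so res < a while b > 0, which signals overflow.
         */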
   7586static bool signed_add_overflows(s64 a, s64 b)
   7587{
   7588	/* Do the add in u64, where overflow is well-defined */
   7589	s64 res = (s64)((u64)a + (u64)b);
   7590
   7591	if (b < 0)
   7592		return res > a;
   7593	return res < a;
   7594}
   7595
   7596static bool signed_add32_overflows(s32 a, s32 b)
   7597{
   7598	/* Do the add in u32, where overflow is well-defined */
   7599	s32 res = (s32)((u32)a + (u32)b);
   7600
   7601	if (b < 0)
   7602		return res > a;
   7603	return res < a;
   7604}
   7605
   7606static bool signed_sub_overflows(s64 a, s64 b)
   7607{
   7608	/* Do the sub in u64, where overflow is well-defined */
   7609	s64 res = (s64)((u64)a - (u64)b);
   7610
   7611	if (b < 0)
   7612		return res < a;
   7613	return res > a;
   7614}
   7615
   7616static bool signed_sub32_overflows(s32 a, s32 b)
   7617{
   7618	/* Do the sub in u32, where overflow is well-defined */
   7619	s32 res = (s32)((u32)a - (u32)b);
   7620
   7621	if (b < 0)
   7622		return res < a;
   7623	return res > a;
   7624}
   7625
   7626static bool check_reg_sane_offset(struct bpf_verifier_env *env,
   7627				  const struct bpf_reg_state *reg,
   7628				  enum bpf_reg_type type)
   7629{
   7630	bool known = tnum_is_const(reg->var_off);
   7631	s64 val = reg->var_off.value;
   7632	s64 smin = reg->smin_value;
   7633
   7634	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
   7635		verbose(env, "math between %s pointer and %lld is not allowed\n",
   7636			reg_type_str(env, type), val);
   7637		return false;
   7638	}
   7639
   7640	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
   7641		verbose(env, "%s pointer offset %d is not allowed\n",
   7642			reg_type_str(env, type), reg->off);
   7643		return false;
   7644	}
   7645
   7646	if (smin == S64_MIN) {
   7647		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
   7648			reg_type_str(env, type));
   7649		return false;
   7650	}
   7651
   7652	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
   7653		verbose(env, "value %lld makes %s pointer be out of bounds\n",
   7654			smin, reg_type_str(env, type));
   7655		return false;
   7656	}
   7657
   7658	return true;
   7659}
   7660
   7661static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
   7662{
   7663	return &env->insn_aux_data[env->insn_idx];
   7664}
   7665
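        /* Internal error codes for the Spectre v1 ALU sanitation logic below;
         * sanitize_err() translates them into verifier log messages.
         */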
   7666enum {
   7667	REASON_BOUNDS	= -1,
   7668	REASON_TYPE	= -2,
   7669	REASON_PATHS	= -3,
   7670	REASON_LIMIT	= -4,
   7671	REASON_STACK	= -5,
   7672};
   7673
   7674static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
   7675			      u32 *alu_limit, bool mask_to_left)
   7676{
   7677	u32 max = 0, ptr_limit = 0;
   7678
   7679	switch (ptr_reg->type) {
   7680	case PTR_TO_STACK:
   7681		/* Offset 0 is out-of-bounds, but acceptable start for the
   7682		 * left direction, see BPF_REG_FP. Also, unknown scalar
   7683		 * offset where we would need to deal with min/max bounds is
   7684		 * currently prohibited for unprivileged.
   7685		 */
   7686		max = MAX_BPF_STACK + mask_to_left;
   7687		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
   7688		break;
   7689	case PTR_TO_MAP_VALUE:
   7690		max = ptr_reg->map_ptr->value_size;
   7691		ptr_limit = (mask_to_left ?
   7692			     ptr_reg->smin_value :
   7693			     ptr_reg->umax_value) + ptr_reg->off;
   7694		break;
   7695	default:
   7696		return REASON_TYPE;
   7697	}
   7698
   7699	if (ptr_limit >= max)
   7700		return REASON_LIMIT;
   7701	*alu_limit = ptr_limit;
   7702	return 0;
   7703}
   7704
   7705static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
   7706				    const struct bpf_insn *insn)
   7707{
   7708	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
   7709}
   7710
   7711static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
   7712				       u32 alu_state, u32 alu_limit)
   7713{
   7714	/* If we arrived here from different branches with different
   7715	 * state or limits to sanitize, then this won't work.
   7716	 */
   7717	if (aux->alu_state &&
   7718	    (aux->alu_state != alu_state ||
   7719	     aux->alu_limit != alu_limit))
   7720		return REASON_PATHS;
   7721
   7722	/* Corresponding fixup done in do_misc_fixups(). */
   7723	aux->alu_state = alu_state;
   7724	aux->alu_limit = alu_limit;
   7725	return 0;
   7726}
   7727
   7728static int sanitize_val_alu(struct bpf_verifier_env *env,
   7729			    struct bpf_insn *insn)
   7730{
   7731	struct bpf_insn_aux_data *aux = cur_aux(env);
   7732
   7733	if (can_skip_alu_sanitation(env, insn))
   7734		return 0;
   7735
   7736	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
   7737}
   7738
   7739static bool sanitize_needed(u8 opcode)
   7740{
   7741	return opcode == BPF_ADD || opcode == BPF_SUB;
   7742}
   7743
   7744struct bpf_sanitize_info {
   7745	struct bpf_insn_aux_data aux;
   7746	bool mask_to_left;
   7747};
   7748
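        /* Push a verifier state that models a speculatively executed path: the
         * branch is marked speculative and, for conditional jumps, the involved
         * registers become unknown scalars. sanitize_ptr_alu() below also uses
         * this (with insn == NULL) to simulate the masked pointer arithmetic
         * under speculation.
         */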
   7749static struct bpf_verifier_state *
   7750sanitize_speculative_path(struct bpf_verifier_env *env,
   7751			  const struct bpf_insn *insn,
   7752			  u32 next_idx, u32 curr_idx)
   7753{
   7754	struct bpf_verifier_state *branch;
   7755	struct bpf_reg_state *regs;
   7756
   7757	branch = push_stack(env, next_idx, curr_idx, true);
   7758	if (branch && insn) {
   7759		regs = branch->frame[branch->curframe]->regs;
   7760		if (BPF_SRC(insn->code) == BPF_K) {
   7761			mark_reg_unknown(env, regs, insn->dst_reg);
   7762		} else if (BPF_SRC(insn->code) == BPF_X) {
   7763			mark_reg_unknown(env, regs, insn->dst_reg);
   7764			mark_reg_unknown(env, regs, insn->src_reg);
   7765		}
   7766	}
   7767	return branch;
   7768}
   7769
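        /* Spectre v1 mitigation for pointer arithmetic with an unknown scalar:
         * derive an alu_limit from the pointer's bounds (retrieve_ptr_limit()),
         * store it in insn_aux_data so do_misc_fixups() can emit a masking
         * sequence for the ALU insn, and simulate the truncated result under
         * speculation via sanitize_speculative_path(). Called twice per insn:
         * once before the operation and again in the commit window, where the
         * limit is narrowed based on the observed pointer movement.
         */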
   7770static int sanitize_ptr_alu(struct bpf_verifier_env *env,
   7771			    struct bpf_insn *insn,
   7772			    const struct bpf_reg_state *ptr_reg,
   7773			    const struct bpf_reg_state *off_reg,
   7774			    struct bpf_reg_state *dst_reg,
   7775			    struct bpf_sanitize_info *info,
   7776			    const bool commit_window)
   7777{
   7778	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
   7779	struct bpf_verifier_state *vstate = env->cur_state;
   7780	bool off_is_imm = tnum_is_const(off_reg->var_off);
   7781	bool off_is_neg = off_reg->smin_value < 0;
   7782	bool ptr_is_dst_reg = ptr_reg == dst_reg;
   7783	u8 opcode = BPF_OP(insn->code);
   7784	u32 alu_state, alu_limit;
   7785	struct bpf_reg_state tmp;
   7786	bool ret;
   7787	int err;
   7788
   7789	if (can_skip_alu_sanitation(env, insn))
   7790		return 0;
   7791
   7792	/* We already marked aux for masking from non-speculative
   7793	 * paths, thus we got here in the first place. We only care
   7794	 * to explore bad access from here.
   7795	 */
   7796	if (vstate->speculative)
   7797		goto do_sim;
   7798
   7799	if (!commit_window) {
   7800		if (!tnum_is_const(off_reg->var_off) &&
   7801		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
   7802			return REASON_BOUNDS;
   7803
   7804		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
   7805				     (opcode == BPF_SUB && !off_is_neg);
   7806	}
   7807
   7808	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
   7809	if (err < 0)
   7810		return err;
   7811
   7812	if (commit_window) {
   7813		/* In commit phase we narrow the masking window based on
   7814		 * the observed pointer move after the simulated operation.
   7815		 */
   7816		alu_state = info->aux.alu_state;
   7817		alu_limit = abs(info->aux.alu_limit - alu_limit);
   7818	} else {
   7819		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
   7820		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
   7821		alu_state |= ptr_is_dst_reg ?
   7822			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
   7823
   7824		/* Limit pruning on unknown scalars to enable deep search for
   7825		 * potential masking differences from other program paths.
   7826		 */
   7827		if (!off_is_imm)
   7828			env->explore_alu_limits = true;
   7829	}
   7830
   7831	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
   7832	if (err < 0)
   7833		return err;
   7834do_sim:
   7835	/* If we're in commit phase, we're done here given we already
   7836	 * pushed the truncated dst_reg into the speculative verification
   7837	 * stack.
   7838	 *
   7839	 * Also, when register is a known constant, we rewrite register-based
   7840	 * operation to immediate-based, and thus do not need masking (and as
   7841	 * a consequence, do not need to simulate the zero-truncation either).
   7842	 */
   7843	if (commit_window || off_is_imm)
   7844		return 0;
   7845
   7846	/* Simulate and find potential out-of-bounds access under
   7847	 * speculative execution from truncation as a result of
   7848	 * masking when off was not within expected range. If off
   7849	 * sits in dst, then we temporarily need to move ptr there
   7850	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
   7851	 * for cases where we use K-based arithmetic in one direction
   7852	 * and truncated reg-based in the other in order to explore
   7853	 * bad access.
   7854	 */
   7855	if (!ptr_is_dst_reg) {
   7856		tmp = *dst_reg;
   7857		*dst_reg = *ptr_reg;
   7858	}
   7859	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
   7860					env->insn_idx);
   7861	if (!ptr_is_dst_reg && ret)
   7862		*dst_reg = tmp;
   7863	return !ret ? REASON_STACK : 0;
   7864}
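
/* Illustration (informal sketch, not authoritative): the alu_state/alu_limit
 * recorded above are consumed later when the instruction stream is patched,
 * turning an unprivileged "ptr += off" into roughly the following masked
 * sequence (using the auxiliary register AX):
 *
 *   AX = alu_limit;
 *   AX -= off;
 *   AX |= off;
 *   AX = -AX;
 *   AX s>>= 63;    // sign-extends to all-ones when off is in range,
 *                  // to zero when it is not
 *   AX &= off;     // AX then replaces off in the patched add/sub
 *
 * so that even a mis-speculated path cannot move the pointer further than
 * the limit derived from the verified bounds.
 */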
   7865
   7866static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
   7867{
   7868	struct bpf_verifier_state *vstate = env->cur_state;
   7869
   7870	/* If we simulate paths under speculation, we don't update the
   7871	 * insn as 'seen' such that when we verify unreachable paths in
   7872	 * the non-speculative domain, sanitize_dead_code() can still
   7873	 * rewrite/sanitize them.
   7874	 */
   7875	if (!vstate->speculative)
   7876		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
   7877}
   7878
   7879static int sanitize_err(struct bpf_verifier_env *env,
   7880			const struct bpf_insn *insn, int reason,
   7881			const struct bpf_reg_state *off_reg,
   7882			const struct bpf_reg_state *dst_reg)
   7883{
   7884	static const char *err = "pointer arithmetic with it prohibited for !root";
   7885	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
   7886	u32 dst = insn->dst_reg, src = insn->src_reg;
   7887
   7888	switch (reason) {
   7889	case REASON_BOUNDS:
   7890		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
   7891			off_reg == dst_reg ? dst : src, err);
   7892		break;
   7893	case REASON_TYPE:
   7894		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
   7895			off_reg == dst_reg ? src : dst, err);
   7896		break;
   7897	case REASON_PATHS:
   7898		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
   7899			dst, op, err);
   7900		break;
   7901	case REASON_LIMIT:
   7902		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
   7903			dst, op, err);
   7904		break;
   7905	case REASON_STACK:
   7906		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
   7907			dst, err);
   7908		break;
   7909	default:
   7910		verbose(env, "verifier internal error: unknown reason (%d)\n",
   7911			reason);
   7912		break;
   7913	}
   7914
   7915	return -EACCES;
   7916}
   7917
   7918/* check that stack access falls within stack limits and that 'reg' doesn't
   7919 * have a variable offset.
   7920 *
   7921 * Variable offset is prohibited for unprivileged mode for simplicity since it
   7922 * requires corresponding support in Spectre masking for stack ALU.  See also
   7923 * retrieve_ptr_limit().
   7924 *
    7925 *
   7927 */
   7928static int check_stack_access_for_ptr_arithmetic(
   7929				struct bpf_verifier_env *env,
   7930				int regno,
   7931				const struct bpf_reg_state *reg,
   7932				int off)
   7933{
   7934	if (!tnum_is_const(reg->var_off)) {
   7935		char tn_buf[48];
   7936
   7937		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
   7938		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
   7939			regno, tn_buf, off);
   7940		return -EACCES;
   7941	}
   7942
   7943	if (off >= 0 || off < -MAX_BPF_STACK) {
   7944		verbose(env, "R%d stack pointer arithmetic goes out of range, "
   7945			"prohibited for !root; off=%d\n", regno, off);
   7946		return -EACCES;
   7947	}
   7948
   7949	return 0;
   7950}
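
/* Example (sketch) against the two checks above, assuming the usual
 * MAX_BPF_STACK of 512: a constant offset in [-512, -1], e.g.
 *
 *   r1 = r10;
 *   r1 += -8;      // off == -8: in range
 *
 * is accepted, while off == 0 (not below the frame pointer) or
 * off == -520 (beyond -MAX_BPF_STACK) trips the second check, and a
 * register whose var_off is not constant, e.g. (0x0; 0xf8), trips the
 * first check regardless of its value.
 */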
   7951
   7952static int sanitize_check_bounds(struct bpf_verifier_env *env,
   7953				 const struct bpf_insn *insn,
   7954				 const struct bpf_reg_state *dst_reg)
   7955{
   7956	u32 dst = insn->dst_reg;
   7957
   7958	/* For unprivileged we require that resulting offset must be in bounds
   7959	 * in order to be able to sanitize access later on.
   7960	 */
   7961	if (env->bypass_spec_v1)
   7962		return 0;
   7963
   7964	switch (dst_reg->type) {
   7965	case PTR_TO_STACK:
   7966		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
   7967					dst_reg->off + dst_reg->var_off.value))
   7968			return -EACCES;
   7969		break;
   7970	case PTR_TO_MAP_VALUE:
   7971		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
   7972			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
   7973				"prohibited for !root\n", dst);
   7974			return -EACCES;
   7975		}
   7976		break;
   7977	default:
   7978		break;
   7979	}
   7980
   7981	return 0;
   7982}
   7983
   7984/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
   7985 * Caller should also handle BPF_MOV case separately.
   7986 * If we return -EACCES, caller may want to try again treating pointer as a
   7987 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
   7988 */
   7989static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
   7990				   struct bpf_insn *insn,
   7991				   const struct bpf_reg_state *ptr_reg,
   7992				   const struct bpf_reg_state *off_reg)
   7993{
   7994	struct bpf_verifier_state *vstate = env->cur_state;
   7995	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   7996	struct bpf_reg_state *regs = state->regs, *dst_reg;
   7997	bool known = tnum_is_const(off_reg->var_off);
   7998	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
   7999	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
   8000	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
   8001	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
   8002	struct bpf_sanitize_info info = {};
   8003	u8 opcode = BPF_OP(insn->code);
   8004	u32 dst = insn->dst_reg;
   8005	int ret;
   8006
   8007	dst_reg = &regs[dst];
   8008
   8009	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
   8010	    smin_val > smax_val || umin_val > umax_val) {
   8011		/* Taint dst register if offset had invalid bounds derived from
   8012		 * e.g. dead branches.
   8013		 */
   8014		__mark_reg_unknown(env, dst_reg);
   8015		return 0;
   8016	}
   8017
   8018	if (BPF_CLASS(insn->code) != BPF_ALU64) {
   8019		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
   8020		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
   8021			__mark_reg_unknown(env, dst_reg);
   8022			return 0;
   8023		}
   8024
   8025		verbose(env,
   8026			"R%d 32-bit pointer arithmetic prohibited\n",
   8027			dst);
   8028		return -EACCES;
   8029	}
   8030
   8031	if (ptr_reg->type & PTR_MAYBE_NULL) {
   8032		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
   8033			dst, reg_type_str(env, ptr_reg->type));
   8034		return -EACCES;
   8035	}
   8036
   8037	switch (base_type(ptr_reg->type)) {
   8038	case CONST_PTR_TO_MAP:
   8039		/* smin_val represents the known value */
   8040		if (known && smin_val == 0 && opcode == BPF_ADD)
   8041			break;
   8042		fallthrough;
   8043	case PTR_TO_PACKET_END:
   8044	case PTR_TO_SOCKET:
   8045	case PTR_TO_SOCK_COMMON:
   8046	case PTR_TO_TCP_SOCK:
   8047	case PTR_TO_XDP_SOCK:
   8048		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
   8049			dst, reg_type_str(env, ptr_reg->type));
   8050		return -EACCES;
   8051	default:
   8052		break;
   8053	}
   8054
   8055	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
   8056	 * The id may be overwritten later if we create a new variable offset.
   8057	 */
   8058	dst_reg->type = ptr_reg->type;
   8059	dst_reg->id = ptr_reg->id;
   8060
   8061	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
   8062	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
   8063		return -EINVAL;
   8064
   8065	/* pointer types do not carry 32-bit bounds at the moment. */
   8066	__mark_reg32_unbounded(dst_reg);
   8067
   8068	if (sanitize_needed(opcode)) {
   8069		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
   8070				       &info, false);
   8071		if (ret < 0)
   8072			return sanitize_err(env, insn, ret, off_reg, dst_reg);
   8073	}
   8074
   8075	switch (opcode) {
   8076	case BPF_ADD:
   8077		/* We can take a fixed offset as long as it doesn't overflow
   8078		 * the s32 'off' field
   8079		 */
   8080		if (known && (ptr_reg->off + smin_val ==
   8081			      (s64)(s32)(ptr_reg->off + smin_val))) {
   8082			/* pointer += K.  Accumulate it into fixed offset */
   8083			dst_reg->smin_value = smin_ptr;
   8084			dst_reg->smax_value = smax_ptr;
   8085			dst_reg->umin_value = umin_ptr;
   8086			dst_reg->umax_value = umax_ptr;
   8087			dst_reg->var_off = ptr_reg->var_off;
   8088			dst_reg->off = ptr_reg->off + smin_val;
   8089			dst_reg->raw = ptr_reg->raw;
   8090			break;
   8091		}
   8092		/* A new variable offset is created.  Note that off_reg->off
   8093		 * == 0, since it's a scalar.
   8094		 * dst_reg gets the pointer type and since some positive
   8095		 * integer value was added to the pointer, give it a new 'id'
   8096		 * if it's a PTR_TO_PACKET.
   8097		 * this creates a new 'base' pointer, off_reg (variable) gets
   8098		 * added into the variable offset, and we copy the fixed offset
   8099		 * from ptr_reg.
   8100		 */
   8101		if (signed_add_overflows(smin_ptr, smin_val) ||
   8102		    signed_add_overflows(smax_ptr, smax_val)) {
   8103			dst_reg->smin_value = S64_MIN;
   8104			dst_reg->smax_value = S64_MAX;
   8105		} else {
   8106			dst_reg->smin_value = smin_ptr + smin_val;
   8107			dst_reg->smax_value = smax_ptr + smax_val;
   8108		}
   8109		if (umin_ptr + umin_val < umin_ptr ||
   8110		    umax_ptr + umax_val < umax_ptr) {
   8111			dst_reg->umin_value = 0;
   8112			dst_reg->umax_value = U64_MAX;
   8113		} else {
   8114			dst_reg->umin_value = umin_ptr + umin_val;
   8115			dst_reg->umax_value = umax_ptr + umax_val;
   8116		}
   8117		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
   8118		dst_reg->off = ptr_reg->off;
   8119		dst_reg->raw = ptr_reg->raw;
   8120		if (reg_is_pkt_pointer(ptr_reg)) {
   8121			dst_reg->id = ++env->id_gen;
   8122			/* something was added to pkt_ptr, set range to zero */
   8123			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
   8124		}
   8125		break;
   8126	case BPF_SUB:
   8127		if (dst_reg == off_reg) {
   8128			/* scalar -= pointer.  Creates an unknown scalar */
   8129			verbose(env, "R%d tried to subtract pointer from scalar\n",
   8130				dst);
   8131			return -EACCES;
   8132		}
    8133		/* We don't allow subtraction from FP, because (according to
    8134		 * the test_verifier.c test "invalid fp arithmetic") JITs might
    8135		 * not be able to deal with it.
   8136		 */
   8137		if (ptr_reg->type == PTR_TO_STACK) {
   8138			verbose(env, "R%d subtraction from stack pointer prohibited\n",
   8139				dst);
   8140			return -EACCES;
   8141		}
   8142		if (known && (ptr_reg->off - smin_val ==
   8143			      (s64)(s32)(ptr_reg->off - smin_val))) {
   8144			/* pointer -= K.  Subtract it from fixed offset */
   8145			dst_reg->smin_value = smin_ptr;
   8146			dst_reg->smax_value = smax_ptr;
   8147			dst_reg->umin_value = umin_ptr;
   8148			dst_reg->umax_value = umax_ptr;
   8149			dst_reg->var_off = ptr_reg->var_off;
   8150			dst_reg->id = ptr_reg->id;
   8151			dst_reg->off = ptr_reg->off - smin_val;
   8152			dst_reg->raw = ptr_reg->raw;
   8153			break;
   8154		}
   8155		/* A new variable offset is created.  If the subtrahend is known
   8156		 * nonnegative, then any reg->range we had before is still good.
   8157		 */
   8158		if (signed_sub_overflows(smin_ptr, smax_val) ||
   8159		    signed_sub_overflows(smax_ptr, smin_val)) {
   8160			/* Overflow possible, we know nothing */
   8161			dst_reg->smin_value = S64_MIN;
   8162			dst_reg->smax_value = S64_MAX;
   8163		} else {
   8164			dst_reg->smin_value = smin_ptr - smax_val;
   8165			dst_reg->smax_value = smax_ptr - smin_val;
   8166		}
   8167		if (umin_ptr < umax_val) {
   8168			/* Overflow possible, we know nothing */
   8169			dst_reg->umin_value = 0;
   8170			dst_reg->umax_value = U64_MAX;
   8171		} else {
   8172			/* Cannot overflow (as long as bounds are consistent) */
   8173			dst_reg->umin_value = umin_ptr - umax_val;
   8174			dst_reg->umax_value = umax_ptr - umin_val;
   8175		}
   8176		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
   8177		dst_reg->off = ptr_reg->off;
   8178		dst_reg->raw = ptr_reg->raw;
   8179		if (reg_is_pkt_pointer(ptr_reg)) {
   8180			dst_reg->id = ++env->id_gen;
    8181			/* a possibly-negative subtrahend can move pkt_ptr forward, so clear the range */
   8182			if (smin_val < 0)
   8183				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
   8184		}
   8185		break;
   8186	case BPF_AND:
   8187	case BPF_OR:
   8188	case BPF_XOR:
   8189		/* bitwise ops on pointers are troublesome, prohibit. */
   8190		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
   8191			dst, bpf_alu_string[opcode >> 4]);
   8192		return -EACCES;
   8193	default:
    8194		/* other operators (e.g. MUL, LSH) produce non-pointer results */
   8195		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
   8196			dst, bpf_alu_string[opcode >> 4]);
   8197		return -EACCES;
   8198	}
   8199
   8200	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
   8201		return -EINVAL;
   8202	reg_bounds_sync(dst_reg);
   8203	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
   8204		return -EACCES;
   8205	if (sanitize_needed(opcode)) {
   8206		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
   8207				       &info, true);
   8208		if (ret < 0)
   8209			return sanitize_err(env, insn, ret, off_reg, dst_reg);
   8210	}
   8211
   8212	return 0;
   8213}
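
/* Worked example (sketch) for the BPF_ADD case above:
 *
 *   r2 = r1;     // r1 = pkt(id=1, off=0), var_off known constant 0
 *   r2 += 14;    // known K that fits in s32: folded into the fixed
 *                // offset, r2 = pkt(id=1, off=14), bounds unchanged
 *   r2 += r3;    // r3 = scalar in [0, 64]: a variable offset is created,
 *                // smin/smax and umin/umax grow by [0, 64] (with the
 *                // overflow checks above), var_off is widened via
 *                // tnum_add(), and r2 gets a fresh id with its range
 *                // cleared.
 */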
   8214
   8215static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
   8216				 struct bpf_reg_state *src_reg)
   8217{
   8218	s32 smin_val = src_reg->s32_min_value;
   8219	s32 smax_val = src_reg->s32_max_value;
   8220	u32 umin_val = src_reg->u32_min_value;
   8221	u32 umax_val = src_reg->u32_max_value;
   8222
   8223	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
   8224	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
   8225		dst_reg->s32_min_value = S32_MIN;
   8226		dst_reg->s32_max_value = S32_MAX;
   8227	} else {
   8228		dst_reg->s32_min_value += smin_val;
   8229		dst_reg->s32_max_value += smax_val;
   8230	}
   8231	if (dst_reg->u32_min_value + umin_val < umin_val ||
   8232	    dst_reg->u32_max_value + umax_val < umax_val) {
   8233		dst_reg->u32_min_value = 0;
   8234		dst_reg->u32_max_value = U32_MAX;
   8235	} else {
   8236		dst_reg->u32_min_value += umin_val;
   8237		dst_reg->u32_max_value += umax_val;
   8238	}
   8239}
   8240
   8241static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
   8242			       struct bpf_reg_state *src_reg)
   8243{
   8244	s64 smin_val = src_reg->smin_value;
   8245	s64 smax_val = src_reg->smax_value;
   8246	u64 umin_val = src_reg->umin_value;
   8247	u64 umax_val = src_reg->umax_value;
   8248
   8249	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
   8250	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
   8251		dst_reg->smin_value = S64_MIN;
   8252		dst_reg->smax_value = S64_MAX;
   8253	} else {
   8254		dst_reg->smin_value += smin_val;
   8255		dst_reg->smax_value += smax_val;
   8256	}
   8257	if (dst_reg->umin_value + umin_val < umin_val ||
   8258	    dst_reg->umax_value + umax_val < umax_val) {
   8259		dst_reg->umin_value = 0;
   8260		dst_reg->umax_value = U64_MAX;
   8261	} else {
   8262		dst_reg->umin_value += umin_val;
   8263		dst_reg->umax_value += umax_val;
   8264	}
   8265}
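
/* Worked example (sketch) of the unsigned overflow check above: if dst has
 * umax_value == U64_MAX - 1 and src has umax_val == 4, the sum wraps to 2,
 * which is < umax_val, so the bounds collapse to [0, U64_MAX]. With dst in
 * [10, 20] and src in [1, 2] neither check fires and the result is [11, 22].
 */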
   8266
   8267static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
   8268				 struct bpf_reg_state *src_reg)
   8269{
   8270	s32 smin_val = src_reg->s32_min_value;
   8271	s32 smax_val = src_reg->s32_max_value;
   8272	u32 umin_val = src_reg->u32_min_value;
   8273	u32 umax_val = src_reg->u32_max_value;
   8274
   8275	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
   8276	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
   8277		/* Overflow possible, we know nothing */
   8278		dst_reg->s32_min_value = S32_MIN;
   8279		dst_reg->s32_max_value = S32_MAX;
   8280	} else {
   8281		dst_reg->s32_min_value -= smax_val;
   8282		dst_reg->s32_max_value -= smin_val;
   8283	}
   8284	if (dst_reg->u32_min_value < umax_val) {
   8285		/* Overflow possible, we know nothing */
   8286		dst_reg->u32_min_value = 0;
   8287		dst_reg->u32_max_value = U32_MAX;
   8288	} else {
   8289		/* Cannot overflow (as long as bounds are consistent) */
   8290		dst_reg->u32_min_value -= umax_val;
   8291		dst_reg->u32_max_value -= umin_val;
   8292	}
   8293}
   8294
   8295static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
   8296			       struct bpf_reg_state *src_reg)
   8297{
   8298	s64 smin_val = src_reg->smin_value;
   8299	s64 smax_val = src_reg->smax_value;
   8300	u64 umin_val = src_reg->umin_value;
   8301	u64 umax_val = src_reg->umax_value;
   8302
   8303	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
   8304	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
   8305		/* Overflow possible, we know nothing */
   8306		dst_reg->smin_value = S64_MIN;
   8307		dst_reg->smax_value = S64_MAX;
   8308	} else {
   8309		dst_reg->smin_value -= smax_val;
   8310		dst_reg->smax_value -= smin_val;
   8311	}
   8312	if (dst_reg->umin_value < umax_val) {
   8313		/* Overflow possible, we know nothing */
   8314		dst_reg->umin_value = 0;
   8315		dst_reg->umax_value = U64_MAX;
   8316	} else {
   8317		/* Cannot overflow (as long as bounds are consistent) */
   8318		dst_reg->umin_value -= umax_val;
   8319		dst_reg->umax_value -= umin_val;
   8320	}
   8321}
   8322
   8323static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
   8324				 struct bpf_reg_state *src_reg)
   8325{
   8326	s32 smin_val = src_reg->s32_min_value;
   8327	u32 umin_val = src_reg->u32_min_value;
   8328	u32 umax_val = src_reg->u32_max_value;
   8329
   8330	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
   8331		/* Ain't nobody got time to multiply that sign */
   8332		__mark_reg32_unbounded(dst_reg);
   8333		return;
   8334	}
   8335	/* Both values are positive, so we can work with unsigned and
   8336	 * copy the result to signed (unless it exceeds S32_MAX).
   8337	 */
   8338	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
   8339		/* Potential overflow, we know nothing */
   8340		__mark_reg32_unbounded(dst_reg);
   8341		return;
   8342	}
   8343	dst_reg->u32_min_value *= umin_val;
   8344	dst_reg->u32_max_value *= umax_val;
   8345	if (dst_reg->u32_max_value > S32_MAX) {
   8346		/* Overflow possible, we know nothing */
   8347		dst_reg->s32_min_value = S32_MIN;
   8348		dst_reg->s32_max_value = S32_MAX;
   8349	} else {
   8350		dst_reg->s32_min_value = dst_reg->u32_min_value;
   8351		dst_reg->s32_max_value = dst_reg->u32_max_value;
   8352	}
   8353}
   8354
   8355static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
   8356			       struct bpf_reg_state *src_reg)
   8357{
   8358	s64 smin_val = src_reg->smin_value;
   8359	u64 umin_val = src_reg->umin_value;
   8360	u64 umax_val = src_reg->umax_value;
   8361
   8362	if (smin_val < 0 || dst_reg->smin_value < 0) {
   8363		/* Ain't nobody got time to multiply that sign */
   8364		__mark_reg64_unbounded(dst_reg);
   8365		return;
   8366	}
   8367	/* Both values are positive, so we can work with unsigned and
   8368	 * copy the result to signed (unless it exceeds S64_MAX).
   8369	 */
   8370	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
   8371		/* Potential overflow, we know nothing */
   8372		__mark_reg64_unbounded(dst_reg);
   8373		return;
   8374	}
   8375	dst_reg->umin_value *= umin_val;
   8376	dst_reg->umax_value *= umax_val;
   8377	if (dst_reg->umax_value > S64_MAX) {
   8378		/* Overflow possible, we know nothing */
   8379		dst_reg->smin_value = S64_MIN;
   8380		dst_reg->smax_value = S64_MAX;
   8381	} else {
   8382		dst_reg->smin_value = dst_reg->umin_value;
   8383		dst_reg->smax_value = dst_reg->umax_value;
   8384	}
   8385}
   8386
   8387static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
   8388				 struct bpf_reg_state *src_reg)
   8389{
   8390	bool src_known = tnum_subreg_is_const(src_reg->var_off);
   8391	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
   8392	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
   8393	s32 smin_val = src_reg->s32_min_value;
   8394	u32 umax_val = src_reg->u32_max_value;
   8395
   8396	if (src_known && dst_known) {
   8397		__mark_reg32_known(dst_reg, var32_off.value);
   8398		return;
   8399	}
   8400
   8401	/* We get our minimum from the var_off, since that's inherently
   8402	 * bitwise.  Our maximum is the minimum of the operands' maxima.
   8403	 */
   8404	dst_reg->u32_min_value = var32_off.value;
   8405	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
   8406	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
   8407		/* Lose signed bounds when ANDing negative numbers,
   8408		 * ain't nobody got time for that.
   8409		 */
   8410		dst_reg->s32_min_value = S32_MIN;
   8411		dst_reg->s32_max_value = S32_MAX;
   8412	} else {
   8413		/* ANDing two positives gives a positive, so safe to
    8414		 * cast result into s32.
   8415		 */
   8416		dst_reg->s32_min_value = dst_reg->u32_min_value;
   8417		dst_reg->s32_max_value = dst_reg->u32_max_value;
   8418	}
   8419}
   8420
   8421static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
   8422			       struct bpf_reg_state *src_reg)
   8423{
   8424	bool src_known = tnum_is_const(src_reg->var_off);
   8425	bool dst_known = tnum_is_const(dst_reg->var_off);
   8426	s64 smin_val = src_reg->smin_value;
   8427	u64 umax_val = src_reg->umax_value;
   8428
   8429	if (src_known && dst_known) {
   8430		__mark_reg_known(dst_reg, dst_reg->var_off.value);
   8431		return;
   8432	}
   8433
   8434	/* We get our minimum from the var_off, since that's inherently
   8435	 * bitwise.  Our maximum is the minimum of the operands' maxima.
   8436	 */
   8437	dst_reg->umin_value = dst_reg->var_off.value;
   8438	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
   8439	if (dst_reg->smin_value < 0 || smin_val < 0) {
   8440		/* Lose signed bounds when ANDing negative numbers,
   8441		 * ain't nobody got time for that.
   8442		 */
   8443		dst_reg->smin_value = S64_MIN;
   8444		dst_reg->smax_value = S64_MAX;
   8445	} else {
   8446		/* ANDing two positives gives a positive, so safe to
   8447		 * cast result into s64.
   8448		 */
   8449		dst_reg->smin_value = dst_reg->umin_value;
   8450		dst_reg->smax_value = dst_reg->umax_value;
   8451	}
   8452	/* We may learn something more from the var_off */
   8453	__update_reg_bounds(dst_reg);
   8454}
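
/* Worked example (sketch): dst is an unknown byte with var_off (0x0; 0xff)
 * and bounds [0, 255], src is the constant 0x0f. The caller has already set
 * dst->var_off = tnum_and(...) = (0x0; 0xf), so here umin_value becomes 0,
 * umax_value becomes min(255, 15) = 15, and since both operands are
 * non-negative the signed bounds become [0, 15] as well.
 */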
   8455
   8456static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
   8457				struct bpf_reg_state *src_reg)
   8458{
   8459	bool src_known = tnum_subreg_is_const(src_reg->var_off);
   8460	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
   8461	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
   8462	s32 smin_val = src_reg->s32_min_value;
   8463	u32 umin_val = src_reg->u32_min_value;
   8464
   8465	if (src_known && dst_known) {
   8466		__mark_reg32_known(dst_reg, var32_off.value);
   8467		return;
   8468	}
   8469
   8470	/* We get our maximum from the var_off, and our minimum is the
   8471	 * maximum of the operands' minima
   8472	 */
   8473	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
   8474	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
   8475	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
   8476		/* Lose signed bounds when ORing negative numbers,
   8477		 * ain't nobody got time for that.
   8478		 */
   8479		dst_reg->s32_min_value = S32_MIN;
   8480		dst_reg->s32_max_value = S32_MAX;
   8481	} else {
   8482		/* ORing two positives gives a positive, so safe to
    8483		 * cast result into s32.
   8484		 */
   8485		dst_reg->s32_min_value = dst_reg->u32_min_value;
   8486		dst_reg->s32_max_value = dst_reg->u32_max_value;
   8487	}
   8488}
   8489
   8490static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
   8491			      struct bpf_reg_state *src_reg)
   8492{
   8493	bool src_known = tnum_is_const(src_reg->var_off);
   8494	bool dst_known = tnum_is_const(dst_reg->var_off);
   8495	s64 smin_val = src_reg->smin_value;
   8496	u64 umin_val = src_reg->umin_value;
   8497
   8498	if (src_known && dst_known) {
   8499		__mark_reg_known(dst_reg, dst_reg->var_off.value);
   8500		return;
   8501	}
   8502
   8503	/* We get our maximum from the var_off, and our minimum is the
   8504	 * maximum of the operands' minima
   8505	 */
   8506	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
   8507	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
   8508	if (dst_reg->smin_value < 0 || smin_val < 0) {
   8509		/* Lose signed bounds when ORing negative numbers,
   8510		 * ain't nobody got time for that.
   8511		 */
   8512		dst_reg->smin_value = S64_MIN;
   8513		dst_reg->smax_value = S64_MAX;
   8514	} else {
   8515		/* ORing two positives gives a positive, so safe to
   8516		 * cast result into s64.
   8517		 */
   8518		dst_reg->smin_value = dst_reg->umin_value;
   8519		dst_reg->smax_value = dst_reg->umax_value;
   8520	}
   8521	/* We may learn something more from the var_off */
   8522	__update_reg_bounds(dst_reg);
   8523}
   8524
   8525static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
   8526				 struct bpf_reg_state *src_reg)
   8527{
   8528	bool src_known = tnum_subreg_is_const(src_reg->var_off);
   8529	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
   8530	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
   8531	s32 smin_val = src_reg->s32_min_value;
   8532
   8533	if (src_known && dst_known) {
   8534		__mark_reg32_known(dst_reg, var32_off.value);
   8535		return;
   8536	}
   8537
   8538	/* We get both minimum and maximum from the var32_off. */
   8539	dst_reg->u32_min_value = var32_off.value;
   8540	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
   8541
   8542	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
   8543		/* XORing two positive sign numbers gives a positive,
   8544		 * so safe to cast u32 result into s32.
   8545		 */
   8546		dst_reg->s32_min_value = dst_reg->u32_min_value;
   8547		dst_reg->s32_max_value = dst_reg->u32_max_value;
   8548	} else {
   8549		dst_reg->s32_min_value = S32_MIN;
   8550		dst_reg->s32_max_value = S32_MAX;
   8551	}
   8552}
   8553
   8554static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
   8555			       struct bpf_reg_state *src_reg)
   8556{
   8557	bool src_known = tnum_is_const(src_reg->var_off);
   8558	bool dst_known = tnum_is_const(dst_reg->var_off);
   8559	s64 smin_val = src_reg->smin_value;
   8560
   8561	if (src_known && dst_known) {
   8562		/* dst_reg->var_off.value has been updated earlier */
   8563		__mark_reg_known(dst_reg, dst_reg->var_off.value);
   8564		return;
   8565	}
   8566
   8567	/* We get both minimum and maximum from the var_off. */
   8568	dst_reg->umin_value = dst_reg->var_off.value;
   8569	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
   8570
   8571	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
   8572		/* XORing two positive sign numbers gives a positive,
   8573		 * so safe to cast u64 result into s64.
   8574		 */
   8575		dst_reg->smin_value = dst_reg->umin_value;
   8576		dst_reg->smax_value = dst_reg->umax_value;
   8577	} else {
   8578		dst_reg->smin_value = S64_MIN;
   8579		dst_reg->smax_value = S64_MAX;
   8580	}
   8581
   8582	__update_reg_bounds(dst_reg);
   8583}
   8584
   8585static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
   8586				   u64 umin_val, u64 umax_val)
   8587{
   8588	/* We lose all sign bit information (except what we can pick
   8589	 * up from var_off)
   8590	 */
   8591	dst_reg->s32_min_value = S32_MIN;
   8592	dst_reg->s32_max_value = S32_MAX;
   8593	/* If we might shift our top bit out, then we know nothing */
   8594	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
   8595		dst_reg->u32_min_value = 0;
   8596		dst_reg->u32_max_value = U32_MAX;
   8597	} else {
   8598		dst_reg->u32_min_value <<= umin_val;
   8599		dst_reg->u32_max_value <<= umax_val;
   8600	}
   8601}
   8602
   8603static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
   8604				 struct bpf_reg_state *src_reg)
   8605{
   8606	u32 umax_val = src_reg->u32_max_value;
   8607	u32 umin_val = src_reg->u32_min_value;
   8608	/* u32 alu operation will zext upper bits */
   8609	struct tnum subreg = tnum_subreg(dst_reg->var_off);
   8610
   8611	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
   8612	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
    8613	/* Not strictly required, but to be careful mark the reg64 bounds as
    8614	 * unknown so that we are forced to pick them up from the tnum and
    8615	 * zext later; if some path skips this step we are still safe.
   8616	 */
   8617	__mark_reg64_unbounded(dst_reg);
   8618	__update_reg32_bounds(dst_reg);
   8619}
   8620
   8621static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
   8622				   u64 umin_val, u64 umax_val)
   8623{
   8624	/* Special case <<32 because it is a common compiler pattern to sign
   8625	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
   8626	 * positive we know this shift will also be positive so we can track
   8627	 * bounds correctly. Otherwise we lose all sign bit information except
   8628	 * what we can pick up from var_off. Perhaps we can generalize this
   8629	 * later to shifts of any length.
   8630	 */
   8631	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
   8632		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
   8633	else
   8634		dst_reg->smax_value = S64_MAX;
   8635
   8636	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
   8637		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
   8638	else
   8639		dst_reg->smin_value = S64_MIN;
   8640
   8641	/* If we might shift our top bit out, then we know nothing */
   8642	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
   8643		dst_reg->umin_value = 0;
   8644		dst_reg->umax_value = U64_MAX;
   8645	} else {
   8646		dst_reg->umin_value <<= umin_val;
   8647		dst_reg->umax_value <<= umax_val;
   8648	}
   8649}
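
/* Example (sketch) of the <<32 special case above, matching the compiler
 * pattern for sign-extending a subregister:
 *
 *   r1 <<= 32;
 *   r1 s>>= 32;
 *
 * If r1's 32-bit signed bounds were [0, 100] before the shift, smin/smax
 * become [0, 100LL << 32] here instead of being discarded, and the
 * following arithmetic shift can recover the [0, 100] range.
 */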
   8650
   8651static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
   8652			       struct bpf_reg_state *src_reg)
   8653{
   8654	u64 umax_val = src_reg->umax_value;
   8655	u64 umin_val = src_reg->umin_value;
   8656
    8657	/* the scalar64 calc uses the 32-bit unshifted bounds, so it must be called first */
   8658	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
   8659	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
   8660
   8661	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
   8662	/* We may learn something more from the var_off */
   8663	__update_reg_bounds(dst_reg);
   8664}
   8665
   8666static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
   8667				 struct bpf_reg_state *src_reg)
   8668{
   8669	struct tnum subreg = tnum_subreg(dst_reg->var_off);
   8670	u32 umax_val = src_reg->u32_max_value;
   8671	u32 umin_val = src_reg->u32_min_value;
   8672
   8673	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
   8674	 * be negative, then either:
   8675	 * 1) src_reg might be zero, so the sign bit of the result is
   8676	 *    unknown, so we lose our signed bounds
   8677	 * 2) it's known negative, thus the unsigned bounds capture the
   8678	 *    signed bounds
   8679	 * 3) the signed bounds cross zero, so they tell us nothing
   8680	 *    about the result
   8681	 * If the value in dst_reg is known nonnegative, then again the
   8682	 * unsigned bounds capture the signed bounds.
   8683	 * Thus, in all cases it suffices to blow away our signed bounds
   8684	 * and rely on inferring new ones from the unsigned bounds and
   8685	 * var_off of the result.
   8686	 */
   8687	dst_reg->s32_min_value = S32_MIN;
   8688	dst_reg->s32_max_value = S32_MAX;
   8689
   8690	dst_reg->var_off = tnum_rshift(subreg, umin_val);
   8691	dst_reg->u32_min_value >>= umax_val;
   8692	dst_reg->u32_max_value >>= umin_val;
   8693
   8694	__mark_reg64_unbounded(dst_reg);
   8695	__update_reg32_bounds(dst_reg);
   8696}
   8697
   8698static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
   8699			       struct bpf_reg_state *src_reg)
   8700{
   8701	u64 umax_val = src_reg->umax_value;
   8702	u64 umin_val = src_reg->umin_value;
   8703
   8704	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
   8705	 * be negative, then either:
   8706	 * 1) src_reg might be zero, so the sign bit of the result is
   8707	 *    unknown, so we lose our signed bounds
   8708	 * 2) it's known negative, thus the unsigned bounds capture the
   8709	 *    signed bounds
   8710	 * 3) the signed bounds cross zero, so they tell us nothing
   8711	 *    about the result
   8712	 * If the value in dst_reg is known nonnegative, then again the
   8713	 * unsigned bounds capture the signed bounds.
   8714	 * Thus, in all cases it suffices to blow away our signed bounds
   8715	 * and rely on inferring new ones from the unsigned bounds and
   8716	 * var_off of the result.
   8717	 */
   8718	dst_reg->smin_value = S64_MIN;
   8719	dst_reg->smax_value = S64_MAX;
   8720	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
   8721	dst_reg->umin_value >>= umax_val;
   8722	dst_reg->umax_value >>= umin_val;
   8723
    8724	/* It's not easy to operate on alu32 bounds here because it depends
   8725	 * on bits being shifted in. Take easy way out and mark unbounded
   8726	 * so we can recalculate later from tnum.
   8727	 */
   8728	__mark_reg32_unbounded(dst_reg);
   8729	__update_reg_bounds(dst_reg);
   8730}
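
/* Example (sketch): r1 in [-2, -1], i.e. unsigned [U64_MAX - 1, U64_MAX],
 * shifted right by a constant 32. The signed bounds are discarded as
 * described above; the unsigned bounds become
 * [(U64_MAX - 1) >> 32, U64_MAX >> 32] = [0xffffffff, 0xffffffff], from
 * which the subsequent bounds update derives exact signed bounds again.
 */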
   8731
   8732static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
   8733				  struct bpf_reg_state *src_reg)
   8734{
   8735	u64 umin_val = src_reg->u32_min_value;
   8736
   8737	/* Upon reaching here, src_known is true and
   8738	 * umax_val is equal to umin_val.
   8739	 */
   8740	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
   8741	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
   8742
   8743	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
   8744
   8745	/* blow away the dst_reg umin_value/umax_value and rely on
   8746	 * dst_reg var_off to refine the result.
   8747	 */
   8748	dst_reg->u32_min_value = 0;
   8749	dst_reg->u32_max_value = U32_MAX;
   8750
   8751	__mark_reg64_unbounded(dst_reg);
   8752	__update_reg32_bounds(dst_reg);
   8753}
   8754
   8755static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
   8756				struct bpf_reg_state *src_reg)
   8757{
   8758	u64 umin_val = src_reg->umin_value;
   8759
   8760	/* Upon reaching here, src_known is true and umax_val is equal
   8761	 * to umin_val.
   8762	 */
   8763	dst_reg->smin_value >>= umin_val;
   8764	dst_reg->smax_value >>= umin_val;
   8765
   8766	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
   8767
   8768	/* blow away the dst_reg umin_value/umax_value and rely on
   8769	 * dst_reg var_off to refine the result.
   8770	 */
   8771	dst_reg->umin_value = 0;
   8772	dst_reg->umax_value = U64_MAX;
   8773
    8774	/* It's not easy to operate on alu32 bounds here because it depends
    8775	 * on bits being shifted in from the upper 32 bits. Take the easy way out
   8776	 * and mark unbounded so we can recalculate later from tnum.
   8777	 */
   8778	__mark_reg32_unbounded(dst_reg);
   8779	__update_reg_bounds(dst_reg);
   8780}
   8781
   8782/* WARNING: This function does calculations on 64-bit values, but the actual
   8783 * execution may occur on 32-bit values. Therefore, things like bitshifts
   8784 * need extra checks in the 32-bit case.
   8785 */
   8786static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
   8787				      struct bpf_insn *insn,
   8788				      struct bpf_reg_state *dst_reg,
   8789				      struct bpf_reg_state src_reg)
   8790{
   8791	struct bpf_reg_state *regs = cur_regs(env);
   8792	u8 opcode = BPF_OP(insn->code);
   8793	bool src_known;
   8794	s64 smin_val, smax_val;
   8795	u64 umin_val, umax_val;
   8796	s32 s32_min_val, s32_max_val;
   8797	u32 u32_min_val, u32_max_val;
   8798	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
   8799	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
   8800	int ret;
   8801
   8802	smin_val = src_reg.smin_value;
   8803	smax_val = src_reg.smax_value;
   8804	umin_val = src_reg.umin_value;
   8805	umax_val = src_reg.umax_value;
   8806
   8807	s32_min_val = src_reg.s32_min_value;
   8808	s32_max_val = src_reg.s32_max_value;
   8809	u32_min_val = src_reg.u32_min_value;
   8810	u32_max_val = src_reg.u32_max_value;
   8811
   8812	if (alu32) {
   8813		src_known = tnum_subreg_is_const(src_reg.var_off);
   8814		if ((src_known &&
   8815		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
   8816		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
   8817			/* Taint dst register if offset had invalid bounds
   8818			 * derived from e.g. dead branches.
   8819			 */
   8820			__mark_reg_unknown(env, dst_reg);
   8821			return 0;
   8822		}
   8823	} else {
   8824		src_known = tnum_is_const(src_reg.var_off);
   8825		if ((src_known &&
   8826		     (smin_val != smax_val || umin_val != umax_val)) ||
   8827		    smin_val > smax_val || umin_val > umax_val) {
   8828			/* Taint dst register if offset had invalid bounds
   8829			 * derived from e.g. dead branches.
   8830			 */
   8831			__mark_reg_unknown(env, dst_reg);
   8832			return 0;
   8833		}
   8834	}
   8835
   8836	if (!src_known &&
   8837	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
   8838		__mark_reg_unknown(env, dst_reg);
   8839		return 0;
   8840	}
   8841
   8842	if (sanitize_needed(opcode)) {
   8843		ret = sanitize_val_alu(env, insn);
   8844		if (ret < 0)
   8845			return sanitize_err(env, insn, ret, NULL, NULL);
   8846	}
   8847
   8848	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
    8849	 * There are two classes of instructions: for the first class we track
    8850	 * both alu32 and alu64 sign/unsigned bounds independently; this provides
    8851	 * the greatest amount of precision when alu operations are mixed with
    8852	 * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL,
    8853	 * BPF_AND, BPF_OR and BPF_XOR, which is possible because these ops have
    8854	 * easy to understand and calculate behavior in both 32-bit and 64-bit alu ops.
   8855	 * See alu32 verifier tests for examples. The second class of
   8856	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
   8857	 * with regards to tracking sign/unsigned bounds because the bits may
   8858	 * cross subreg boundaries in the alu64 case. When this happens we mark
   8859	 * the reg unbounded in the subreg bound space and use the resulting
   8860	 * tnum to calculate an approximation of the sign/unsigned bounds.
   8861	 */
   8862	switch (opcode) {
   8863	case BPF_ADD:
   8864		scalar32_min_max_add(dst_reg, &src_reg);
   8865		scalar_min_max_add(dst_reg, &src_reg);
   8866		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
   8867		break;
   8868	case BPF_SUB:
   8869		scalar32_min_max_sub(dst_reg, &src_reg);
   8870		scalar_min_max_sub(dst_reg, &src_reg);
   8871		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
   8872		break;
   8873	case BPF_MUL:
   8874		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
   8875		scalar32_min_max_mul(dst_reg, &src_reg);
   8876		scalar_min_max_mul(dst_reg, &src_reg);
   8877		break;
   8878	case BPF_AND:
   8879		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
   8880		scalar32_min_max_and(dst_reg, &src_reg);
   8881		scalar_min_max_and(dst_reg, &src_reg);
   8882		break;
   8883	case BPF_OR:
   8884		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
   8885		scalar32_min_max_or(dst_reg, &src_reg);
   8886		scalar_min_max_or(dst_reg, &src_reg);
   8887		break;
   8888	case BPF_XOR:
   8889		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
   8890		scalar32_min_max_xor(dst_reg, &src_reg);
   8891		scalar_min_max_xor(dst_reg, &src_reg);
   8892		break;
   8893	case BPF_LSH:
   8894		if (umax_val >= insn_bitness) {
   8895			/* Shifts greater than 31 or 63 are undefined.
   8896			 * This includes shifts by a negative number.
   8897			 */
   8898			mark_reg_unknown(env, regs, insn->dst_reg);
   8899			break;
   8900		}
   8901		if (alu32)
   8902			scalar32_min_max_lsh(dst_reg, &src_reg);
   8903		else
   8904			scalar_min_max_lsh(dst_reg, &src_reg);
   8905		break;
   8906	case BPF_RSH:
   8907		if (umax_val >= insn_bitness) {
   8908			/* Shifts greater than 31 or 63 are undefined.
   8909			 * This includes shifts by a negative number.
   8910			 */
   8911			mark_reg_unknown(env, regs, insn->dst_reg);
   8912			break;
   8913		}
   8914		if (alu32)
   8915			scalar32_min_max_rsh(dst_reg, &src_reg);
   8916		else
   8917			scalar_min_max_rsh(dst_reg, &src_reg);
   8918		break;
   8919	case BPF_ARSH:
   8920		if (umax_val >= insn_bitness) {
   8921			/* Shifts greater than 31 or 63 are undefined.
   8922			 * This includes shifts by a negative number.
   8923			 */
   8924			mark_reg_unknown(env, regs, insn->dst_reg);
   8925			break;
   8926		}
   8927		if (alu32)
   8928			scalar32_min_max_arsh(dst_reg, &src_reg);
   8929		else
   8930			scalar_min_max_arsh(dst_reg, &src_reg);
   8931		break;
   8932	default:
   8933		mark_reg_unknown(env, regs, insn->dst_reg);
   8934		break;
   8935	}
   8936
   8937	/* ALU32 ops are zero extended into 64bit register */
   8938	if (alu32)
   8939		zext_32_to_64(dst_reg);
   8940	reg_bounds_sync(dst_reg);
   8941	return 0;
   8942}
   8943
   8944/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
   8945 * and var_off.
   8946 */
   8947static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
   8948				   struct bpf_insn *insn)
   8949{
   8950	struct bpf_verifier_state *vstate = env->cur_state;
   8951	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   8952	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
   8953	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
   8954	u8 opcode = BPF_OP(insn->code);
   8955	int err;
   8956
   8957	dst_reg = &regs[insn->dst_reg];
   8958	src_reg = NULL;
   8959	if (dst_reg->type != SCALAR_VALUE)
   8960		ptr_reg = dst_reg;
   8961	else
   8962		/* Make sure ID is cleared otherwise dst_reg min/max could be
   8963		 * incorrectly propagated into other registers by find_equal_scalars()
   8964		 */
   8965		dst_reg->id = 0;
   8966	if (BPF_SRC(insn->code) == BPF_X) {
   8967		src_reg = &regs[insn->src_reg];
   8968		if (src_reg->type != SCALAR_VALUE) {
   8969			if (dst_reg->type != SCALAR_VALUE) {
   8970				/* Combining two pointers by any ALU op yields
   8971				 * an arbitrary scalar. Disallow all math except
   8972				 * pointer subtraction
   8973				 */
   8974				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
   8975					mark_reg_unknown(env, regs, insn->dst_reg);
   8976					return 0;
   8977				}
   8978				verbose(env, "R%d pointer %s pointer prohibited\n",
   8979					insn->dst_reg,
   8980					bpf_alu_string[opcode >> 4]);
   8981				return -EACCES;
   8982			} else {
   8983				/* scalar += pointer
   8984				 * This is legal, but we have to reverse our
   8985				 * src/dest handling in computing the range
   8986				 */
   8987				err = mark_chain_precision(env, insn->dst_reg);
   8988				if (err)
   8989					return err;
   8990				return adjust_ptr_min_max_vals(env, insn,
   8991							       src_reg, dst_reg);
   8992			}
   8993		} else if (ptr_reg) {
   8994			/* pointer += scalar */
   8995			err = mark_chain_precision(env, insn->src_reg);
   8996			if (err)
   8997				return err;
   8998			return adjust_ptr_min_max_vals(env, insn,
   8999						       dst_reg, src_reg);
   9000		}
   9001	} else {
   9002		/* Pretend the src is a reg with a known value, since we only
   9003		 * need to be able to read from this state.
   9004		 */
   9005		off_reg.type = SCALAR_VALUE;
   9006		__mark_reg_known(&off_reg, insn->imm);
   9007		src_reg = &off_reg;
   9008		if (ptr_reg) /* pointer += K */
   9009			return adjust_ptr_min_max_vals(env, insn,
   9010						       ptr_reg, src_reg);
   9011	}
   9012
   9013	/* Got here implies adding two SCALAR_VALUEs */
   9014	if (WARN_ON_ONCE(ptr_reg)) {
   9015		print_verifier_state(env, state, true);
   9016		verbose(env, "verifier internal error: unexpected ptr_reg\n");
   9017		return -EINVAL;
   9018	}
   9019	if (WARN_ON(!src_reg)) {
   9020		print_verifier_state(env, state, true);
   9021		verbose(env, "verifier internal error: no src_reg\n");
   9022		return -EINVAL;
   9023	}
   9024	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
   9025}
   9026
   9027/* check validity of 32-bit and 64-bit arithmetic operations */
   9028static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
   9029{
   9030	struct bpf_reg_state *regs = cur_regs(env);
   9031	u8 opcode = BPF_OP(insn->code);
   9032	int err;
   9033
   9034	if (opcode == BPF_END || opcode == BPF_NEG) {
   9035		if (opcode == BPF_NEG) {
   9036			if (BPF_SRC(insn->code) != 0 ||
   9037			    insn->src_reg != BPF_REG_0 ||
   9038			    insn->off != 0 || insn->imm != 0) {
   9039				verbose(env, "BPF_NEG uses reserved fields\n");
   9040				return -EINVAL;
   9041			}
   9042		} else {
   9043			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
   9044			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
   9045			    BPF_CLASS(insn->code) == BPF_ALU64) {
   9046				verbose(env, "BPF_END uses reserved fields\n");
   9047				return -EINVAL;
   9048			}
   9049		}
   9050
   9051		/* check src operand */
   9052		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   9053		if (err)
   9054			return err;
   9055
   9056		if (is_pointer_value(env, insn->dst_reg)) {
   9057			verbose(env, "R%d pointer arithmetic prohibited\n",
   9058				insn->dst_reg);
   9059			return -EACCES;
   9060		}
   9061
   9062		/* check dest operand */
   9063		err = check_reg_arg(env, insn->dst_reg, DST_OP);
   9064		if (err)
   9065			return err;
   9066
   9067	} else if (opcode == BPF_MOV) {
   9068
   9069		if (BPF_SRC(insn->code) == BPF_X) {
   9070			if (insn->imm != 0 || insn->off != 0) {
   9071				verbose(env, "BPF_MOV uses reserved fields\n");
   9072				return -EINVAL;
   9073			}
   9074
   9075			/* check src operand */
   9076			err = check_reg_arg(env, insn->src_reg, SRC_OP);
   9077			if (err)
   9078				return err;
   9079		} else {
   9080			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
   9081				verbose(env, "BPF_MOV uses reserved fields\n");
   9082				return -EINVAL;
   9083			}
   9084		}
   9085
   9086		/* check dest operand, mark as required later */
   9087		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
   9088		if (err)
   9089			return err;
   9090
   9091		if (BPF_SRC(insn->code) == BPF_X) {
   9092			struct bpf_reg_state *src_reg = regs + insn->src_reg;
   9093			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
   9094
   9095			if (BPF_CLASS(insn->code) == BPF_ALU64) {
   9096				/* case: R1 = R2
   9097				 * copy register state to dest reg
   9098				 */
   9099				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
   9100					/* Assign src and dst registers the same ID
   9101					 * that will be used by find_equal_scalars()
   9102					 * to propagate min/max range.
   9103					 */
   9104					src_reg->id = ++env->id_gen;
   9105				*dst_reg = *src_reg;
   9106				dst_reg->live |= REG_LIVE_WRITTEN;
   9107				dst_reg->subreg_def = DEF_NOT_SUBREG;
   9108			} else {
   9109				/* R1 = (u32) R2 */
   9110				if (is_pointer_value(env, insn->src_reg)) {
   9111					verbose(env,
   9112						"R%d partial copy of pointer\n",
   9113						insn->src_reg);
   9114					return -EACCES;
   9115				} else if (src_reg->type == SCALAR_VALUE) {
   9116					*dst_reg = *src_reg;
   9117					/* Make sure ID is cleared otherwise
   9118					 * dst_reg min/max could be incorrectly
   9119					 * propagated into src_reg by find_equal_scalars()
   9120					 */
   9121					dst_reg->id = 0;
   9122					dst_reg->live |= REG_LIVE_WRITTEN;
   9123					dst_reg->subreg_def = env->insn_idx + 1;
   9124				} else {
   9125					mark_reg_unknown(env, regs,
   9126							 insn->dst_reg);
   9127				}
   9128				zext_32_to_64(dst_reg);
   9129				reg_bounds_sync(dst_reg);
   9130			}
   9131		} else {
   9132			/* case: R = imm
   9133			 * remember the value we stored into this reg
   9134			 */
   9135			/* clear any state __mark_reg_known doesn't set */
   9136			mark_reg_unknown(env, regs, insn->dst_reg);
   9137			regs[insn->dst_reg].type = SCALAR_VALUE;
   9138			if (BPF_CLASS(insn->code) == BPF_ALU64) {
   9139				__mark_reg_known(regs + insn->dst_reg,
   9140						 insn->imm);
   9141			} else {
   9142				__mark_reg_known(regs + insn->dst_reg,
   9143						 (u32)insn->imm);
   9144			}
   9145		}
   9146
   9147	} else if (opcode > BPF_END) {
   9148		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
   9149		return -EINVAL;
   9150
   9151	} else {	/* all other ALU ops: and, sub, xor, add, ... */
   9152
   9153		if (BPF_SRC(insn->code) == BPF_X) {
   9154			if (insn->imm != 0 || insn->off != 0) {
   9155				verbose(env, "BPF_ALU uses reserved fields\n");
   9156				return -EINVAL;
   9157			}
   9158			/* check src1 operand */
   9159			err = check_reg_arg(env, insn->src_reg, SRC_OP);
   9160			if (err)
   9161				return err;
   9162		} else {
   9163			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
   9164				verbose(env, "BPF_ALU uses reserved fields\n");
   9165				return -EINVAL;
   9166			}
   9167		}
   9168
   9169		/* check src2 operand */
   9170		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
   9171		if (err)
   9172			return err;
   9173
   9174		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
   9175		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
   9176			verbose(env, "div by zero\n");
   9177			return -EINVAL;
   9178		}
   9179
   9180		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
   9181		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
   9182			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
   9183
   9184			if (insn->imm < 0 || insn->imm >= size) {
   9185				verbose(env, "invalid shift %d\n", insn->imm);
   9186				return -EINVAL;
   9187			}
   9188		}
   9189
   9190		/* check dest operand */
   9191		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
   9192		if (err)
   9193			return err;
   9194
   9195		return adjust_reg_min_max_vals(env, insn);
   9196	}
   9197
   9198	return 0;
   9199}
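
/* Examples (sketch) of ALU encodings rejected by the checks above:
 *
 *   BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, 0)    -> "div by zero"
 *   BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 64)   -> "invalid shift 64"
 *   BPF_ALU32_IMM(BPF_ARSH, BPF_REG_1, 32)  -> "invalid shift 32"
 *
 * A shift by a register (BPF_X) is not range-checked here; instead
 * adjust_scalar_min_max_vals() marks the destination unknown when the
 * shift amount may be >= the instruction bitness.
 */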
   9200
   9201static void __find_good_pkt_pointers(struct bpf_func_state *state,
   9202				     struct bpf_reg_state *dst_reg,
   9203				     enum bpf_reg_type type, int new_range)
   9204{
   9205	struct bpf_reg_state *reg;
   9206	int i;
   9207
   9208	for (i = 0; i < MAX_BPF_REG; i++) {
   9209		reg = &state->regs[i];
   9210		if (reg->type == type && reg->id == dst_reg->id)
   9211			/* keep the maximum range already checked */
   9212			reg->range = max(reg->range, new_range);
   9213	}
   9214
   9215	bpf_for_each_spilled_reg(i, state, reg) {
   9216		if (!reg)
   9217			continue;
   9218		if (reg->type == type && reg->id == dst_reg->id)
   9219			reg->range = max(reg->range, new_range);
   9220	}
   9221}
   9222
   9223static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
   9224				   struct bpf_reg_state *dst_reg,
   9225				   enum bpf_reg_type type,
   9226				   bool range_right_open)
   9227{
   9228	int new_range, i;
   9229
   9230	if (dst_reg->off < 0 ||
   9231	    (dst_reg->off == 0 && range_right_open))
   9232		/* This doesn't give us any range */
   9233		return;
   9234
   9235	if (dst_reg->umax_value > MAX_PACKET_OFF ||
   9236	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
   9237		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
   9238		 * than pkt_end, but that's because it's also less than pkt.
   9239		 */
   9240		return;
   9241
   9242	new_range = dst_reg->off;
   9243	if (range_right_open)
   9244		new_range++;
   9245
   9246	/* Examples for register markings:
   9247	 *
   9248	 * pkt_data in dst register:
   9249	 *
   9250	 *   r2 = r3;
   9251	 *   r2 += 8;
   9252	 *   if (r2 > pkt_end) goto <handle exception>
   9253	 *   <access okay>
   9254	 *
   9255	 *   r2 = r3;
   9256	 *   r2 += 8;
   9257	 *   if (r2 < pkt_end) goto <access okay>
   9258	 *   <handle exception>
   9259	 *
   9260	 *   Where:
   9261	 *     r2 == dst_reg, pkt_end == src_reg
   9262	 *     r2=pkt(id=n,off=8,r=0)
   9263	 *     r3=pkt(id=n,off=0,r=0)
   9264	 *
   9265	 * pkt_data in src register:
   9266	 *
   9267	 *   r2 = r3;
   9268	 *   r2 += 8;
   9269	 *   if (pkt_end >= r2) goto <access okay>
   9270	 *   <handle exception>
   9271	 *
   9272	 *   r2 = r3;
   9273	 *   r2 += 8;
   9274	 *   if (pkt_end <= r2) goto <handle exception>
   9275	 *   <access okay>
   9276	 *
   9277	 *   Where:
   9278	 *     pkt_end == dst_reg, r2 == src_reg
   9279	 *     r2=pkt(id=n,off=8,r=0)
   9280	 *     r3=pkt(id=n,off=0,r=0)
   9281	 *
   9282	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
   9283	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
   9284	 * and [r3, r3 + 8-1) respectively is safe to access depending on
   9285	 * the check.
   9286	 */
   9287
   9288	/* If our ids match, then we must have the same max_value.  And we
   9289	 * don't care about the other reg's fixed offset, since if it's too big
   9290	 * the range won't allow anything.
   9291	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
   9292	 */
   9293	for (i = 0; i <= vstate->curframe; i++)
   9294		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
   9295					 new_range);
   9296}
   9297
   9298static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
   9299{
   9300	struct tnum subreg = tnum_subreg(reg->var_off);
   9301	s32 sval = (s32)val;
   9302
   9303	switch (opcode) {
   9304	case BPF_JEQ:
   9305		if (tnum_is_const(subreg))
   9306			return !!tnum_equals_const(subreg, val);
   9307		break;
   9308	case BPF_JNE:
   9309		if (tnum_is_const(subreg))
   9310			return !tnum_equals_const(subreg, val);
   9311		break;
   9312	case BPF_JSET:
   9313		if ((~subreg.mask & subreg.value) & val)
   9314			return 1;
   9315		if (!((subreg.mask | subreg.value) & val))
   9316			return 0;
   9317		break;
   9318	case BPF_JGT:
   9319		if (reg->u32_min_value > val)
   9320			return 1;
   9321		else if (reg->u32_max_value <= val)
   9322			return 0;
   9323		break;
   9324	case BPF_JSGT:
   9325		if (reg->s32_min_value > sval)
   9326			return 1;
   9327		else if (reg->s32_max_value <= sval)
   9328			return 0;
   9329		break;
   9330	case BPF_JLT:
   9331		if (reg->u32_max_value < val)
   9332			return 1;
   9333		else if (reg->u32_min_value >= val)
   9334			return 0;
   9335		break;
   9336	case BPF_JSLT:
   9337		if (reg->s32_max_value < sval)
   9338			return 1;
   9339		else if (reg->s32_min_value >= sval)
   9340			return 0;
   9341		break;
   9342	case BPF_JGE:
   9343		if (reg->u32_min_value >= val)
   9344			return 1;
   9345		else if (reg->u32_max_value < val)
   9346			return 0;
   9347		break;
   9348	case BPF_JSGE:
   9349		if (reg->s32_min_value >= sval)
   9350			return 1;
   9351		else if (reg->s32_max_value < sval)
   9352			return 0;
   9353		break;
   9354	case BPF_JLE:
   9355		if (reg->u32_max_value <= val)
   9356			return 1;
   9357		else if (reg->u32_min_value > val)
   9358			return 0;
   9359		break;
   9360	case BPF_JSLE:
   9361		if (reg->s32_max_value <= sval)
   9362			return 1;
   9363		else if (reg->s32_min_value > sval)
   9364			return 0;
   9365		break;
   9366	}
   9367
   9368	return -1;
   9369}
    9370
   9372static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
   9373{
   9374	s64 sval = (s64)val;
   9375
   9376	switch (opcode) {
   9377	case BPF_JEQ:
   9378		if (tnum_is_const(reg->var_off))
   9379			return !!tnum_equals_const(reg->var_off, val);
   9380		break;
   9381	case BPF_JNE:
   9382		if (tnum_is_const(reg->var_off))
   9383			return !tnum_equals_const(reg->var_off, val);
   9384		break;
   9385	case BPF_JSET:
   9386		if ((~reg->var_off.mask & reg->var_off.value) & val)
   9387			return 1;
   9388		if (!((reg->var_off.mask | reg->var_off.value) & val))
   9389			return 0;
   9390		break;
   9391	case BPF_JGT:
   9392		if (reg->umin_value > val)
   9393			return 1;
   9394		else if (reg->umax_value <= val)
   9395			return 0;
   9396		break;
   9397	case BPF_JSGT:
   9398		if (reg->smin_value > sval)
   9399			return 1;
   9400		else if (reg->smax_value <= sval)
   9401			return 0;
   9402		break;
   9403	case BPF_JLT:
   9404		if (reg->umax_value < val)
   9405			return 1;
   9406		else if (reg->umin_value >= val)
   9407			return 0;
   9408		break;
   9409	case BPF_JSLT:
   9410		if (reg->smax_value < sval)
   9411			return 1;
   9412		else if (reg->smin_value >= sval)
   9413			return 0;
   9414		break;
   9415	case BPF_JGE:
   9416		if (reg->umin_value >= val)
   9417			return 1;
   9418		else if (reg->umax_value < val)
   9419			return 0;
   9420		break;
   9421	case BPF_JSGE:
   9422		if (reg->smin_value >= sval)
   9423			return 1;
   9424		else if (reg->smax_value < sval)
   9425			return 0;
   9426		break;
   9427	case BPF_JLE:
   9428		if (reg->umax_value <= val)
   9429			return 1;
   9430		else if (reg->umin_value > val)
   9431			return 0;
   9432		break;
   9433	case BPF_JSLE:
   9434		if (reg->smax_value <= sval)
   9435			return 1;
   9436		else if (reg->smin_value > sval)
   9437			return 0;
   9438		break;
   9439	}
   9440
   9441	return -1;
   9442}
   9443
   9444/* compute branch direction of the expression "if (reg opcode val) goto target;"
   9445 * and return:
   9446 *  1 - branch will be taken and "goto target" will be executed
   9447 *  0 - branch will not be taken and fall-through to next insn
    9448	 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
    9449	 *      value range is [0,10]
   9450 */
   9451static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
   9452			   bool is_jmp32)
   9453{
   9454	if (__is_pointer_value(false, reg)) {
   9455		if (!reg_type_not_null(reg->type))
   9456			return -1;
   9457
    9458		/* If the pointer is valid, tests against zero will fail, so we
    9459		 * can use this to decide the branch taken.
   9460		 */
   9461		if (val != 0)
   9462			return -1;
   9463
   9464		switch (opcode) {
   9465		case BPF_JEQ:
   9466			return 0;
   9467		case BPF_JNE:
   9468			return 1;
   9469		default:
   9470			return -1;
   9471		}
   9472	}
   9473
   9474	if (is_jmp32)
   9475		return is_branch32_taken(reg, val, opcode);
   9476	return is_branch64_taken(reg, val, opcode);
   9477}
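
/* Worked example for is_branch64_taken() above (illustrative): assume reg is
 * a scalar with umin_value == 3 and umax_value == 7 and the opcode is
 * BPF_JGT, i.e. "if (reg > val)":
 *
 *   val == 2: umin_value (3) >  2   -> 1, branch always taken
 *   val == 7: umax_value (7) <= 7   -> 0, branch never taken
 *   val == 5: neither bound decides -> -1, both paths must be explored
 *
 * is_branch32_taken() applies the same reasoning to the 32-bit subregister
 * bounds for BPF_JMP32 instructions.
 */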
   9478
   9479static int flip_opcode(u32 opcode)
   9480{
   9481	/* How can we transform "a <op> b" into "b <op> a"? */
   9482	static const u8 opcode_flip[16] = {
   9483		/* these stay the same */
   9484		[BPF_JEQ  >> 4] = BPF_JEQ,
   9485		[BPF_JNE  >> 4] = BPF_JNE,
   9486		[BPF_JSET >> 4] = BPF_JSET,
   9487		/* these swap "lesser" and "greater" (L and G in the opcodes) */
   9488		[BPF_JGE  >> 4] = BPF_JLE,
   9489		[BPF_JGT  >> 4] = BPF_JLT,
   9490		[BPF_JLE  >> 4] = BPF_JGE,
   9491		[BPF_JLT  >> 4] = BPF_JGT,
   9492		[BPF_JSGE >> 4] = BPF_JSLE,
   9493		[BPF_JSGT >> 4] = BPF_JSLT,
   9494		[BPF_JSLE >> 4] = BPF_JSGE,
   9495		[BPF_JSLT >> 4] = BPF_JSGT
   9496	};
   9497	return opcode_flip[opcode >> 4];
   9498}
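
/* Example: is_pkt_ptr_branch_taken() below flips "if (pkt_end >= r2)"
 * (BPF_JGE with the packet pointer in src_reg) into the equivalent
 * "if (r2 <= pkt_end)" (BPF_JLE), and reg_set_min_max_inv() flips
 * "if (5 < r1)" (constant in dst_reg, BPF_JLT) into "if (r1 > 5)" (BPF_JGT),
 * so that the generic helpers only have to reason about the variable or
 * packet pointer on the left-hand side.
 */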
   9499
   9500static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
   9501				   struct bpf_reg_state *src_reg,
   9502				   u8 opcode)
   9503{
   9504	struct bpf_reg_state *pkt;
   9505
   9506	if (src_reg->type == PTR_TO_PACKET_END) {
   9507		pkt = dst_reg;
   9508	} else if (dst_reg->type == PTR_TO_PACKET_END) {
   9509		pkt = src_reg;
   9510		opcode = flip_opcode(opcode);
   9511	} else {
   9512		return -1;
   9513	}
   9514
   9515	if (pkt->range >= 0)
   9516		return -1;
   9517
   9518	switch (opcode) {
   9519	case BPF_JLE:
   9520		/* pkt <= pkt_end */
   9521		fallthrough;
   9522	case BPF_JGT:
   9523		/* pkt > pkt_end */
   9524		if (pkt->range == BEYOND_PKT_END)
    9525			/* pkt has at least one extra byte beyond pkt_end */
   9526			return opcode == BPF_JGT;
   9527		break;
   9528	case BPF_JLT:
   9529		/* pkt < pkt_end */
   9530		fallthrough;
   9531	case BPF_JGE:
   9532		/* pkt >= pkt_end */
   9533		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
   9534			return opcode == BPF_JGE;
   9535		break;
   9536	}
   9537	return -1;
   9538}
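
/* Example: once mark_pkt_end() has recorded pkt->range == BEYOND_PKT_END for
 * r2 (a prior "if (r2 > pkt_end)" was taken), the helper above resolves a
 * later "if (r2 > pkt_end)" to 1 and "if (r2 <= pkt_end)" to 0.  With
 * AT_PKT_END (r2 is known to be at or beyond pkt_end) only the >= / < pair
 * can be decided.
 */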
   9539
   9540/* Adjusts the register min/max values in the case that the dst_reg is the
   9541 * variable register that we are working on, and src_reg is a constant or we're
   9542 * simply doing a BPF_K check.
   9543 * In JEQ/JNE cases we also adjust the var_off values.
   9544 */
   9545static void reg_set_min_max(struct bpf_reg_state *true_reg,
   9546			    struct bpf_reg_state *false_reg,
   9547			    u64 val, u32 val32,
   9548			    u8 opcode, bool is_jmp32)
   9549{
   9550	struct tnum false_32off = tnum_subreg(false_reg->var_off);
   9551	struct tnum false_64off = false_reg->var_off;
   9552	struct tnum true_32off = tnum_subreg(true_reg->var_off);
   9553	struct tnum true_64off = true_reg->var_off;
   9554	s64 sval = (s64)val;
   9555	s32 sval32 = (s32)val32;
   9556
   9557	/* If the dst_reg is a pointer, we can't learn anything about its
   9558	 * variable offset from the compare (unless src_reg were a pointer into
    9559	 * the same object, but we don't bother with that).
   9560	 * Since false_reg and true_reg have the same type by construction, we
   9561	 * only need to check one of them for pointerness.
   9562	 */
   9563	if (__is_pointer_value(false, false_reg))
   9564		return;
   9565
   9566	switch (opcode) {
   9567	/* JEQ/JNE comparison doesn't change the register equivalence.
   9568	 *
   9569	 * r1 = r2;
   9570	 * if (r1 == 42) goto label;
   9571	 * ...
   9572	 * label: // here both r1 and r2 are known to be 42.
   9573	 *
    9574	 * Hence when marking a register as known, preserve its ID.
   9575	 */
   9576	case BPF_JEQ:
   9577		if (is_jmp32) {
   9578			__mark_reg32_known(true_reg, val32);
   9579			true_32off = tnum_subreg(true_reg->var_off);
   9580		} else {
   9581			___mark_reg_known(true_reg, val);
   9582			true_64off = true_reg->var_off;
   9583		}
   9584		break;
   9585	case BPF_JNE:
   9586		if (is_jmp32) {
   9587			__mark_reg32_known(false_reg, val32);
   9588			false_32off = tnum_subreg(false_reg->var_off);
   9589		} else {
   9590			___mark_reg_known(false_reg, val);
   9591			false_64off = false_reg->var_off;
   9592		}
   9593		break;
   9594	case BPF_JSET:
   9595		if (is_jmp32) {
   9596			false_32off = tnum_and(false_32off, tnum_const(~val32));
   9597			if (is_power_of_2(val32))
   9598				true_32off = tnum_or(true_32off,
   9599						     tnum_const(val32));
   9600		} else {
   9601			false_64off = tnum_and(false_64off, tnum_const(~val));
   9602			if (is_power_of_2(val))
   9603				true_64off = tnum_or(true_64off,
   9604						     tnum_const(val));
   9605		}
   9606		break;
   9607	case BPF_JGE:
   9608	case BPF_JGT:
   9609	{
   9610		if (is_jmp32) {
   9611			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
   9612			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
   9613
   9614			false_reg->u32_max_value = min(false_reg->u32_max_value,
   9615						       false_umax);
   9616			true_reg->u32_min_value = max(true_reg->u32_min_value,
   9617						      true_umin);
   9618		} else {
   9619			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
   9620			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
   9621
   9622			false_reg->umax_value = min(false_reg->umax_value, false_umax);
   9623			true_reg->umin_value = max(true_reg->umin_value, true_umin);
   9624		}
   9625		break;
   9626	}
   9627	case BPF_JSGE:
   9628	case BPF_JSGT:
   9629	{
   9630		if (is_jmp32) {
   9631			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
   9632			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
   9633
   9634			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
   9635			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
   9636		} else {
   9637			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
   9638			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
   9639
   9640			false_reg->smax_value = min(false_reg->smax_value, false_smax);
   9641			true_reg->smin_value = max(true_reg->smin_value, true_smin);
   9642		}
   9643		break;
   9644	}
   9645	case BPF_JLE:
   9646	case BPF_JLT:
   9647	{
   9648		if (is_jmp32) {
   9649			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
   9650			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
   9651
   9652			false_reg->u32_min_value = max(false_reg->u32_min_value,
   9653						       false_umin);
   9654			true_reg->u32_max_value = min(true_reg->u32_max_value,
   9655						      true_umax);
   9656		} else {
   9657			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
   9658			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
   9659
   9660			false_reg->umin_value = max(false_reg->umin_value, false_umin);
   9661			true_reg->umax_value = min(true_reg->umax_value, true_umax);
   9662		}
   9663		break;
   9664	}
   9665	case BPF_JSLE:
   9666	case BPF_JSLT:
   9667	{
   9668		if (is_jmp32) {
   9669			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
   9670			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
   9671
   9672			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
   9673			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
   9674		} else {
   9675			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
   9676			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
   9677
   9678			false_reg->smin_value = max(false_reg->smin_value, false_smin);
   9679			true_reg->smax_value = min(true_reg->smax_value, true_smax);
   9680		}
   9681		break;
   9682	}
   9683	default:
   9684		return;
   9685	}
   9686
   9687	if (is_jmp32) {
   9688		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
   9689					     tnum_subreg(false_32off));
   9690		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
   9691					    tnum_subreg(true_32off));
   9692		__reg_combine_32_into_64(false_reg);
   9693		__reg_combine_32_into_64(true_reg);
   9694	} else {
   9695		false_reg->var_off = false_64off;
   9696		true_reg->var_off = true_64off;
   9697		__reg_combine_64_into_32(false_reg);
   9698		__reg_combine_64_into_32(true_reg);
   9699	}
   9700}
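
/* Worked example: for "if (r1 > 5) goto ..." (BPF_JGT, BPF_K) with r1 a
 * scalar in [0, 10], the branch copy (true_reg) gets
 * umin_value = max(0, 5 + 1) = 6, i.e. range [6, 10], while the fall-through
 * copy (false_reg) gets umax_value = min(10, 5) = 5, i.e. range [0, 5].
 * The trailing __reg_combine_64_into_32() calls then derive the matching
 * 32-bit subregister bounds from the refined 64-bit ones.
 */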
   9701
   9702/* Same as above, but for the case that dst_reg holds a constant and src_reg is
   9703 * the variable reg.
   9704 */
   9705static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
   9706				struct bpf_reg_state *false_reg,
   9707				u64 val, u32 val32,
   9708				u8 opcode, bool is_jmp32)
   9709{
   9710	opcode = flip_opcode(opcode);
   9711	/* This uses zero as "not present in table"; luckily the zero opcode,
   9712	 * BPF_JA, can't get here.
   9713	 */
   9714	if (opcode)
   9715		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
   9716}
   9717
   9718/* Regs are known to be equal, so intersect their min/max/var_off */
   9719static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
   9720				  struct bpf_reg_state *dst_reg)
   9721{
   9722	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
   9723							dst_reg->umin_value);
   9724	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
   9725							dst_reg->umax_value);
   9726	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
   9727							dst_reg->smin_value);
   9728	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
   9729							dst_reg->smax_value);
   9730	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
   9731							     dst_reg->var_off);
   9732	reg_bounds_sync(src_reg);
   9733	reg_bounds_sync(dst_reg);
   9734}
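
/* Example: with r1 in [0, 10] and r2 in [5, 15], the branch in which
 * "r1 == r2" holds intersects both registers to [5, 10]
 * (umin = max(0, 5), umax = min(10, 15)) and var_off becomes the
 * tnum_intersect() of the two; reg_combine_min_max() below applies this to
 * the taken branch of BPF_JEQ or the fall-through of BPF_JNE.
 */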
   9735
   9736static void reg_combine_min_max(struct bpf_reg_state *true_src,
   9737				struct bpf_reg_state *true_dst,
   9738				struct bpf_reg_state *false_src,
   9739				struct bpf_reg_state *false_dst,
   9740				u8 opcode)
   9741{
   9742	switch (opcode) {
   9743	case BPF_JEQ:
   9744		__reg_combine_min_max(true_src, true_dst);
   9745		break;
   9746	case BPF_JNE:
   9747		__reg_combine_min_max(false_src, false_dst);
   9748		break;
   9749	}
   9750}
   9751
   9752static void mark_ptr_or_null_reg(struct bpf_func_state *state,
   9753				 struct bpf_reg_state *reg, u32 id,
   9754				 bool is_null)
   9755{
   9756	if (type_may_be_null(reg->type) && reg->id == id &&
   9757	    !WARN_ON_ONCE(!reg->id)) {
   9758		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
   9759				 !tnum_equals_const(reg->var_off, 0) ||
   9760				 reg->off)) {
   9761			/* Old offset (both fixed and variable parts) should
   9762			 * have been known-zero, because we don't allow pointer
   9763			 * arithmetic on pointers that might be NULL. If we
   9764			 * see this happening, don't convert the register.
   9765			 */
   9766			return;
   9767		}
   9768		if (is_null) {
   9769			reg->type = SCALAR_VALUE;
    9770			/* We don't need id and ref_obj_id from this point
    9771			 * onwards anymore, so reset them to give state
    9772			 * pruning a chance to take effect.
   9773			 */
   9774			reg->id = 0;
   9775			reg->ref_obj_id = 0;
   9776
   9777			return;
   9778		}
   9779
   9780		mark_ptr_not_null_reg(reg);
   9781
   9782		if (!reg_may_point_to_spin_lock(reg)) {
   9783			/* For not-NULL ptr, reg->ref_obj_id will be reset
   9784			 * in release_reg_references().
   9785			 *
   9786			 * reg->id is still used by spin_lock ptr. Other
   9787			 * than spin_lock ptr type, reg->id can be reset.
   9788			 */
   9789			reg->id = 0;
   9790		}
   9791	}
   9792}
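
/* Example: r0 = bpf_map_lookup_elem(...) leaves r0 as a map value pointer
 * that may be NULL, with a fresh id shared by any copies of r0.  After
 * "if (r0 == 0)", every register carrying that id becomes a plain scalar in
 * the NULL branch and a non-NULL PTR_TO_MAP_VALUE (safe to dereference) in
 * the other branch, which is exactly the conversion performed above.
 */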
   9793
   9794static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
   9795				    bool is_null)
   9796{
   9797	struct bpf_reg_state *reg;
   9798	int i;
   9799
   9800	for (i = 0; i < MAX_BPF_REG; i++)
   9801		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
   9802
   9803	bpf_for_each_spilled_reg(i, state, reg) {
   9804		if (!reg)
   9805			continue;
   9806		mark_ptr_or_null_reg(state, reg, id, is_null);
   9807	}
   9808}
   9809
   9810/* The logic is similar to find_good_pkt_pointers(), both could eventually
   9811 * be folded together at some point.
   9812 */
   9813static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
   9814				  bool is_null)
   9815{
   9816	struct bpf_func_state *state = vstate->frame[vstate->curframe];
   9817	struct bpf_reg_state *regs = state->regs;
   9818	u32 ref_obj_id = regs[regno].ref_obj_id;
   9819	u32 id = regs[regno].id;
   9820	int i;
   9821
   9822	if (ref_obj_id && ref_obj_id == id && is_null)
   9823		/* regs[regno] is in the " == NULL" branch.
   9824		 * No one could have freed the reference state before
   9825		 * doing the NULL check.
   9826		 */
   9827		WARN_ON_ONCE(release_reference_state(state, id));
   9828
   9829	for (i = 0; i <= vstate->curframe; i++)
   9830		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
   9831}
   9832
   9833static bool try_match_pkt_pointers(const struct bpf_insn *insn,
   9834				   struct bpf_reg_state *dst_reg,
   9835				   struct bpf_reg_state *src_reg,
   9836				   struct bpf_verifier_state *this_branch,
   9837				   struct bpf_verifier_state *other_branch)
   9838{
   9839	if (BPF_SRC(insn->code) != BPF_X)
   9840		return false;
   9841
   9842	/* Pointers are always 64-bit. */
   9843	if (BPF_CLASS(insn->code) == BPF_JMP32)
   9844		return false;
   9845
   9846	switch (BPF_OP(insn->code)) {
   9847	case BPF_JGT:
   9848		if ((dst_reg->type == PTR_TO_PACKET &&
   9849		     src_reg->type == PTR_TO_PACKET_END) ||
   9850		    (dst_reg->type == PTR_TO_PACKET_META &&
   9851		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   9852			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
   9853			find_good_pkt_pointers(this_branch, dst_reg,
   9854					       dst_reg->type, false);
   9855			mark_pkt_end(other_branch, insn->dst_reg, true);
   9856		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   9857			    src_reg->type == PTR_TO_PACKET) ||
   9858			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   9859			    src_reg->type == PTR_TO_PACKET_META)) {
   9860			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
   9861			find_good_pkt_pointers(other_branch, src_reg,
   9862					       src_reg->type, true);
   9863			mark_pkt_end(this_branch, insn->src_reg, false);
   9864		} else {
   9865			return false;
   9866		}
   9867		break;
   9868	case BPF_JLT:
   9869		if ((dst_reg->type == PTR_TO_PACKET &&
   9870		     src_reg->type == PTR_TO_PACKET_END) ||
   9871		    (dst_reg->type == PTR_TO_PACKET_META &&
   9872		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   9873			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
   9874			find_good_pkt_pointers(other_branch, dst_reg,
   9875					       dst_reg->type, true);
   9876			mark_pkt_end(this_branch, insn->dst_reg, false);
   9877		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   9878			    src_reg->type == PTR_TO_PACKET) ||
   9879			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   9880			    src_reg->type == PTR_TO_PACKET_META)) {
   9881			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
   9882			find_good_pkt_pointers(this_branch, src_reg,
   9883					       src_reg->type, false);
   9884			mark_pkt_end(other_branch, insn->src_reg, true);
   9885		} else {
   9886			return false;
   9887		}
   9888		break;
   9889	case BPF_JGE:
   9890		if ((dst_reg->type == PTR_TO_PACKET &&
   9891		     src_reg->type == PTR_TO_PACKET_END) ||
   9892		    (dst_reg->type == PTR_TO_PACKET_META &&
   9893		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   9894			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
   9895			find_good_pkt_pointers(this_branch, dst_reg,
   9896					       dst_reg->type, true);
   9897			mark_pkt_end(other_branch, insn->dst_reg, false);
   9898		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   9899			    src_reg->type == PTR_TO_PACKET) ||
   9900			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   9901			    src_reg->type == PTR_TO_PACKET_META)) {
   9902			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
   9903			find_good_pkt_pointers(other_branch, src_reg,
   9904					       src_reg->type, false);
   9905			mark_pkt_end(this_branch, insn->src_reg, true);
   9906		} else {
   9907			return false;
   9908		}
   9909		break;
   9910	case BPF_JLE:
   9911		if ((dst_reg->type == PTR_TO_PACKET &&
   9912		     src_reg->type == PTR_TO_PACKET_END) ||
   9913		    (dst_reg->type == PTR_TO_PACKET_META &&
   9914		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
   9915			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
   9916			find_good_pkt_pointers(other_branch, dst_reg,
   9917					       dst_reg->type, false);
   9918			mark_pkt_end(this_branch, insn->dst_reg, true);
   9919		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
   9920			    src_reg->type == PTR_TO_PACKET) ||
   9921			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
   9922			    src_reg->type == PTR_TO_PACKET_META)) {
   9923			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
   9924			find_good_pkt_pointers(this_branch, src_reg,
   9925					       src_reg->type, true);
   9926			mark_pkt_end(other_branch, insn->src_reg, false);
   9927		} else {
   9928			return false;
   9929		}
   9930		break;
   9931	default:
   9932		return false;
   9933	}
   9934
   9935	return true;
   9936}
   9937
   9938static void find_equal_scalars(struct bpf_verifier_state *vstate,
   9939			       struct bpf_reg_state *known_reg)
   9940{
   9941	struct bpf_func_state *state;
   9942	struct bpf_reg_state *reg;
   9943	int i, j;
   9944
   9945	for (i = 0; i <= vstate->curframe; i++) {
   9946		state = vstate->frame[i];
   9947		for (j = 0; j < MAX_BPF_REG; j++) {
   9948			reg = &state->regs[j];
   9949			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
   9950				*reg = *known_reg;
   9951		}
   9952
   9953		bpf_for_each_spilled_reg(j, state, reg) {
   9954			if (!reg)
   9955				continue;
   9956			if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
   9957				*reg = *known_reg;
   9958		}
   9959	}
   9960}
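
/* Example:
 *
 *   r2 = r1;                // scalar copy, r1 and r2 now share an id
 *   if (r2 > 5) goto l;
 *   ...                     // here r2 has umax_value == 5, and so does r1,
 *                           // because find_equal_scalars() copies the
 *                           // refined state to every register (and spilled
 *                           // slot) carrying the same id
 */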
   9961
   9962static int check_cond_jmp_op(struct bpf_verifier_env *env,
   9963			     struct bpf_insn *insn, int *insn_idx)
   9964{
   9965	struct bpf_verifier_state *this_branch = env->cur_state;
   9966	struct bpf_verifier_state *other_branch;
   9967	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
   9968	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
   9969	u8 opcode = BPF_OP(insn->code);
   9970	bool is_jmp32;
   9971	int pred = -1;
   9972	int err;
   9973
   9974	/* Only conditional jumps are expected to reach here. */
   9975	if (opcode == BPF_JA || opcode > BPF_JSLE) {
   9976		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
   9977		return -EINVAL;
   9978	}
   9979
   9980	if (BPF_SRC(insn->code) == BPF_X) {
   9981		if (insn->imm != 0) {
   9982			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
   9983			return -EINVAL;
   9984		}
   9985
   9986		/* check src1 operand */
   9987		err = check_reg_arg(env, insn->src_reg, SRC_OP);
   9988		if (err)
   9989			return err;
   9990
   9991		if (is_pointer_value(env, insn->src_reg)) {
   9992			verbose(env, "R%d pointer comparison prohibited\n",
   9993				insn->src_reg);
   9994			return -EACCES;
   9995		}
   9996		src_reg = &regs[insn->src_reg];
   9997	} else {
   9998		if (insn->src_reg != BPF_REG_0) {
   9999			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
  10000			return -EINVAL;
  10001		}
  10002	}
  10003
  10004	/* check src2 operand */
  10005	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  10006	if (err)
  10007		return err;
  10008
  10009	dst_reg = &regs[insn->dst_reg];
  10010	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
  10011
  10012	if (BPF_SRC(insn->code) == BPF_K) {
  10013		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
  10014	} else if (src_reg->type == SCALAR_VALUE &&
  10015		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
  10016		pred = is_branch_taken(dst_reg,
  10017				       tnum_subreg(src_reg->var_off).value,
  10018				       opcode,
  10019				       is_jmp32);
  10020	} else if (src_reg->type == SCALAR_VALUE &&
  10021		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
  10022		pred = is_branch_taken(dst_reg,
  10023				       src_reg->var_off.value,
  10024				       opcode,
  10025				       is_jmp32);
  10026	} else if (reg_is_pkt_pointer_any(dst_reg) &&
  10027		   reg_is_pkt_pointer_any(src_reg) &&
  10028		   !is_jmp32) {
  10029		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
  10030	}
  10031
  10032	if (pred >= 0) {
  10033		/* If we get here with a dst_reg pointer type it is because
  10034		 * above is_branch_taken() special cased the 0 comparison.
  10035		 */
  10036		if (!__is_pointer_value(false, dst_reg))
  10037			err = mark_chain_precision(env, insn->dst_reg);
  10038		if (BPF_SRC(insn->code) == BPF_X && !err &&
  10039		    !__is_pointer_value(false, src_reg))
  10040			err = mark_chain_precision(env, insn->src_reg);
  10041		if (err)
  10042			return err;
  10043	}
  10044
  10045	if (pred == 1) {
  10046		/* Only follow the goto, ignore fall-through. If needed, push
  10047		 * the fall-through branch for simulation under speculative
  10048		 * execution.
  10049		 */
  10050		if (!env->bypass_spec_v1 &&
  10051		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
  10052					       *insn_idx))
  10053			return -EFAULT;
  10054		*insn_idx += insn->off;
  10055		return 0;
  10056	} else if (pred == 0) {
  10057		/* Only follow the fall-through branch, since that's where the
  10058		 * program will go. If needed, push the goto branch for
  10059		 * simulation under speculative execution.
  10060		 */
  10061		if (!env->bypass_spec_v1 &&
  10062		    !sanitize_speculative_path(env, insn,
  10063					       *insn_idx + insn->off + 1,
  10064					       *insn_idx))
  10065			return -EFAULT;
  10066		return 0;
  10067	}
  10068
  10069	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
  10070				  false);
  10071	if (!other_branch)
  10072		return -EFAULT;
  10073	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
  10074
  10075	/* detect if we are comparing against a constant value so we can adjust
  10076	 * our min/max values for our dst register.
  10077	 * this is only legit if both are scalars (or pointers to the same
  10078	 * object, I suppose, but we don't support that right now), because
  10079	 * otherwise the different base pointers mean the offsets aren't
  10080	 * comparable.
  10081	 */
  10082	if (BPF_SRC(insn->code) == BPF_X) {
  10083		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
  10084
  10085		if (dst_reg->type == SCALAR_VALUE &&
  10086		    src_reg->type == SCALAR_VALUE) {
  10087			if (tnum_is_const(src_reg->var_off) ||
  10088			    (is_jmp32 &&
  10089			     tnum_is_const(tnum_subreg(src_reg->var_off))))
  10090				reg_set_min_max(&other_branch_regs[insn->dst_reg],
  10091						dst_reg,
  10092						src_reg->var_off.value,
  10093						tnum_subreg(src_reg->var_off).value,
  10094						opcode, is_jmp32);
  10095			else if (tnum_is_const(dst_reg->var_off) ||
  10096				 (is_jmp32 &&
  10097				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
  10098				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
  10099						    src_reg,
  10100						    dst_reg->var_off.value,
  10101						    tnum_subreg(dst_reg->var_off).value,
  10102						    opcode, is_jmp32);
  10103			else if (!is_jmp32 &&
  10104				 (opcode == BPF_JEQ || opcode == BPF_JNE))
  10105				/* Comparing for equality, we can combine knowledge */
  10106				reg_combine_min_max(&other_branch_regs[insn->src_reg],
  10107						    &other_branch_regs[insn->dst_reg],
  10108						    src_reg, dst_reg, opcode);
  10109			if (src_reg->id &&
  10110			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
  10111				find_equal_scalars(this_branch, src_reg);
  10112				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
  10113			}
  10114
  10115		}
  10116	} else if (dst_reg->type == SCALAR_VALUE) {
  10117		reg_set_min_max(&other_branch_regs[insn->dst_reg],
  10118					dst_reg, insn->imm, (u32)insn->imm,
  10119					opcode, is_jmp32);
  10120	}
  10121
  10122	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
  10123	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
  10124		find_equal_scalars(this_branch, dst_reg);
  10125		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
  10126	}
  10127
  10128	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
   10129	 * NOTE: the optimizations below are related to pointer comparison,
   10130	 *       which will never be JMP32.
  10131	 */
  10132	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
  10133	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
  10134	    type_may_be_null(dst_reg->type)) {
  10135		/* Mark all identical registers in each branch as either
   10136		 * safe or unknown depending on the R == 0 or R != 0 conditional.
  10137		 */
  10138		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
  10139				      opcode == BPF_JNE);
  10140		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
  10141				      opcode == BPF_JEQ);
  10142	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
  10143					   this_branch, other_branch) &&
  10144		   is_pointer_value(env, insn->dst_reg)) {
  10145		verbose(env, "R%d pointer comparison prohibited\n",
  10146			insn->dst_reg);
  10147		return -EACCES;
  10148	}
  10149	if (env->log.level & BPF_LOG_LEVEL)
  10150		print_insn_state(env, this_branch->frame[this_branch->curframe]);
  10151	return 0;
  10152}
  10153
  10154/* verify BPF_LD_IMM64 instruction */
  10155static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
  10156{
  10157	struct bpf_insn_aux_data *aux = cur_aux(env);
  10158	struct bpf_reg_state *regs = cur_regs(env);
  10159	struct bpf_reg_state *dst_reg;
  10160	struct bpf_map *map;
  10161	int err;
  10162
  10163	if (BPF_SIZE(insn->code) != BPF_DW) {
  10164		verbose(env, "invalid BPF_LD_IMM insn\n");
  10165		return -EINVAL;
  10166	}
  10167	if (insn->off != 0) {
  10168		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
  10169		return -EINVAL;
  10170	}
  10171
  10172	err = check_reg_arg(env, insn->dst_reg, DST_OP);
  10173	if (err)
  10174		return err;
  10175
  10176	dst_reg = &regs[insn->dst_reg];
  10177	if (insn->src_reg == 0) {
  10178		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
  10179
  10180		dst_reg->type = SCALAR_VALUE;
  10181		__mark_reg_known(&regs[insn->dst_reg], imm);
  10182		return 0;
  10183	}
  10184
  10185	/* All special src_reg cases are listed below. From this point onwards
  10186	 * we either succeed and assign a corresponding dst_reg->type after
  10187	 * zeroing the offset, or fail and reject the program.
  10188	 */
  10189	mark_reg_known_zero(env, regs, insn->dst_reg);
  10190
  10191	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
  10192		dst_reg->type = aux->btf_var.reg_type;
  10193		switch (base_type(dst_reg->type)) {
  10194		case PTR_TO_MEM:
  10195			dst_reg->mem_size = aux->btf_var.mem_size;
  10196			break;
  10197		case PTR_TO_BTF_ID:
  10198			dst_reg->btf = aux->btf_var.btf;
  10199			dst_reg->btf_id = aux->btf_var.btf_id;
  10200			break;
  10201		default:
  10202			verbose(env, "bpf verifier is misconfigured\n");
  10203			return -EFAULT;
  10204		}
  10205		return 0;
  10206	}
  10207
  10208	if (insn->src_reg == BPF_PSEUDO_FUNC) {
  10209		struct bpf_prog_aux *aux = env->prog->aux;
  10210		u32 subprogno = find_subprog(env,
  10211					     env->insn_idx + insn->imm + 1);
  10212
  10213		if (!aux->func_info) {
  10214			verbose(env, "missing btf func_info\n");
  10215			return -EINVAL;
  10216		}
  10217		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
  10218			verbose(env, "callback function not static\n");
  10219			return -EINVAL;
  10220		}
  10221
  10222		dst_reg->type = PTR_TO_FUNC;
  10223		dst_reg->subprogno = subprogno;
  10224		return 0;
  10225	}
  10226
  10227	map = env->used_maps[aux->map_index];
  10228	dst_reg->map_ptr = map;
  10229
  10230	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
  10231	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
  10232		dst_reg->type = PTR_TO_MAP_VALUE;
  10233		dst_reg->off = aux->map_off;
  10234		if (map_value_has_spin_lock(map))
  10235			dst_reg->id = ++env->id_gen;
  10236	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
  10237		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
  10238		dst_reg->type = CONST_PTR_TO_MAP;
  10239	} else {
  10240		verbose(env, "bpf verifier is misconfigured\n");
  10241		return -EINVAL;
  10242	}
  10243
  10244	return 0;
  10245}
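
/* Example: the generic src_reg == 0 case above decodes the two-slot
 * BPF_LD_IMM64 encoding directly; "r1 = 0x1122334455667788 ll" is emitted
 * with insn[0].imm = 0x55667788 and insn[1].imm = 0x11223344, and the
 * verifier marks r1 as the known scalar 0x1122334455667788.
 */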
  10246
  10247static bool may_access_skb(enum bpf_prog_type type)
  10248{
  10249	switch (type) {
  10250	case BPF_PROG_TYPE_SOCKET_FILTER:
  10251	case BPF_PROG_TYPE_SCHED_CLS:
  10252	case BPF_PROG_TYPE_SCHED_ACT:
  10253		return true;
  10254	default:
  10255		return false;
  10256	}
  10257}
  10258
  10259/* verify safety of LD_ABS|LD_IND instructions:
   10260	 * - they can only appear in programs where ctx == skb
  10261 * - since they are wrappers of function calls, they scratch R1-R5 registers,
  10262 *   preserve R6-R9, and store return value into R0
  10263 *
  10264 * Implicit input:
  10265 *   ctx == skb == R6 == CTX
  10266 *
  10267 * Explicit input:
  10268 *   SRC == any register
  10269 *   IMM == 32-bit immediate
  10270 *
  10271 * Output:
  10272 *   R0 - 8/16/32-bit skb data converted to cpu endianness
  10273 */
  10274static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
  10275{
  10276	struct bpf_reg_state *regs = cur_regs(env);
  10277	static const int ctx_reg = BPF_REG_6;
  10278	u8 mode = BPF_MODE(insn->code);
  10279	int i, err;
  10280
  10281	if (!may_access_skb(resolve_prog_type(env->prog))) {
  10282		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
  10283		return -EINVAL;
  10284	}
  10285
  10286	if (!env->ops->gen_ld_abs) {
  10287		verbose(env, "bpf verifier is misconfigured\n");
  10288		return -EINVAL;
  10289	}
  10290
  10291	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
  10292	    BPF_SIZE(insn->code) == BPF_DW ||
  10293	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
  10294		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
  10295		return -EINVAL;
  10296	}
  10297
  10298	/* check whether implicit source operand (register R6) is readable */
  10299	err = check_reg_arg(env, ctx_reg, SRC_OP);
  10300	if (err)
  10301		return err;
  10302
  10303	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
  10304	 * gen_ld_abs() may terminate the program at runtime, leading to
  10305	 * reference leak.
  10306	 */
  10307	err = check_reference_leak(env);
  10308	if (err) {
  10309		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
  10310		return err;
  10311	}
  10312
  10313	if (env->cur_state->active_spin_lock) {
  10314		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
  10315		return -EINVAL;
  10316	}
  10317
  10318	if (regs[ctx_reg].type != PTR_TO_CTX) {
  10319		verbose(env,
  10320			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
  10321		return -EINVAL;
  10322	}
  10323
  10324	if (mode == BPF_IND) {
  10325		/* check explicit source operand */
  10326		err = check_reg_arg(env, insn->src_reg, SRC_OP);
  10327		if (err)
  10328			return err;
  10329	}
  10330
  10331	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
  10332	if (err < 0)
  10333		return err;
  10334
  10335	/* reset caller saved regs to unreadable */
  10336	for (i = 0; i < CALLER_SAVED_REGS; i++) {
  10337		mark_reg_not_init(env, regs, caller_saved[i]);
  10338		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
  10339	}
  10340
  10341	/* mark destination R0 register as readable, since it contains
  10342	 * the value fetched from the packet.
  10343	 * Already marked as written above.
  10344	 */
  10345	mark_reg_unknown(env, regs, BPF_REG_0);
   10346		/* ld_abs loads up to 32 bits of skb data. */
  10347	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
  10348	return 0;
  10349}
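
/* Example (eBPF socket filter using the legacy packet access checked above):
 *
 *   r6 = r1;               // ctx (skb) must be in R6 at the time of LD_ABS
 *   r0 = *(u16 *)skb[12];  // BPF_LD | BPF_ABS | BPF_H, imm == 12: loads the
 *                          // EtherType, converted to CPU endianness
 *
 * R1-R5 are scratched by the call-like instruction, R6-R9 are preserved and
 * R0 holds the result.
 */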
  10350
  10351static int check_return_code(struct bpf_verifier_env *env)
  10352{
  10353	struct tnum enforce_attach_type_range = tnum_unknown;
  10354	const struct bpf_prog *prog = env->prog;
  10355	struct bpf_reg_state *reg;
  10356	struct tnum range = tnum_range(0, 1);
  10357	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
  10358	int err;
  10359	struct bpf_func_state *frame = env->cur_state->frame[0];
  10360	const bool is_subprog = frame->subprogno;
  10361
  10362	/* LSM and struct_ops func-ptr's return type could be "void" */
  10363	if (!is_subprog &&
  10364	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
  10365	     prog_type == BPF_PROG_TYPE_LSM) &&
  10366	    !prog->aux->attach_func_proto->type)
  10367		return 0;
  10368
  10369	/* eBPF calling convention is such that R0 is used
  10370	 * to return the value from eBPF program.
  10371	 * Make sure that it's readable at this time
  10372	 * of bpf_exit, which means that program wrote
  10373	 * something into it earlier
  10374	 */
  10375	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
  10376	if (err)
  10377		return err;
  10378
  10379	if (is_pointer_value(env, BPF_REG_0)) {
  10380		verbose(env, "R0 leaks addr as return value\n");
  10381		return -EACCES;
  10382	}
  10383
  10384	reg = cur_regs(env) + BPF_REG_0;
  10385
  10386	if (frame->in_async_callback_fn) {
  10387		/* enforce return zero from async callbacks like timer */
  10388		if (reg->type != SCALAR_VALUE) {
  10389			verbose(env, "In async callback the register R0 is not a known value (%s)\n",
  10390				reg_type_str(env, reg->type));
  10391			return -EINVAL;
  10392		}
  10393
  10394		if (!tnum_in(tnum_const(0), reg->var_off)) {
  10395			verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
  10396			return -EINVAL;
  10397		}
  10398		return 0;
  10399	}
  10400
  10401	if (is_subprog) {
  10402		if (reg->type != SCALAR_VALUE) {
  10403			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
  10404				reg_type_str(env, reg->type));
  10405			return -EINVAL;
  10406		}
  10407		return 0;
  10408	}
  10409
  10410	switch (prog_type) {
  10411	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
  10412		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
  10413		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
  10414		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
  10415		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
  10416		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
  10417		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
  10418			range = tnum_range(1, 1);
  10419		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
  10420		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
  10421			range = tnum_range(0, 3);
  10422		break;
  10423	case BPF_PROG_TYPE_CGROUP_SKB:
  10424		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
  10425			range = tnum_range(0, 3);
  10426			enforce_attach_type_range = tnum_range(2, 3);
  10427		}
  10428		break;
  10429	case BPF_PROG_TYPE_CGROUP_SOCK:
  10430	case BPF_PROG_TYPE_SOCK_OPS:
  10431	case BPF_PROG_TYPE_CGROUP_DEVICE:
  10432	case BPF_PROG_TYPE_CGROUP_SYSCTL:
  10433	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
  10434		break;
  10435	case BPF_PROG_TYPE_RAW_TRACEPOINT:
  10436		if (!env->prog->aux->attach_btf_id)
  10437			return 0;
  10438		range = tnum_const(0);
  10439		break;
  10440	case BPF_PROG_TYPE_TRACING:
  10441		switch (env->prog->expected_attach_type) {
  10442		case BPF_TRACE_FENTRY:
  10443		case BPF_TRACE_FEXIT:
  10444			range = tnum_const(0);
  10445			break;
  10446		case BPF_TRACE_RAW_TP:
  10447		case BPF_MODIFY_RETURN:
  10448			return 0;
  10449		case BPF_TRACE_ITER:
  10450			break;
  10451		default:
  10452			return -ENOTSUPP;
  10453		}
  10454		break;
  10455	case BPF_PROG_TYPE_SK_LOOKUP:
  10456		range = tnum_range(SK_DROP, SK_PASS);
  10457		break;
  10458	case BPF_PROG_TYPE_EXT:
  10459		/* freplace program can return anything as its return value
  10460		 * depends on the to-be-replaced kernel func or bpf program.
  10461		 */
  10462	default:
  10463		return 0;
  10464	}
  10465
  10466	if (reg->type != SCALAR_VALUE) {
  10467		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
  10468			reg_type_str(env, reg->type));
  10469		return -EINVAL;
  10470	}
  10471
  10472	if (!tnum_in(range, reg->var_off)) {
  10473		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
  10474		return -EINVAL;
  10475	}
  10476
  10477	if (!tnum_is_unknown(enforce_attach_type_range) &&
  10478	    tnum_in(enforce_attach_type_range, reg->var_off))
  10479		env->prog->enforce_expected_attach_type = 1;
  10480	return 0;
  10481}
  10482
  10483/* non-recursive DFS pseudo code
  10484 * 1  procedure DFS-iterative(G,v):
  10485 * 2      label v as discovered
  10486 * 3      let S be a stack
  10487 * 4      S.push(v)
  10488 * 5      while S is not empty
  10489 * 6            t <- S.pop()
  10490 * 7            if t is what we're looking for:
  10491 * 8                return t
  10492 * 9            for all edges e in G.adjacentEdges(t) do
  10493 * 10               if edge e is already labelled
  10494 * 11                   continue with the next edge
  10495 * 12               w <- G.adjacentVertex(t,e)
  10496 * 13               if vertex w is not discovered and not explored
  10497 * 14                   label e as tree-edge
  10498 * 15                   label w as discovered
  10499 * 16                   S.push(w)
  10500 * 17                   continue at 5
  10501 * 18               else if vertex w is discovered
  10502 * 19                   label e as back-edge
  10503 * 20               else
  10504 * 21                   // vertex w is explored
  10505 * 22                   label e as forward- or cross-edge
  10506 * 23           label t as explored
  10507 * 24           S.pop()
  10508 *
  10509 * convention:
  10510 * 0x10 - discovered
  10511 * 0x11 - discovered and fall-through edge labelled
  10512 * 0x12 - discovered and fall-through and branch edges labelled
  10513 * 0x20 - explored
  10514 */
  10515
  10516enum {
  10517	DISCOVERED = 0x10,
  10518	EXPLORED = 0x20,
  10519	FALLTHROUGH = 1,
  10520	BRANCH = 2,
  10521};
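
/* Example: for the four-insn program
 *
 *   0: r0 = 1
 *   1: if r0 != 0 goto +1
 *   2: r0 = 2
 *   3: exit
 *
 * the DFS in check_cfg() below takes insn 1 through the states
 * DISCOVERED (0x10), fall-through edge to insn 2 labelled (0x11), branch
 * edge to insn 3 labelled (0x12) and finally EXPLORED (0x20).  An edge that
 * reaches a DISCOVERED-but-not-yet-EXPLORED insn is a back-edge, i.e. a loop.
 */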
  10522
  10523static u32 state_htab_size(struct bpf_verifier_env *env)
  10524{
  10525	return env->prog->len;
  10526}
  10527
  10528static struct bpf_verifier_state_list **explored_state(
  10529					struct bpf_verifier_env *env,
  10530					int idx)
  10531{
  10532	struct bpf_verifier_state *cur = env->cur_state;
  10533	struct bpf_func_state *state = cur->frame[cur->curframe];
  10534
  10535	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
  10536}
  10537
  10538static void init_explored_state(struct bpf_verifier_env *env, int idx)
  10539{
  10540	env->insn_aux_data[idx].prune_point = true;
  10541}
  10542
  10543enum {
  10544	DONE_EXPLORING = 0,
  10545	KEEP_EXPLORING = 1,
  10546};
  10547
  10548/* t, w, e - match pseudo-code above:
  10549 * t - index of current instruction
  10550 * w - next instruction
  10551 * e - edge
  10552 */
  10553static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
  10554		     bool loop_ok)
  10555{
  10556	int *insn_stack = env->cfg.insn_stack;
  10557	int *insn_state = env->cfg.insn_state;
  10558
  10559	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
  10560		return DONE_EXPLORING;
  10561
  10562	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
  10563		return DONE_EXPLORING;
  10564
  10565	if (w < 0 || w >= env->prog->len) {
  10566		verbose_linfo(env, t, "%d: ", t);
  10567		verbose(env, "jump out of range from insn %d to %d\n", t, w);
  10568		return -EINVAL;
  10569	}
  10570
  10571	if (e == BRANCH)
  10572		/* mark branch target for state pruning */
  10573		init_explored_state(env, w);
  10574
  10575	if (insn_state[w] == 0) {
  10576		/* tree-edge */
  10577		insn_state[t] = DISCOVERED | e;
  10578		insn_state[w] = DISCOVERED;
  10579		if (env->cfg.cur_stack >= env->prog->len)
  10580			return -E2BIG;
  10581		insn_stack[env->cfg.cur_stack++] = w;
  10582		return KEEP_EXPLORING;
  10583	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
  10584		if (loop_ok && env->bpf_capable)
  10585			return DONE_EXPLORING;
  10586		verbose_linfo(env, t, "%d: ", t);
  10587		verbose_linfo(env, w, "%d: ", w);
  10588		verbose(env, "back-edge from insn %d to %d\n", t, w);
  10589		return -EINVAL;
  10590	} else if (insn_state[w] == EXPLORED) {
  10591		/* forward- or cross-edge */
  10592		insn_state[t] = DISCOVERED | e;
  10593	} else {
  10594		verbose(env, "insn state internal bug\n");
  10595		return -EFAULT;
  10596	}
  10597	return DONE_EXPLORING;
  10598}
  10599
  10600static int visit_func_call_insn(int t, int insn_cnt,
  10601				struct bpf_insn *insns,
  10602				struct bpf_verifier_env *env,
  10603				bool visit_callee)
  10604{
  10605	int ret;
  10606
  10607	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
  10608	if (ret)
  10609		return ret;
  10610
  10611	if (t + 1 < insn_cnt)
  10612		init_explored_state(env, t + 1);
  10613	if (visit_callee) {
  10614		init_explored_state(env, t);
  10615		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
  10616				/* It's ok to allow recursion from CFG point of
  10617				 * view. __check_func_call() will do the actual
  10618				 * check.
  10619				 */
  10620				bpf_pseudo_func(insns + t));
  10621	}
  10622	return ret;
  10623}
  10624
  10625/* Visits the instruction at index t and returns one of the following:
  10626 *  < 0 - an error occurred
  10627 *  DONE_EXPLORING - the instruction was fully explored
  10628 *  KEEP_EXPLORING - there is still work to be done before it is fully explored
  10629 */
  10630static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
  10631{
  10632	struct bpf_insn *insns = env->prog->insnsi;
  10633	int ret;
  10634
  10635	if (bpf_pseudo_func(insns + t))
  10636		return visit_func_call_insn(t, insn_cnt, insns, env, true);
  10637
  10638	/* All non-branch instructions have a single fall-through edge. */
  10639	if (BPF_CLASS(insns[t].code) != BPF_JMP &&
  10640	    BPF_CLASS(insns[t].code) != BPF_JMP32)
  10641		return push_insn(t, t + 1, FALLTHROUGH, env, false);
  10642
  10643	switch (BPF_OP(insns[t].code)) {
  10644	case BPF_EXIT:
  10645		return DONE_EXPLORING;
  10646
  10647	case BPF_CALL:
  10648		if (insns[t].imm == BPF_FUNC_timer_set_callback)
   10649			/* Mark this call insn to trigger the is_state_visited() check
   10650			 * before the call itself is processed by __check_func_call().
  10651			 * Otherwise new async state will be pushed for further
  10652			 * exploration.
  10653			 */
  10654			init_explored_state(env, t);
  10655		return visit_func_call_insn(t, insn_cnt, insns, env,
  10656					    insns[t].src_reg == BPF_PSEUDO_CALL);
  10657
  10658	case BPF_JA:
  10659		if (BPF_SRC(insns[t].code) != BPF_K)
  10660			return -EINVAL;
  10661
  10662		/* unconditional jump with single edge */
  10663		ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
  10664				true);
  10665		if (ret)
  10666			return ret;
  10667
  10668		/* unconditional jmp is not a good pruning point,
  10669		 * but it's marked, since backtracking needs
  10670		 * to record jmp history in is_state_visited().
  10671		 */
  10672		init_explored_state(env, t + insns[t].off + 1);
  10673		/* tell verifier to check for equivalent states
  10674		 * after every call and jump
  10675		 */
  10676		if (t + 1 < insn_cnt)
  10677			init_explored_state(env, t + 1);
  10678
  10679		return ret;
  10680
  10681	default:
  10682		/* conditional jump with two edges */
  10683		init_explored_state(env, t);
  10684		ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
  10685		if (ret)
  10686			return ret;
  10687
  10688		return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
  10689	}
  10690}
  10691
  10692/* non-recursive depth-first-search to detect loops in BPF program
  10693 * loop == back-edge in directed graph
  10694 */
  10695static int check_cfg(struct bpf_verifier_env *env)
  10696{
  10697	int insn_cnt = env->prog->len;
  10698	int *insn_stack, *insn_state;
  10699	int ret = 0;
  10700	int i;
  10701
  10702	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  10703	if (!insn_state)
  10704		return -ENOMEM;
  10705
  10706	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  10707	if (!insn_stack) {
  10708		kvfree(insn_state);
  10709		return -ENOMEM;
  10710	}
  10711
  10712	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
  10713	insn_stack[0] = 0; /* 0 is the first instruction */
  10714	env->cfg.cur_stack = 1;
  10715
  10716	while (env->cfg.cur_stack > 0) {
  10717		int t = insn_stack[env->cfg.cur_stack - 1];
  10718
  10719		ret = visit_insn(t, insn_cnt, env);
  10720		switch (ret) {
  10721		case DONE_EXPLORING:
  10722			insn_state[t] = EXPLORED;
  10723			env->cfg.cur_stack--;
  10724			break;
  10725		case KEEP_EXPLORING:
  10726			break;
  10727		default:
  10728			if (ret > 0) {
  10729				verbose(env, "visit_insn internal bug\n");
  10730				ret = -EFAULT;
  10731			}
  10732			goto err_free;
  10733		}
  10734	}
  10735
  10736	if (env->cfg.cur_stack < 0) {
  10737		verbose(env, "pop stack internal bug\n");
  10738		ret = -EFAULT;
  10739		goto err_free;
  10740	}
  10741
  10742	for (i = 0; i < insn_cnt; i++) {
  10743		if (insn_state[i] != EXPLORED) {
  10744			verbose(env, "unreachable insn %d\n", i);
  10745			ret = -EINVAL;
  10746			goto err_free;
  10747		}
  10748	}
  10749	ret = 0; /* cfg looks good */
  10750
  10751err_free:
  10752	kvfree(insn_state);
  10753	kvfree(insn_stack);
  10754	env->cfg.insn_state = env->cfg.insn_stack = NULL;
  10755	return ret;
  10756}
  10757
  10758static int check_abnormal_return(struct bpf_verifier_env *env)
  10759{
  10760	int i;
  10761
  10762	for (i = 1; i < env->subprog_cnt; i++) {
  10763		if (env->subprog_info[i].has_ld_abs) {
  10764			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
  10765			return -EINVAL;
  10766		}
  10767		if (env->subprog_info[i].has_tail_call) {
  10768			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
  10769			return -EINVAL;
  10770		}
  10771	}
  10772	return 0;
  10773}
  10774
  10775/* The minimum supported BTF func info size */
  10776#define MIN_BPF_FUNCINFO_SIZE	8
  10777#define MAX_FUNCINFO_REC_SIZE	252
  10778
  10779static int check_btf_func(struct bpf_verifier_env *env,
  10780			  const union bpf_attr *attr,
  10781			  bpfptr_t uattr)
  10782{
  10783	const struct btf_type *type, *func_proto, *ret_type;
  10784	u32 i, nfuncs, urec_size, min_size;
  10785	u32 krec_size = sizeof(struct bpf_func_info);
  10786	struct bpf_func_info *krecord;
  10787	struct bpf_func_info_aux *info_aux = NULL;
  10788	struct bpf_prog *prog;
  10789	const struct btf *btf;
  10790	bpfptr_t urecord;
  10791	u32 prev_offset = 0;
  10792	bool scalar_return;
  10793	int ret = -ENOMEM;
  10794
  10795	nfuncs = attr->func_info_cnt;
  10796	if (!nfuncs) {
  10797		if (check_abnormal_return(env))
  10798			return -EINVAL;
  10799		return 0;
  10800	}
  10801
  10802	if (nfuncs != env->subprog_cnt) {
  10803		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
  10804		return -EINVAL;
  10805	}
  10806
  10807	urec_size = attr->func_info_rec_size;
  10808	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
  10809	    urec_size > MAX_FUNCINFO_REC_SIZE ||
  10810	    urec_size % sizeof(u32)) {
  10811		verbose(env, "invalid func info rec size %u\n", urec_size);
  10812		return -EINVAL;
  10813	}
  10814
  10815	prog = env->prog;
  10816	btf = prog->aux->btf;
  10817
  10818	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
  10819	min_size = min_t(u32, krec_size, urec_size);
  10820
  10821	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
  10822	if (!krecord)
  10823		return -ENOMEM;
  10824	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
  10825	if (!info_aux)
  10826		goto err_free;
  10827
  10828	for (i = 0; i < nfuncs; i++) {
  10829		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
  10830		if (ret) {
  10831			if (ret == -E2BIG) {
   10832				verbose(env, "nonzero trailing record in func info");
   10833				/* set the size the kernel expects so the loader can zero
  10834				 * out the rest of the record.
  10835				 */
  10836				if (copy_to_bpfptr_offset(uattr,
  10837							  offsetof(union bpf_attr, func_info_rec_size),
  10838							  &min_size, sizeof(min_size)))
  10839					ret = -EFAULT;
  10840			}
  10841			goto err_free;
  10842		}
  10843
  10844		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
  10845			ret = -EFAULT;
  10846			goto err_free;
  10847		}
  10848
  10849		/* check insn_off */
  10850		ret = -EINVAL;
  10851		if (i == 0) {
  10852			if (krecord[i].insn_off) {
  10853				verbose(env,
  10854					"nonzero insn_off %u for the first func info record",
  10855					krecord[i].insn_off);
  10856				goto err_free;
  10857			}
  10858		} else if (krecord[i].insn_off <= prev_offset) {
  10859			verbose(env,
  10860				"same or smaller insn offset (%u) than previous func info record (%u)",
  10861				krecord[i].insn_off, prev_offset);
  10862			goto err_free;
  10863		}
  10864
  10865		if (env->subprog_info[i].start != krecord[i].insn_off) {
  10866			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
  10867			goto err_free;
  10868		}
  10869
  10870		/* check type_id */
  10871		type = btf_type_by_id(btf, krecord[i].type_id);
  10872		if (!type || !btf_type_is_func(type)) {
  10873			verbose(env, "invalid type id %d in func info",
  10874				krecord[i].type_id);
  10875			goto err_free;
  10876		}
  10877		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
  10878
  10879		func_proto = btf_type_by_id(btf, type->type);
  10880		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
  10881			/* btf_func_check() already verified it during BTF load */
  10882			goto err_free;
  10883		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
  10884		scalar_return =
  10885			btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
  10886		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
  10887			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
  10888			goto err_free;
  10889		}
  10890		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
  10891			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
  10892			goto err_free;
  10893		}
  10894
  10895		prev_offset = krecord[i].insn_off;
  10896		bpfptr_add(&urecord, urec_size);
  10897	}
  10898
  10899	prog->aux->func_info = krecord;
  10900	prog->aux->func_info_cnt = nfuncs;
  10901	prog->aux->func_info_aux = info_aux;
  10902	return 0;
  10903
  10904err_free:
  10905	kvfree(krecord);
  10906	kfree(info_aux);
  10907	return ret;
  10908}
  10909
  10910static void adjust_btf_func(struct bpf_verifier_env *env)
  10911{
  10912	struct bpf_prog_aux *aux = env->prog->aux;
  10913	int i;
  10914
  10915	if (!aux->func_info)
  10916		return;
  10917
  10918	for (i = 0; i < env->subprog_cnt; i++)
  10919		aux->func_info[i].insn_off = env->subprog_info[i].start;
  10920}
  10921
  10922#define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
  10923#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
  10924
  10925static int check_btf_line(struct bpf_verifier_env *env,
  10926			  const union bpf_attr *attr,
  10927			  bpfptr_t uattr)
  10928{
  10929	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
  10930	struct bpf_subprog_info *sub;
  10931	struct bpf_line_info *linfo;
  10932	struct bpf_prog *prog;
  10933	const struct btf *btf;
  10934	bpfptr_t ulinfo;
  10935	int err;
  10936
  10937	nr_linfo = attr->line_info_cnt;
  10938	if (!nr_linfo)
  10939		return 0;
  10940	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
  10941		return -EINVAL;
  10942
  10943	rec_size = attr->line_info_rec_size;
  10944	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
  10945	    rec_size > MAX_LINEINFO_REC_SIZE ||
  10946	    rec_size & (sizeof(u32) - 1))
  10947		return -EINVAL;
  10948
   10949	/* Need to zero it in case userspace
   10950	 * passes in a smaller bpf_line_info object.
  10951	 */
  10952	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
  10953			 GFP_KERNEL | __GFP_NOWARN);
  10954	if (!linfo)
  10955		return -ENOMEM;
  10956
  10957	prog = env->prog;
  10958	btf = prog->aux->btf;
  10959
  10960	s = 0;
  10961	sub = env->subprog_info;
  10962	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
  10963	expected_size = sizeof(struct bpf_line_info);
  10964	ncopy = min_t(u32, expected_size, rec_size);
  10965	for (i = 0; i < nr_linfo; i++) {
  10966		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
  10967		if (err) {
  10968			if (err == -E2BIG) {
   10969				verbose(env, "nonzero trailing record in line_info");
  10970				if (copy_to_bpfptr_offset(uattr,
  10971							  offsetof(union bpf_attr, line_info_rec_size),
  10972							  &expected_size, sizeof(expected_size)))
  10973					err = -EFAULT;
  10974			}
  10975			goto err_free;
  10976		}
  10977
  10978		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
  10979			err = -EFAULT;
  10980			goto err_free;
  10981		}
  10982
  10983		/*
  10984		 * Check insn_off to ensure
  10985		 * 1) strictly increasing AND
  10986		 * 2) bounded by prog->len
  10987		 *
  10988		 * The linfo[0].insn_off == 0 check logically falls into
  10989		 * the later "missing bpf_line_info for func..." case
  10990		 * because the first linfo[0].insn_off must also be
  10991		 * the start of the first subprog and that subprog
  10992		 * must have subprog_info[0].start == 0.
  10993		 */
  10994		if ((i && linfo[i].insn_off <= prev_offset) ||
  10995		    linfo[i].insn_off >= prog->len) {
  10996			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
  10997				i, linfo[i].insn_off, prev_offset,
  10998				prog->len);
  10999			err = -EINVAL;
  11000			goto err_free;
  11001		}
  11002
  11003		if (!prog->insnsi[linfo[i].insn_off].code) {
  11004			verbose(env,
  11005				"Invalid insn code at line_info[%u].insn_off\n",
  11006				i);
  11007			err = -EINVAL;
  11008			goto err_free;
  11009		}
  11010
  11011		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
  11012		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
  11013			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
  11014			err = -EINVAL;
  11015			goto err_free;
  11016		}
  11017
  11018		if (s != env->subprog_cnt) {
  11019			if (linfo[i].insn_off == sub[s].start) {
  11020				sub[s].linfo_idx = i;
  11021				s++;
  11022			} else if (sub[s].start < linfo[i].insn_off) {
  11023				verbose(env, "missing bpf_line_info for func#%u\n", s);
  11024				err = -EINVAL;
  11025				goto err_free;
  11026			}
  11027		}
  11028
  11029		prev_offset = linfo[i].insn_off;
  11030		bpfptr_add(&ulinfo, rec_size);
  11031	}
  11032
  11033	if (s != env->subprog_cnt) {
  11034		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
  11035			env->subprog_cnt - s, s);
  11036		err = -EINVAL;
  11037		goto err_free;
  11038	}
  11039
  11040	prog->aux->linfo = linfo;
  11041	prog->aux->nr_linfo = nr_linfo;
  11042
  11043	return 0;
  11044
  11045err_free:
  11046	kvfree(linfo);
  11047	return err;
  11048}
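
/* Worked example for check_btf_line() (hypothetical offsets): for a program
 * with two subprogs starting at insn 0 and insn 12, line_info records with
 * insn_off 0, 3, 12, 15 are accepted: the offsets are strictly increasing,
 * below prog->len, and every subprog start (0 and 12) has a record.  If the
 * record at insn_off 12 were missing, the first record past it would trigger
 * "missing bpf_line_info for func#1".
 */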
  11049
  11050#define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
  11051#define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
  11052
  11053static int check_core_relo(struct bpf_verifier_env *env,
  11054			   const union bpf_attr *attr,
  11055			   bpfptr_t uattr)
  11056{
  11057	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
  11058	struct bpf_core_relo core_relo = {};
  11059	struct bpf_prog *prog = env->prog;
  11060	const struct btf *btf = prog->aux->btf;
  11061	struct bpf_core_ctx ctx = {
  11062		.log = &env->log,
  11063		.btf = btf,
  11064	};
  11065	bpfptr_t u_core_relo;
  11066	int err;
  11067
  11068	nr_core_relo = attr->core_relo_cnt;
  11069	if (!nr_core_relo)
  11070		return 0;
  11071	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
  11072		return -EINVAL;
  11073
  11074	rec_size = attr->core_relo_rec_size;
  11075	if (rec_size < MIN_CORE_RELO_SIZE ||
  11076	    rec_size > MAX_CORE_RELO_SIZE ||
  11077	    rec_size % sizeof(u32))
  11078		return -EINVAL;
  11079
  11080	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
  11081	expected_size = sizeof(struct bpf_core_relo);
  11082	ncopy = min_t(u32, expected_size, rec_size);
  11083
  11084	/* Unlike func_info and line_info, copy and apply each CO-RE
  11085	 * relocation record one at a time.
  11086	 */
  11087	for (i = 0; i < nr_core_relo; i++) {
  11088		/* future proofing when sizeof(bpf_core_relo) changes */
  11089		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
  11090		if (err) {
  11091			if (err == -E2BIG) {
  11092				verbose(env, "nonzero tailing record in core_relo");
  11093				if (copy_to_bpfptr_offset(uattr,
  11094							  offsetof(union bpf_attr, core_relo_rec_size),
  11095							  &expected_size, sizeof(expected_size)))
  11096					err = -EFAULT;
  11097			}
  11098			break;
  11099		}
  11100
  11101		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
  11102			err = -EFAULT;
  11103			break;
  11104		}
  11105
  11106		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
  11107			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
  11108				i, core_relo.insn_off, prog->len);
  11109			err = -EINVAL;
  11110			break;
  11111		}
  11112
  11113		err = bpf_core_apply(&ctx, &core_relo, i,
  11114				     &prog->insnsi[core_relo.insn_off / 8]);
  11115		if (err)
  11116			break;
  11117		bpfptr_add(&u_core_relo, rec_size);
  11118	}
  11119	return err;
  11120}
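
/* Note: unlike func_info and line_info, core_relo.insn_off is a byte offset
 * into the instruction array, so it must be a multiple of 8 (the size of one
 * struct bpf_insn) and insn_off / 8 is the index of the instruction that the
 * relocation is applied to.
 */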
  11121
  11122static int check_btf_info(struct bpf_verifier_env *env,
  11123			  const union bpf_attr *attr,
  11124			  bpfptr_t uattr)
  11125{
  11126	struct btf *btf;
  11127	int err;
  11128
  11129	if (!attr->func_info_cnt && !attr->line_info_cnt) {
  11130		if (check_abnormal_return(env))
  11131			return -EINVAL;
  11132		return 0;
  11133	}
  11134
  11135	btf = btf_get_by_fd(attr->prog_btf_fd);
  11136	if (IS_ERR(btf))
  11137		return PTR_ERR(btf);
  11138	if (btf_is_kernel(btf)) {
  11139		btf_put(btf);
  11140		return -EACCES;
  11141	}
  11142	env->prog->aux->btf = btf;
  11143
  11144	err = check_btf_func(env, attr, uattr);
  11145	if (err)
  11146		return err;
  11147
  11148	err = check_btf_line(env, attr, uattr);
  11149	if (err)
  11150		return err;
  11151
  11152	err = check_core_relo(env, attr, uattr);
  11153	if (err)
  11154		return err;
  11155
  11156	return 0;
  11157}
  11158
  11159/* check %cur's range satisfies %old's */
  11160static bool range_within(struct bpf_reg_state *old,
  11161			 struct bpf_reg_state *cur)
  11162{
  11163	return old->umin_value <= cur->umin_value &&
  11164	       old->umax_value >= cur->umax_value &&
  11165	       old->smin_value <= cur->smin_value &&
  11166	       old->smax_value >= cur->smax_value &&
  11167	       old->u32_min_value <= cur->u32_min_value &&
  11168	       old->u32_max_value >= cur->u32_max_value &&
  11169	       old->s32_min_value <= cur->s32_min_value &&
  11170	       old->s32_max_value >= cur->s32_max_value;
  11171}
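
/* Worked example (hypothetical bounds): an old state that proved r1 lies in
 * [0, 100] is satisfied by a current state where r1 is confined to [10, 20],
 * since every value the current state allows was already explored as safe.
 * The reverse direction (old [10, 20], cur [0, 100]) is not within range and
 * the states are not treated as equivalent.
 */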
  11172
  11173/* If in the old state two registers had the same id, then they need to have
  11174 * the same id in the new state as well.  But that id could be different from
  11175 * the old state, so we need to track the mapping from old to new ids.
  11176 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
  11177 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
  11178 * regs with a different old id could still have new id 9, we don't care about
  11179 * that.
  11180 * So we look through our idmap to see if this old id has been seen before.  If
  11181 * so, we require the new id to match; otherwise, we add the id pair to the map.
  11182 */
  11183static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
  11184{
  11185	unsigned int i;
  11186
  11187	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
  11188		if (!idmap[i].old) {
  11189			/* Reached an empty slot; haven't seen this id before */
  11190			idmap[i].old = old_id;
  11191			idmap[i].cur = cur_id;
  11192			return true;
  11193		}
  11194		if (idmap[i].old == old_id)
  11195			return idmap[i].cur == cur_id;
  11196	}
  11197	/* We ran out of idmap slots, which should be impossible */
  11198	WARN_ON_ONCE(1);
  11199	return false;
  11200}
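
/* Illustration of the idmap contract (hypothetical ids):
 *   check_ids(5, 9, idmap) -> true,  records the pair (old 5, cur 9)
 *   check_ids(5, 9, idmap) -> true,  consistent with the recorded pair
 *   check_ids(5, 7, idmap) -> false, conflicts with the recorded 5 -> 9
 */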
  11201
  11202static void clean_func_state(struct bpf_verifier_env *env,
  11203			     struct bpf_func_state *st)
  11204{
  11205	enum bpf_reg_liveness live;
  11206	int i, j;
  11207
  11208	for (i = 0; i < BPF_REG_FP; i++) {
  11209		live = st->regs[i].live;
  11210		/* liveness must not touch this register anymore */
  11211		st->regs[i].live |= REG_LIVE_DONE;
  11212		if (!(live & REG_LIVE_READ))
  11213			/* since the register is unused, clear its state
  11214			 * to make further comparison simpler
  11215			 */
  11216			__mark_reg_not_init(env, &st->regs[i]);
  11217	}
  11218
  11219	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
  11220		live = st->stack[i].spilled_ptr.live;
  11221		/* liveness must not touch this stack slot anymore */
  11222		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
  11223		if (!(live & REG_LIVE_READ)) {
  11224			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
  11225			for (j = 0; j < BPF_REG_SIZE; j++)
  11226				st->stack[i].slot_type[j] = STACK_INVALID;
  11227		}
  11228	}
  11229}
  11230
  11231static void clean_verifier_state(struct bpf_verifier_env *env,
  11232				 struct bpf_verifier_state *st)
  11233{
  11234	int i;
  11235
  11236	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
  11237		/* all regs in this state in all frames were already marked */
  11238		return;
  11239
  11240	for (i = 0; i <= st->curframe; i++)
  11241		clean_func_state(env, st->frame[i]);
  11242}
  11243
  11244/* the parentage chains form a tree.
  11245 * the verifier states are added to state lists at given insn and
  11246 * pushed into state stack for future exploration.
  11247 * when the verifier reaches bpf_exit insn some of the verifier states
  11248 * stored in the state lists have their final liveness state already,
  11249 * but a lot of states will get revised from liveness point of view when
  11250 * the verifier explores other branches.
  11251 * Example:
  11252 * 1: r0 = 1
  11253 * 2: if r1 == 100 goto pc+1
  11254 * 3: r0 = 2
  11255 * 4: exit
  11256 * when the verifier reaches exit insn the register r0 in the state list of
  11257 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
  11258 * of insn 2 and goes exploring further. At the insn 4 it will walk the
  11259 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
  11260 *
  11261 * Since the verifier pushes the branch states as it sees them while exploring
  11262 * the program the condition of walking the branch instruction for the second
  11263 * time means that all states below this branch were already explored and
  11264 * their final liveness marks are already propagated.
  11265 * Hence when the verifier completes the search of state list in is_state_visited()
  11266 * we can call this clean_live_states() function to mark all liveness states
  11267 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
  11268 * will not be used.
  11269 * This function also clears the registers and stack for states that !READ
  11270 * to simplify state merging.
  11271 *
  11272 * An important note here is that walking the same branch instruction in the
  11273 * callee doesn't mean that the states are DONE. The verifier has to compare
  11274 * the callsites
  11275 */
  11276static void clean_live_states(struct bpf_verifier_env *env, int insn,
  11277			      struct bpf_verifier_state *cur)
  11278{
  11279	struct bpf_verifier_state_list *sl;
  11280	int i;
  11281
  11282	sl = *explored_state(env, insn);
  11283	while (sl) {
  11284		if (sl->state.branches)
  11285			goto next;
  11286		if (sl->state.insn_idx != insn ||
  11287		    sl->state.curframe != cur->curframe)
  11288			goto next;
  11289		for (i = 0; i <= cur->curframe; i++)
  11290			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
  11291				goto next;
  11292		clean_verifier_state(env, &sl->state);
  11293next:
  11294		sl = sl->next;
  11295	}
  11296}
  11297
  11298/* Returns true if (rold safe implies rcur safe) */
  11299static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
  11300		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
  11301{
  11302	bool equal;
  11303
  11304	if (!(rold->live & REG_LIVE_READ))
  11305		/* explored state didn't use this */
  11306		return true;
  11307
  11308	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
  11309
  11310	if (rold->type == PTR_TO_STACK)
  11311		/* two stack pointers are equal only if they're pointing to
  11312		 * the same stack frame, since fp-8 in foo != fp-8 in bar
  11313		 */
  11314		return equal && rold->frameno == rcur->frameno;
  11315
  11316	if (equal)
  11317		return true;
  11318
  11319	if (rold->type == NOT_INIT)
  11320		/* explored state can't have used this */
  11321		return true;
  11322	if (rcur->type == NOT_INIT)
  11323		return false;
  11324	switch (base_type(rold->type)) {
  11325	case SCALAR_VALUE:
  11326		if (env->explore_alu_limits)
  11327			return false;
  11328		if (rcur->type == SCALAR_VALUE) {
  11329			if (!rold->precise && !rcur->precise)
  11330				return true;
  11331			/* new val must satisfy old val knowledge */
  11332			return range_within(rold, rcur) &&
  11333			       tnum_in(rold->var_off, rcur->var_off);
  11334		} else {
  11335			/* We're trying to use a pointer in place of a scalar.
  11336			 * Even if the scalar was unbounded, this could lead to
  11337			 * pointer leaks because scalars are allowed to leak
  11338			 * while pointers are not. We could make this safe in
  11339			 * special cases if root is calling us, but it's
  11340			 * probably not worth the hassle.
  11341			 */
  11342			return false;
  11343		}
  11344	case PTR_TO_MAP_KEY:
  11345	case PTR_TO_MAP_VALUE:
  11346		/* a PTR_TO_MAP_VALUE could be safe to use as a
  11347		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
  11348		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
  11349		 * checked, doing so could have affected others with the same
  11350		 * id, and we can't check for that because we lost the id when
  11351		 * we converted to a PTR_TO_MAP_VALUE.
  11352		 */
  11353		if (type_may_be_null(rold->type)) {
  11354			if (!type_may_be_null(rcur->type))
  11355				return false;
  11356			if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
  11357				return false;
  11358			/* Check our ids match any regs they're supposed to */
  11359			return check_ids(rold->id, rcur->id, idmap);
  11360		}
  11361
  11362		/* If the new min/max/var_off satisfy the old ones and
  11363		 * everything else matches, we are OK.
  11364		 * 'id' is not compared, since it's only used for maps with
  11365		 * bpf_spin_lock inside map element and in such cases if
  11366		 * the rest of the prog is valid for one map element then
  11367		 * it's valid for all map elements regardless of the key
  11368		 * used in bpf_map_lookup()
  11369		 */
  11370		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
  11371		       range_within(rold, rcur) &&
  11372		       tnum_in(rold->var_off, rcur->var_off);
  11373	case PTR_TO_PACKET_META:
  11374	case PTR_TO_PACKET:
  11375		if (rcur->type != rold->type)
  11376			return false;
  11377		/* We must have at least as much range as the old ptr
  11378		 * did, so that any accesses which were safe before are
  11379		 * still safe.  This is true even if old range < old off,
  11380		 * since someone could have accessed through (ptr - k), or
  11381		 * even done ptr -= k in a register, to get a safe access.
  11382		 */
  11383		if (rold->range > rcur->range)
  11384			return false;
  11385		/* If the offsets don't match, we can't trust our alignment;
  11386		 * nor can we be sure that we won't fall out of range.
  11387		 */
  11388		if (rold->off != rcur->off)
  11389			return false;
  11390		/* id relations must be preserved */
  11391		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
  11392			return false;
  11393		/* new val must satisfy old val knowledge */
  11394		return range_within(rold, rcur) &&
  11395		       tnum_in(rold->var_off, rcur->var_off);
  11396	case PTR_TO_CTX:
  11397	case CONST_PTR_TO_MAP:
  11398	case PTR_TO_PACKET_END:
  11399	case PTR_TO_FLOW_KEYS:
  11400	case PTR_TO_SOCKET:
  11401	case PTR_TO_SOCK_COMMON:
  11402	case PTR_TO_TCP_SOCK:
  11403	case PTR_TO_XDP_SOCK:
  11404		/* Only valid matches are exact, which memcmp() above
  11405		 * would have accepted
  11406		 */
  11407	default:
  11408		/* Don't know what's going on, just say it's not safe */
  11409		return false;
  11410	}
  11411
  11412	/* Shouldn't get here; if we do, say it's not safe */
  11413	WARN_ON_ONCE(1);
  11414	return false;
  11415}
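
/* Examples for the SCALAR_VALUE case above (hypothetical states): a precise
 * old scalar known to be in [0, 64] accepts a current scalar confined to
 * [8, 16], provided tnum_in() also holds, but rejects one that may reach
 * [0, 128]; and a current pointer never satisfies an old scalar, since
 * scalars are allowed to leak while pointers are not.
 */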
  11416
  11417static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
  11418		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
  11419{
  11420	int i, spi;
  11421
  11422	/* walk slots of the explored stack and ignore any additional
  11423	 * slots in the current stack, since explored(safe) state
  11424	 * didn't use them
  11425	 */
  11426	for (i = 0; i < old->allocated_stack; i++) {
  11427		spi = i / BPF_REG_SIZE;
  11428
  11429		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
  11430			i += BPF_REG_SIZE - 1;
  11431			/* explored state didn't use this */
  11432			continue;
  11433		}
  11434
  11435		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
  11436			continue;
  11437
  11438		/* explored stack has more populated slots than current stack
  11439		 * and these slots were used
  11440		 */
  11441		if (i >= cur->allocated_stack)
  11442			return false;
  11443
  11444		/* if old state was safe with misc data in the stack
  11445		 * it will be safe with zero-initialized stack.
  11446		 * The opposite is not true
  11447		 */
  11448		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
  11449		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
  11450			continue;
  11451		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
  11452		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
  11453			/* Ex: old explored (safe) state has STACK_SPILL in
  11454			 * this stack slot, but current has STACK_MISC ->
  11455			 * these verifier states are not equivalent,
  11456			 * return false to continue verification of this path
  11457			 */
  11458			return false;
  11459		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
  11460			continue;
  11461		if (!is_spilled_reg(&old->stack[spi]))
  11462			continue;
  11463		if (!regsafe(env, &old->stack[spi].spilled_ptr,
  11464			     &cur->stack[spi].spilled_ptr, idmap))
  11465			/* when explored and current stack slot are both storing
  11466			 * spilled registers, check that the stored pointer types
  11467			 * are the same as well.
  11468			 * Ex: explored safe path could have stored
  11469			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
  11470			 * but current path has stored:
  11471			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
  11472			 * such verifier states are not equivalent.
  11473			 * return false to continue verification of this path
  11474			 */
  11475			return false;
  11476	}
  11477	return true;
  11478}
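
/* Example (hypothetical stack slots): an old state with fp-8 recorded as
 * STACK_MISC is satisfied by a current state where fp-8 is STACK_ZERO,
 * because zero-initialized data is a safe special case of unknown data.
 * Old STACK_SPILL vs. current STACK_MISC at the same slot makes the states
 * non-equivalent and verification of this path continues.
 */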
  11479
  11480static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
  11481{
  11482	if (old->acquired_refs != cur->acquired_refs)
  11483		return false;
  11484	return !memcmp(old->refs, cur->refs,
  11485		       sizeof(*old->refs) * old->acquired_refs);
  11486}
  11487
  11488/* compare two verifier states
  11489 *
  11490 * all states stored in state_list are known to be valid, since
  11491 * verifier reached 'bpf_exit' instruction through them
  11492 *
  11493 * this function is called when the verifier explores different branches of
  11494 * execution popped from the state stack. If it sees an old state that has
  11495 * more strict register state and more strict stack state then this execution
  11496 * branch doesn't need to be explored further, since verifier already
  11497 * concluded that more strict state leads to valid finish.
  11498 *
  11499 * Therefore two states are equivalent if register state is more conservative
  11500 * and explored stack state is more conservative than the current one.
  11501 * Example:
  11502 *       explored                   current
  11503 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
  11504 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
  11505 *
  11506 * In other words if current stack state (one being explored) has more
  11507 * valid slots than old one that already passed validation, it means
  11508 * the verifier can stop exploring and conclude that current state is valid too
  11509 *
  11510 * Similarly with registers. If explored state has register type as invalid
  11511 * whereas register type in current state is meaningful, it means that
  11512 * the current state will reach 'bpf_exit' instruction safely
  11513 */
  11514static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
  11515			      struct bpf_func_state *cur)
  11516{
  11517	int i;
  11518
  11519	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
  11520	for (i = 0; i < MAX_BPF_REG; i++)
  11521		if (!regsafe(env, &old->regs[i], &cur->regs[i],
  11522			     env->idmap_scratch))
  11523			return false;
  11524
  11525	if (!stacksafe(env, old, cur, env->idmap_scratch))
  11526		return false;
  11527
  11528	if (!refsafe(old, cur))
  11529		return false;
  11530
  11531	return true;
  11532}
  11533
  11534static bool states_equal(struct bpf_verifier_env *env,
  11535			 struct bpf_verifier_state *old,
  11536			 struct bpf_verifier_state *cur)
  11537{
  11538	int i;
  11539
  11540	if (old->curframe != cur->curframe)
  11541		return false;
  11542
  11543	/* Verification state from speculative execution simulation
  11544	 * must never prune a non-speculative execution one.
  11545	 */
  11546	if (old->speculative && !cur->speculative)
  11547		return false;
  11548
  11549	if (old->active_spin_lock != cur->active_spin_lock)
  11550		return false;
  11551
  11552	/* for states to be equal callsites have to be the same
  11553	 * and all frame states need to be equivalent
  11554	 */
  11555	for (i = 0; i <= old->curframe; i++) {
  11556		if (old->frame[i]->callsite != cur->frame[i]->callsite)
  11557			return false;
  11558		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
  11559			return false;
  11560	}
  11561	return true;
  11562}
  11563
  11564/* Return 0 if no propagation happened. Return negative error code if error
  11565 * happened. Otherwise, return the propagated bit.
  11566 */
  11567static int propagate_liveness_reg(struct bpf_verifier_env *env,
  11568				  struct bpf_reg_state *reg,
  11569				  struct bpf_reg_state *parent_reg)
  11570{
  11571	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
  11572	u8 flag = reg->live & REG_LIVE_READ;
  11573	int err;
  11574
  11575	/* When we get here, the read flags of PARENT_REG or REG could be any of
  11576	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
  11577	 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
  11578	 */
  11579	if (parent_flag == REG_LIVE_READ64 ||
  11580	    /* Or if there is no read flag from REG. */
  11581	    !flag ||
  11582	    /* Or if the read flag from REG is the same as PARENT_REG. */
  11583	    parent_flag == flag)
  11584		return 0;
  11585
  11586	err = mark_reg_read(env, reg, parent_reg, flag);
  11587	if (err)
  11588		return err;
  11589
  11590	return flag;
  11591}
  11592
  11593/* A write screens off any subsequent reads; but write marks come from the
  11594 * straight-line code between a state and its parent.  When we arrive at an
  11595 * equivalent state (jump target or such) we didn't arrive by the straight-line
  11596 * code, so read marks in the state must propagate to the parent regardless
  11597 * of the state's write marks. That's what 'parent == state->parent' comparison
  11598 * in mark_reg_read() is for.
  11599 */
  11600static int propagate_liveness(struct bpf_verifier_env *env,
  11601			      const struct bpf_verifier_state *vstate,
  11602			      struct bpf_verifier_state *vparent)
  11603{
  11604	struct bpf_reg_state *state_reg, *parent_reg;
  11605	struct bpf_func_state *state, *parent;
  11606	int i, frame, err = 0;
  11607
  11608	if (vparent->curframe != vstate->curframe) {
  11609		WARN(1, "propagate_live: parent frame %d current frame %d\n",
  11610		     vparent->curframe, vstate->curframe);
  11611		return -EFAULT;
  11612	}
  11613	/* Propagate read liveness of registers... */
  11614	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
  11615	for (frame = 0; frame <= vstate->curframe; frame++) {
  11616		parent = vparent->frame[frame];
  11617		state = vstate->frame[frame];
  11618		parent_reg = parent->regs;
  11619		state_reg = state->regs;
  11620		/* We don't need to worry about FP liveness, it's read-only */
  11621		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
  11622			err = propagate_liveness_reg(env, &state_reg[i],
  11623						     &parent_reg[i]);
  11624			if (err < 0)
  11625				return err;
  11626			if (err == REG_LIVE_READ64)
  11627				mark_insn_zext(env, &parent_reg[i]);
  11628		}
  11629
  11630		/* Propagate stack slots. */
  11631		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
  11632			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
  11633			parent_reg = &parent->stack[i].spilled_ptr;
  11634			state_reg = &state->stack[i].spilled_ptr;
  11635			err = propagate_liveness_reg(env, state_reg,
  11636						     parent_reg);
  11637			if (err < 0)
  11638				return err;
  11639		}
  11640	}
  11641	return 0;
  11642}
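
/* Example (hypothetical liveness marks): if the already-explored continuation
 * read r6 as a full 64-bit value (REG_LIVE_READ64) while the parent state only
 * carried a 32-bit read mark, the stronger mark is propagated upwards and
 * mark_insn_zext() records that the instruction which last defined r6 as a
 * 32-bit subregister must keep its result zero-extended.
 */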
  11643
  11644/* find precise scalars in the previous equivalent state and
  11645 * propagate them into the current state
  11646 */
  11647static int propagate_precision(struct bpf_verifier_env *env,
  11648			       const struct bpf_verifier_state *old)
  11649{
  11650	struct bpf_reg_state *state_reg;
  11651	struct bpf_func_state *state;
  11652	int i, err = 0;
  11653
  11654	state = old->frame[old->curframe];
  11655	state_reg = state->regs;
  11656	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
  11657		if (state_reg->type != SCALAR_VALUE ||
  11658		    !state_reg->precise)
  11659			continue;
  11660		if (env->log.level & BPF_LOG_LEVEL2)
  11661			verbose(env, "propagating r%d\n", i);
  11662		err = mark_chain_precision(env, i);
  11663		if (err < 0)
  11664			return err;
  11665	}
  11666
  11667	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
  11668		if (!is_spilled_reg(&state->stack[i]))
  11669			continue;
  11670		state_reg = &state->stack[i].spilled_ptr;
  11671		if (state_reg->type != SCALAR_VALUE ||
  11672		    !state_reg->precise)
  11673			continue;
  11674		if (env->log.level & BPF_LOG_LEVEL2)
  11675			verbose(env, "propagating fp%d\n",
  11676				(-i - 1) * BPF_REG_SIZE);
  11677		err = mark_chain_precision_stack(env, i);
  11678		if (err < 0)
  11679			return err;
  11680	}
  11681	return 0;
  11682}
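
/* Example (hypothetical state): if the equivalent explored state reached
 * bpf_exit with r2 marked precise (its exact bounds mattered, e.g. r2 was used
 * to bound a memory access), mark_chain_precision() back-propagates the same
 * precise mark through the current state's history, so that pruning does not
 * later treat differently-bounded values of r2 as interchangeable.
 */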
  11683
  11684static bool states_maybe_looping(struct bpf_verifier_state *old,
  11685				 struct bpf_verifier_state *cur)
  11686{
  11687	struct bpf_func_state *fold, *fcur;
  11688	int i, fr = cur->curframe;
  11689
  11690	if (old->curframe != fr)
  11691		return false;
  11692
  11693	fold = old->frame[fr];
  11694	fcur = cur->frame[fr];
  11695	for (i = 0; i < MAX_BPF_REG; i++)
  11696		if (memcmp(&fold->regs[i], &fcur->regs[i],
  11697			   offsetof(struct bpf_reg_state, parent)))
  11698			return false;
  11699	return true;
  11700}
  11701
  11702
  11703static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
  11704{
  11705	struct bpf_verifier_state_list *new_sl;
  11706	struct bpf_verifier_state_list *sl, **pprev;
  11707	struct bpf_verifier_state *cur = env->cur_state, *new;
  11708	int i, j, err, states_cnt = 0;
  11709	bool add_new_state = env->test_state_freq ? true : false;
  11710
  11711	cur->last_insn_idx = env->prev_insn_idx;
  11712	if (!env->insn_aux_data[insn_idx].prune_point)
  11713		/* this 'insn_idx' instruction wasn't marked, so we will not
  11714		 * be doing state search here
  11715		 */
  11716		return 0;
  11717
  11718	/* bpf progs typically have pruning point every 4 instructions
  11719	 * http://vger.kernel.org/bpfconf2019.html#session-1
  11720	 * Do not add new state for future pruning if the verifier hasn't seen
  11721	 * at least 2 jumps and at least 8 instructions.
  11722	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
  11723	 * In tests that amounts to up to a 50% reduction in total verifier
  11724	 * memory consumption and a 20% verifier time speedup.
  11725	 */
  11726	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
  11727	    env->insn_processed - env->prev_insn_processed >= 8)
  11728		add_new_state = true;
  11729
  11730	pprev = explored_state(env, insn_idx);
  11731	sl = *pprev;
  11732
  11733	clean_live_states(env, insn_idx, cur);
  11734
  11735	while (sl) {
  11736		states_cnt++;
  11737		if (sl->state.insn_idx != insn_idx)
  11738			goto next;
  11739
  11740		if (sl->state.branches) {
  11741			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
  11742
  11743			if (frame->in_async_callback_fn &&
  11744			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
  11745				/* Different async_entry_cnt means that the verifier is
  11746				 * processing another entry into the async callback.
  11747				 * Seeing the same state is not an indication of an
  11748				 * infinite loop or infinite recursion.
  11749				 * But finding the same state doesn't mean that it's safe
  11750				 * to stop processing the current state. The previous state
  11751				 * hasn't yet reached bpf_exit, since state.branches > 0.
  11752				 * Checking in_async_callback_fn alone is not enough either,
  11753				 * since the verifier still needs to catch infinite loops
  11754				 * inside async callbacks.
  11755				 */
  11756			} else if (states_maybe_looping(&sl->state, cur) &&
  11757				   states_equal(env, &sl->state, cur)) {
  11758				verbose_linfo(env, insn_idx, "; ");
  11759				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
  11760				return -EINVAL;
  11761			}
  11762			/* if the verifier is processing a loop, avoid adding a new state
  11763			 * too often, since different loop iterations have distinct
  11764			 * states and may not help future pruning.
  11765			 * This threshold shouldn't be too low, to make sure that
  11766			 * a loop with a large bound is rejected quickly.
  11767			 * The most abusive loop will be:
  11768			 * r1 += 1
  11769			 * if r1 < 1000000 goto pc-2
  11770			 * 1M insn_processed limit / 100 == 10k peak states.
  11771			 * This threshold shouldn't be too high either, since states
  11772			 * at the end of the loop are likely to be useful in pruning.
  11773			 */
  11774			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
  11775			    env->insn_processed - env->prev_insn_processed < 100)
  11776				add_new_state = false;
  11777			goto miss;
  11778		}
  11779		if (states_equal(env, &sl->state, cur)) {
  11780			sl->hit_cnt++;
  11781			/* reached equivalent register/stack state,
  11782			 * prune the search.
  11783			 * Registers read by the continuation are read by us.
  11784			 * If we have any write marks in env->cur_state, they
  11785			 * will prevent corresponding reads in the continuation
  11786			 * from reaching our parent (an explored_state).  Our
  11787			 * own state will get the read marks recorded, but
  11788			 * they'll be immediately forgotten as we're pruning
  11789			 * this state and will pop a new one.
  11790			 */
  11791			err = propagate_liveness(env, &sl->state, cur);
  11792
  11793			/* if the previous state reached the exit with precision and
  11794			 * the current state is equivalent to it (except for precision marks)
  11795			 * the precision needs to be propagated back into
  11796			 * the current state.
  11797			 */
  11798			err = err ? : push_jmp_history(env, cur);
  11799			err = err ? : propagate_precision(env, &sl->state);
  11800			if (err)
  11801				return err;
  11802			return 1;
  11803		}
  11804miss:
  11805		/* when a new state is not going to be added, do not increase the miss count.
  11806		 * Otherwise several loop iterations will remove the state
  11807		 * recorded earlier. The goal of these heuristics is to have
  11808		 * states from some iterations of the loop (some in the beginning
  11809		 * and some at the end) to help pruning.
  11810		 */
  11811		if (add_new_state)
  11812			sl->miss_cnt++;
  11813		/* heuristic to determine whether this state is beneficial
  11814		 * to keep checking from state equivalence point of view.
  11815		 * Higher numbers increase max_states_per_insn and verification time,
  11816		 * but do not meaningfully decrease insn_processed.
  11817		 */
  11818		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
  11819			/* the state is unlikely to be useful. Remove it to
  11820			 * speed up verification
  11821			 */
  11822			*pprev = sl->next;
  11823			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
  11824				u32 br = sl->state.branches;
  11825
  11826				WARN_ONCE(br,
  11827					  "BUG live_done but branches_to_explore %d\n",
  11828					  br);
  11829				free_verifier_state(&sl->state, false);
  11830				kfree(sl);
  11831				env->peak_states--;
  11832			} else {
  11833				/* cannot free this state, since parentage chain may
  11834			 * walk it later. Add it to the free_list instead, to
  11835			 * be freed at the end of verification
  11836				 */
  11837				sl->next = env->free_list;
  11838				env->free_list = sl;
  11839			}
  11840			sl = *pprev;
  11841			continue;
  11842		}
  11843next:
  11844		pprev = &sl->next;
  11845		sl = *pprev;
  11846	}
  11847
  11848	if (env->max_states_per_insn < states_cnt)
  11849		env->max_states_per_insn = states_cnt;
  11850
  11851	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
  11852		return push_jmp_history(env, cur);
  11853
  11854	if (!add_new_state)
  11855		return push_jmp_history(env, cur);
  11856
  11857	/* There were no equivalent states, remember the current one.
  11858	 * Technically the current state is not proven to be safe yet,
  11859	 * but it will either reach the outermost bpf_exit (which means it's safe)
  11860	 * or it will be rejected. When there are no loops the verifier won't be
  11861	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
  11862	 * again on the way to bpf_exit.
  11863	 * When looping the sl->state.branches will be > 0 and this state
  11864	 * will not be considered for equivalence until branches == 0.
  11865	 */
  11866	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
  11867	if (!new_sl)
  11868		return -ENOMEM;
  11869	env->total_states++;
  11870	env->peak_states++;
  11871	env->prev_jmps_processed = env->jmps_processed;
  11872	env->prev_insn_processed = env->insn_processed;
  11873
  11874	/* add new state to the head of linked list */
  11875	new = &new_sl->state;
  11876	err = copy_verifier_state(new, cur);
  11877	if (err) {
  11878		free_verifier_state(new, false);
  11879		kfree(new_sl);
  11880		return err;
  11881	}
  11882	new->insn_idx = insn_idx;
  11883	WARN_ONCE(new->branches != 1,
  11884		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
  11885
  11886	cur->parent = new;
  11887	cur->first_insn_idx = insn_idx;
  11888	clear_jmp_history(cur);
  11889	new_sl->next = *explored_state(env, insn_idx);
  11890	*explored_state(env, insn_idx) = new_sl;
  11891	/* connect new state to parentage chain. Current frame needs all
  11892	 * registers connected. Only r6 - r9 of the callers are alive (pushed
  11893	 * to the stack implicitly by JITs) so in callers' frames connect just
  11894	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
  11895	 * the state of the call instruction (with WRITTEN set), and r0 comes
  11896	 * from callee with its full parentage chain, anyway.
  11897	 */
  11898	/* clear write marks in current state: the writes we did are not writes
  11899	 * our child did, so they don't screen off its reads from us.
  11900	 * (There are no read marks in current state, because reads always mark
  11901	 * their parent and current state never has children yet.  Only
  11902	 * explored_states can get read marks.)
  11903	 */
  11904	for (j = 0; j <= cur->curframe; j++) {
  11905		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
  11906			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
  11907		for (i = 0; i < BPF_REG_FP; i++)
  11908			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
  11909	}
  11910
  11911	/* all stack frames are accessible from callee, clear them all */
  11912	for (j = 0; j <= cur->curframe; j++) {
  11913		struct bpf_func_state *frame = cur->frame[j];
  11914		struct bpf_func_state *newframe = new->frame[j];
  11915
  11916		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
  11917			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
  11918			frame->stack[i].spilled_ptr.parent =
  11919						&newframe->stack[i].spilled_ptr;
  11920		}
  11921	}
  11922	return 0;
  11923}
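
/* Example of the eviction heuristic above (hypothetical counters): a stored
 * state with hit_cnt == 1 survives up to six misses and is removed from the
 * list on the seventh (7 > 1 * 3 + 3); it is freed immediately if its liveness
 * is already final (REG_LIVE_DONE), or parked on env->free_list otherwise.
 */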
  11924
  11925/* Return true if it's OK to have the same insn return a different type. */
  11926static bool reg_type_mismatch_ok(enum bpf_reg_type type)
  11927{
  11928	switch (base_type(type)) {
  11929	case PTR_TO_CTX:
  11930	case PTR_TO_SOCKET:
  11931	case PTR_TO_SOCK_COMMON:
  11932	case PTR_TO_TCP_SOCK:
  11933	case PTR_TO_XDP_SOCK:
  11934	case PTR_TO_BTF_ID:
  11935		return false;
  11936	default:
  11937		return true;
  11938	}
  11939}
  11940
  11941/* If an instruction was previously used with particular pointer types, then we
  11942 * need to be careful to avoid cases such as the below, where it may be ok
  11943 * for one branch accessing the pointer, but not ok for the other branch:
  11944 *
  11945 * R1 = sock_ptr
  11946 * goto X;
  11947 * ...
  11948 * R1 = some_other_valid_ptr;
  11949 * goto X;
  11950 * ...
  11951 * R2 = *(u32 *)(R1 + 0);
  11952 */
  11953static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
  11954{
  11955	return src != prev && (!reg_type_mismatch_ok(src) ||
  11956			       !reg_type_mismatch_ok(prev));
  11957}
  11958
  11959static int do_check(struct bpf_verifier_env *env)
  11960{
  11961	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
  11962	struct bpf_verifier_state *state = env->cur_state;
  11963	struct bpf_insn *insns = env->prog->insnsi;
  11964	struct bpf_reg_state *regs;
  11965	int insn_cnt = env->prog->len;
  11966	bool do_print_state = false;
  11967	int prev_insn_idx = -1;
  11968
  11969	for (;;) {
  11970		struct bpf_insn *insn;
  11971		u8 class;
  11972		int err;
  11973
  11974		env->prev_insn_idx = prev_insn_idx;
  11975		if (env->insn_idx >= insn_cnt) {
  11976			verbose(env, "invalid insn idx %d insn_cnt %d\n",
  11977				env->insn_idx, insn_cnt);
  11978			return -EFAULT;
  11979		}
  11980
  11981		insn = &insns[env->insn_idx];
  11982		class = BPF_CLASS(insn->code);
  11983
  11984		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
  11985			verbose(env,
  11986				"BPF program is too large. Processed %d insn\n",
  11987				env->insn_processed);
  11988			return -E2BIG;
  11989		}
  11990
  11991		err = is_state_visited(env, env->insn_idx);
  11992		if (err < 0)
  11993			return err;
  11994		if (err == 1) {
  11995			/* found equivalent state, can prune the search */
  11996			if (env->log.level & BPF_LOG_LEVEL) {
  11997				if (do_print_state)
  11998					verbose(env, "\nfrom %d to %d%s: safe\n",
  11999						env->prev_insn_idx, env->insn_idx,
  12000						env->cur_state->speculative ?
  12001						" (speculative execution)" : "");
  12002				else
  12003					verbose(env, "%d: safe\n", env->insn_idx);
  12004			}
  12005			goto process_bpf_exit;
  12006		}
  12007
  12008		if (signal_pending(current))
  12009			return -EAGAIN;
  12010
  12011		if (need_resched())
  12012			cond_resched();
  12013
  12014		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
  12015			verbose(env, "\nfrom %d to %d%s:",
  12016				env->prev_insn_idx, env->insn_idx,
  12017				env->cur_state->speculative ?
  12018				" (speculative execution)" : "");
  12019			print_verifier_state(env, state->frame[state->curframe], true);
  12020			do_print_state = false;
  12021		}
  12022
  12023		if (env->log.level & BPF_LOG_LEVEL) {
  12024			const struct bpf_insn_cbs cbs = {
  12025				.cb_call	= disasm_kfunc_name,
  12026				.cb_print	= verbose,
  12027				.private_data	= env,
  12028			};
  12029
  12030			if (verifier_state_scratched(env))
  12031				print_insn_state(env, state->frame[state->curframe]);
  12032
  12033			verbose_linfo(env, env->insn_idx, "; ");
  12034			env->prev_log_len = env->log.len_used;
  12035			verbose(env, "%d: ", env->insn_idx);
  12036			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
  12037			env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
  12038			env->prev_log_len = env->log.len_used;
  12039		}
  12040
  12041		if (bpf_prog_is_dev_bound(env->prog->aux)) {
  12042			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
  12043							   env->prev_insn_idx);
  12044			if (err)
  12045				return err;
  12046		}
  12047
  12048		regs = cur_regs(env);
  12049		sanitize_mark_insn_seen(env);
  12050		prev_insn_idx = env->insn_idx;
  12051
  12052		if (class == BPF_ALU || class == BPF_ALU64) {
  12053			err = check_alu_op(env, insn);
  12054			if (err)
  12055				return err;
  12056
  12057		} else if (class == BPF_LDX) {
  12058			enum bpf_reg_type *prev_src_type, src_reg_type;
  12059
  12060			/* check for reserved fields is already done */
  12061
  12062			/* check src operand */
  12063			err = check_reg_arg(env, insn->src_reg, SRC_OP);
  12064			if (err)
  12065				return err;
  12066
  12067			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
  12068			if (err)
  12069				return err;
  12070
  12071			src_reg_type = regs[insn->src_reg].type;
  12072
  12073			/* check that memory (src_reg + off) is readable,
  12074			 * the state of dst_reg will be updated by this func
  12075			 */
  12076			err = check_mem_access(env, env->insn_idx, insn->src_reg,
  12077					       insn->off, BPF_SIZE(insn->code),
  12078					       BPF_READ, insn->dst_reg, false);
  12079			if (err)
  12080				return err;
  12081
  12082			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
  12083
  12084			if (*prev_src_type == NOT_INIT) {
  12085				/* saw a valid insn
  12086				 * dst_reg = *(u32 *)(src_reg + off)
  12087				 * save type to validate intersecting paths
  12088				 */
  12089				*prev_src_type = src_reg_type;
  12090
  12091			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
  12092				/* An abuser program is trying to use the same insn
  12093				 * dst_reg = *(u32*) (src_reg + off)
  12094				 * with different pointer types:
  12095				 * src_reg == ctx in one branch and
  12096				 * src_reg == stack|map in some other branch.
  12097				 * Reject it.
  12098				 */
  12099				verbose(env, "same insn cannot be used with different pointers\n");
  12100				return -EINVAL;
  12101			}
  12102
  12103		} else if (class == BPF_STX) {
  12104			enum bpf_reg_type *prev_dst_type, dst_reg_type;
  12105
  12106			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
  12107				err = check_atomic(env, env->insn_idx, insn);
  12108				if (err)
  12109					return err;
  12110				env->insn_idx++;
  12111				continue;
  12112			}
  12113
  12114			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
  12115				verbose(env, "BPF_STX uses reserved fields\n");
  12116				return -EINVAL;
  12117			}
  12118
  12119			/* check src1 operand */
  12120			err = check_reg_arg(env, insn->src_reg, SRC_OP);
  12121			if (err)
  12122				return err;
  12123			/* check src2 operand */
  12124			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  12125			if (err)
  12126				return err;
  12127
  12128			dst_reg_type = regs[insn->dst_reg].type;
  12129
  12130			/* check that memory (dst_reg + off) is writeable */
  12131			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
  12132					       insn->off, BPF_SIZE(insn->code),
  12133					       BPF_WRITE, insn->src_reg, false);
  12134			if (err)
  12135				return err;
  12136
  12137			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
  12138
  12139			if (*prev_dst_type == NOT_INIT) {
  12140				*prev_dst_type = dst_reg_type;
  12141			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
  12142				verbose(env, "same insn cannot be used with different pointers\n");
  12143				return -EINVAL;
  12144			}
  12145
  12146		} else if (class == BPF_ST) {
  12147			if (BPF_MODE(insn->code) != BPF_MEM ||
  12148			    insn->src_reg != BPF_REG_0) {
  12149				verbose(env, "BPF_ST uses reserved fields\n");
  12150				return -EINVAL;
  12151			}
  12152			/* check src operand */
  12153			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
  12154			if (err)
  12155				return err;
  12156
  12157			if (is_ctx_reg(env, insn->dst_reg)) {
  12158				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
  12159					insn->dst_reg,
  12160					reg_type_str(env, reg_state(env, insn->dst_reg)->type));
  12161				return -EACCES;
  12162			}
  12163
  12164			/* check that memory (dst_reg + off) is writeable */
  12165			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
  12166					       insn->off, BPF_SIZE(insn->code),
  12167					       BPF_WRITE, -1, false);
  12168			if (err)
  12169				return err;
  12170
  12171		} else if (class == BPF_JMP || class == BPF_JMP32) {
  12172			u8 opcode = BPF_OP(insn->code);
  12173
  12174			env->jmps_processed++;
  12175			if (opcode == BPF_CALL) {
  12176				if (BPF_SRC(insn->code) != BPF_K ||
  12177				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
  12178				     && insn->off != 0) ||
  12179				    (insn->src_reg != BPF_REG_0 &&
  12180				     insn->src_reg != BPF_PSEUDO_CALL &&
  12181				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
  12182				    insn->dst_reg != BPF_REG_0 ||
  12183				    class == BPF_JMP32) {
  12184					verbose(env, "BPF_CALL uses reserved fields\n");
  12185					return -EINVAL;
  12186				}
  12187
  12188				if (env->cur_state->active_spin_lock &&
  12189				    (insn->src_reg == BPF_PSEUDO_CALL ||
  12190				     insn->imm != BPF_FUNC_spin_unlock)) {
  12191					verbose(env, "function calls are not allowed while holding a lock\n");
  12192					return -EINVAL;
  12193				}
  12194				if (insn->src_reg == BPF_PSEUDO_CALL)
  12195					err = check_func_call(env, insn, &env->insn_idx);
  12196				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
  12197					err = check_kfunc_call(env, insn, &env->insn_idx);
  12198				else
  12199					err = check_helper_call(env, insn, &env->insn_idx);
  12200				if (err)
  12201					return err;
  12202			} else if (opcode == BPF_JA) {
  12203				if (BPF_SRC(insn->code) != BPF_K ||
  12204				    insn->imm != 0 ||
  12205				    insn->src_reg != BPF_REG_0 ||
  12206				    insn->dst_reg != BPF_REG_0 ||
  12207				    class == BPF_JMP32) {
  12208					verbose(env, "BPF_JA uses reserved fields\n");
  12209					return -EINVAL;
  12210				}
  12211
  12212				env->insn_idx += insn->off + 1;
  12213				continue;
  12214
  12215			} else if (opcode == BPF_EXIT) {
  12216				if (BPF_SRC(insn->code) != BPF_K ||
  12217				    insn->imm != 0 ||
  12218				    insn->src_reg != BPF_REG_0 ||
  12219				    insn->dst_reg != BPF_REG_0 ||
  12220				    class == BPF_JMP32) {
  12221					verbose(env, "BPF_EXIT uses reserved fields\n");
  12222					return -EINVAL;
  12223				}
  12224
  12225				if (env->cur_state->active_spin_lock) {
  12226					verbose(env, "bpf_spin_unlock is missing\n");
  12227					return -EINVAL;
  12228				}
  12229
  12230				if (state->curframe) {
  12231					/* exit from nested function */
  12232					err = prepare_func_exit(env, &env->insn_idx);
  12233					if (err)
  12234						return err;
  12235					do_print_state = true;
  12236					continue;
  12237				}
  12238
  12239				err = check_reference_leak(env);
  12240				if (err)
  12241					return err;
  12242
  12243				err = check_return_code(env);
  12244				if (err)
  12245					return err;
  12246process_bpf_exit:
  12247				mark_verifier_state_scratched(env);
  12248				update_branch_counts(env, env->cur_state);
  12249				err = pop_stack(env, &prev_insn_idx,
  12250						&env->insn_idx, pop_log);
  12251				if (err < 0) {
  12252					if (err != -ENOENT)
  12253						return err;
  12254					break;
  12255				} else {
  12256					do_print_state = true;
  12257					continue;
  12258				}
  12259			} else {
  12260				err = check_cond_jmp_op(env, insn, &env->insn_idx);
  12261				if (err)
  12262					return err;
  12263			}
  12264		} else if (class == BPF_LD) {
  12265			u8 mode = BPF_MODE(insn->code);
  12266
  12267			if (mode == BPF_ABS || mode == BPF_IND) {
  12268				err = check_ld_abs(env, insn);
  12269				if (err)
  12270					return err;
  12271
  12272			} else if (mode == BPF_IMM) {
  12273				err = check_ld_imm(env, insn);
  12274				if (err)
  12275					return err;
  12276
  12277				env->insn_idx++;
  12278				sanitize_mark_insn_seen(env);
  12279			} else {
  12280				verbose(env, "invalid BPF_LD mode\n");
  12281				return -EINVAL;
  12282			}
  12283		} else {
  12284			verbose(env, "unknown insn class %d\n", class);
  12285			return -EINVAL;
  12286		}
  12287
  12288		env->insn_idx++;
  12289	}
  12290
  12291	return 0;
  12292}
  12293
  12294static int find_btf_percpu_datasec(struct btf *btf)
  12295{
  12296	const struct btf_type *t;
  12297	const char *tname;
  12298	int i, n;
  12299
  12300	/*
  12301	 * Both vmlinux and module each have their own ".data..percpu"
  12302	 * DATASECs in BTF. So for the module case, we need to skip vmlinux BTF
  12303	 * types and look only at the module's own BTF types.
  12304	 */
  12305	n = btf_nr_types(btf);
  12306	if (btf_is_module(btf))
  12307		i = btf_nr_types(btf_vmlinux);
  12308	else
  12309		i = 1;
  12310
  12311	for (; i < n; i++) {
  12312		t = btf_type_by_id(btf, i);
  12313		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
  12314			continue;
  12315
  12316		tname = btf_name_by_offset(btf, t->name_off);
  12317		if (!strcmp(tname, ".data..percpu"))
  12318			return i;
  12319	}
  12320
  12321	return -ENOENT;
  12322}
  12323
  12324/* replace pseudo btf_id with kernel symbol address */
  12325static int check_pseudo_btf_id(struct bpf_verifier_env *env,
  12326			       struct bpf_insn *insn,
  12327			       struct bpf_insn_aux_data *aux)
  12328{
  12329	const struct btf_var_secinfo *vsi;
  12330	const struct btf_type *datasec;
  12331	struct btf_mod_pair *btf_mod;
  12332	const struct btf_type *t;
  12333	const char *sym_name;
  12334	bool percpu = false;
  12335	u32 type, id = insn->imm;
  12336	struct btf *btf;
  12337	s32 datasec_id;
  12338	u64 addr;
  12339	int i, btf_fd, err;
  12340
  12341	btf_fd = insn[1].imm;
  12342	if (btf_fd) {
  12343		btf = btf_get_by_fd(btf_fd);
  12344		if (IS_ERR(btf)) {
  12345			verbose(env, "invalid module BTF object FD specified.\n");
  12346			return -EINVAL;
  12347		}
  12348	} else {
  12349		if (!btf_vmlinux) {
  12350			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
  12351			return -EINVAL;
  12352		}
  12353		btf = btf_vmlinux;
  12354		btf_get(btf);
  12355	}
  12356
  12357	t = btf_type_by_id(btf, id);
  12358	if (!t) {
  12359		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
  12360		err = -ENOENT;
  12361		goto err_put;
  12362	}
  12363
  12364	if (!btf_type_is_var(t)) {
  12365		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
  12366		err = -EINVAL;
  12367		goto err_put;
  12368	}
  12369
  12370	sym_name = btf_name_by_offset(btf, t->name_off);
  12371	addr = kallsyms_lookup_name(sym_name);
  12372	if (!addr) {
  12373		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
  12374			sym_name);
  12375		err = -ENOENT;
  12376		goto err_put;
  12377	}
  12378
  12379	datasec_id = find_btf_percpu_datasec(btf);
  12380	if (datasec_id > 0) {
  12381		datasec = btf_type_by_id(btf, datasec_id);
  12382		for_each_vsi(i, datasec, vsi) {
  12383			if (vsi->type == id) {
  12384				percpu = true;
  12385				break;
  12386			}
  12387		}
  12388	}
  12389
  12390	insn[0].imm = (u32)addr;
  12391	insn[1].imm = addr >> 32;
  12392
  12393	type = t->type;
  12394	t = btf_type_skip_modifiers(btf, type, NULL);
  12395	if (percpu) {
  12396		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
  12397		aux->btf_var.btf = btf;
  12398		aux->btf_var.btf_id = type;
  12399	} else if (!btf_type_is_struct(t)) {
  12400		const struct btf_type *ret;
  12401		const char *tname;
  12402		u32 tsize;
  12403
  12404		/* resolve the type size of ksym. */
  12405		ret = btf_resolve_size(btf, t, &tsize);
  12406		if (IS_ERR(ret)) {
  12407			tname = btf_name_by_offset(btf, t->name_off);
  12408			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
  12409				tname, PTR_ERR(ret));
  12410			err = -EINVAL;
  12411			goto err_put;
  12412		}
  12413		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
  12414		aux->btf_var.mem_size = tsize;
  12415	} else {
  12416		aux->btf_var.reg_type = PTR_TO_BTF_ID;
  12417		aux->btf_var.btf = btf;
  12418		aux->btf_var.btf_id = type;
  12419	}
  12420
  12421	/* check whether we recorded this BTF (and maybe module) already */
  12422	for (i = 0; i < env->used_btf_cnt; i++) {
  12423		if (env->used_btfs[i].btf == btf) {
  12424			btf_put(btf);
  12425			return 0;
  12426		}
  12427	}
  12428
  12429	if (env->used_btf_cnt >= MAX_USED_BTFS) {
  12430		err = -E2BIG;
  12431		goto err_put;
  12432	}
  12433
  12434	btf_mod = &env->used_btfs[env->used_btf_cnt];
  12435	btf_mod->btf = btf;
  12436	btf_mod->module = NULL;
  12437
  12438	/* if we reference variables from kernel module, bump its refcount */
  12439	if (btf_is_module(btf)) {
  12440		btf_mod->module = btf_try_get_module(btf);
  12441		if (!btf_mod->module) {
  12442			err = -ENXIO;
  12443			goto err_put;
  12444		}
  12445	}
  12446
  12447	env->used_btf_cnt++;
  12448
  12449	return 0;
  12450err_put:
  12451	btf_put(btf);
  12452	return err;
  12453}
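
/* Example (hypothetical ksym): for a ld_imm64 whose insn->imm carries the BTF
 * id of a kernel variable, the address resolved via kallsyms, e.g.
 * 0xffffffff8a012345, is split across the instruction pair: insn[0].imm gets
 * the low 32 bits and insn[1].imm the high 32 bits.  The destination register
 * then becomes PTR_TO_BTF_ID | MEM_PERCPU for per-CPU variables, PTR_TO_BTF_ID
 * for struct-typed variables, and read-only PTR_TO_MEM of the resolved size
 * otherwise.
 */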
  12454
  12455static int check_map_prealloc(struct bpf_map *map)
  12456{
  12457	return (map->map_type != BPF_MAP_TYPE_HASH &&
  12458		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
  12459		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
  12460		!(map->map_flags & BPF_F_NO_PREALLOC);
  12461}
  12462
  12463static bool is_tracing_prog_type(enum bpf_prog_type type)
  12464{
  12465	switch (type) {
  12466	case BPF_PROG_TYPE_KPROBE:
  12467	case BPF_PROG_TYPE_TRACEPOINT:
  12468	case BPF_PROG_TYPE_PERF_EVENT:
  12469	case BPF_PROG_TYPE_RAW_TRACEPOINT:
  12470		return true;
  12471	default:
  12472		return false;
  12473	}
  12474}
  12475
  12476static bool is_preallocated_map(struct bpf_map *map)
  12477{
  12478	if (!check_map_prealloc(map))
  12479		return false;
  12480	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
  12481		return false;
  12482	return true;
  12483}
  12484
  12485static int check_map_prog_compatibility(struct bpf_verifier_env *env,
  12486					struct bpf_map *map,
  12487					struct bpf_prog *prog)
  12488
  12489{
  12490	enum bpf_prog_type prog_type = resolve_prog_type(prog);
  12491	/*
  12492	 * Validate that trace type programs use preallocated hash maps.
  12493	 *
  12494	 * For programs attached to PERF events this is mandatory as the
  12495	 * perf NMI can hit any arbitrary code sequence.
  12496	 *
  12497	 * All other trace types using run-time allocated (non-preallocated) hash
  12498	 * maps are unsafe as well because tracepoints or kprobes can fire inside
  12499	 * locked regions of the memory allocator or at a place where a recursion
  12500	 * into the memory allocator would see inconsistent state.
  12501	 *
  12502	 * On RT enabled kernels run-time allocation of all trace type
  12503	 * programs is strictly prohibited due to lock type constraints. On
  12504	 * !RT kernels it is allowed for backwards compatibility reasons for
  12505	 * now, but warnings are emitted so developers are made aware of
  12506	 * the unsafety and can fix their programs before this is enforced.
  12507	 */
  12508	if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
  12509		if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
  12510			verbose(env, "perf_event programs can only use preallocated hash map\n");
  12511			return -EINVAL;
  12512		}
  12513		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
  12514			verbose(env, "trace type programs can only use preallocated hash map\n");
  12515			return -EINVAL;
  12516		}
  12517		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
  12518		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
  12519	}
  12520
  12521	if (map_value_has_spin_lock(map)) {
  12522		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
  12523			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
  12524			return -EINVAL;
  12525		}
  12526
  12527		if (is_tracing_prog_type(prog_type)) {
  12528			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
  12529			return -EINVAL;
  12530		}
  12531
  12532		if (prog->aux->sleepable) {
  12533			verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
  12534			return -EINVAL;
  12535		}
  12536	}
  12537
  12538	if (map_value_has_timer(map)) {
  12539		if (is_tracing_prog_type(prog_type)) {
  12540			verbose(env, "tracing progs cannot use bpf_timer yet\n");
  12541			return -EINVAL;
  12542		}
  12543	}
  12544
  12545	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
  12546	    !bpf_offload_prog_map_match(prog, map)) {
  12547		verbose(env, "offload device mismatch between prog and map\n");
  12548		return -EINVAL;
  12549	}
  12550
  12551	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
  12552		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
  12553		return -EINVAL;
  12554	}
  12555
  12556	if (prog->aux->sleepable)
  12557		switch (map->map_type) {
  12558		case BPF_MAP_TYPE_HASH:
  12559		case BPF_MAP_TYPE_LRU_HASH:
  12560		case BPF_MAP_TYPE_ARRAY:
  12561		case BPF_MAP_TYPE_PERCPU_HASH:
  12562		case BPF_MAP_TYPE_PERCPU_ARRAY:
  12563		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
  12564		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
  12565		case BPF_MAP_TYPE_HASH_OF_MAPS:
  12566			if (!is_preallocated_map(map)) {
  12567				verbose(env,
  12568					"Sleepable programs can only use preallocated maps\n");
  12569				return -EINVAL;
  12570			}
  12571			break;
  12572		case BPF_MAP_TYPE_RINGBUF:
  12573		case BPF_MAP_TYPE_INODE_STORAGE:
  12574		case BPF_MAP_TYPE_SK_STORAGE:
  12575		case BPF_MAP_TYPE_TASK_STORAGE:
  12576			break;
  12577		default:
  12578			verbose(env,
  12579				"Sleepable programs can only use array, hash, and ringbuf maps\n");
  12580			return -EINVAL;
  12581		}
  12582
  12583	return 0;
  12584}
  12585
  12586static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
  12587{
  12588	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
  12589		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
  12590}
  12591
  12592/* find and rewrite pseudo imm in ld_imm64 instructions:
  12593 *
  12594 * 1. if it accesses map FD, replace it with actual map pointer.
  12595 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
  12596 *
  12597 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
  12598 */
  12599static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
  12600{
  12601	struct bpf_insn *insn = env->prog->insnsi;
  12602	int insn_cnt = env->prog->len;
  12603	int i, j, err;
  12604
  12605	err = bpf_prog_calc_tag(env->prog);
  12606	if (err)
  12607		return err;
  12608
  12609	for (i = 0; i < insn_cnt; i++, insn++) {
  12610		if (BPF_CLASS(insn->code) == BPF_LDX &&
  12611		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
  12612			verbose(env, "BPF_LDX uses reserved fields\n");
  12613			return -EINVAL;
  12614		}
  12615
  12616		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
  12617			struct bpf_insn_aux_data *aux;
  12618			struct bpf_map *map;
  12619			struct fd f;
  12620			u64 addr;
  12621			u32 fd;
  12622
  12623			if (i == insn_cnt - 1 || insn[1].code != 0 ||
  12624			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
  12625			    insn[1].off != 0) {
  12626				verbose(env, "invalid bpf_ld_imm64 insn\n");
  12627				return -EINVAL;
  12628			}
  12629
  12630			if (insn[0].src_reg == 0)
  12631				/* valid generic load 64-bit imm */
  12632				goto next_insn;
  12633
  12634			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
  12635				aux = &env->insn_aux_data[i];
  12636				err = check_pseudo_btf_id(env, insn, aux);
  12637				if (err)
  12638					return err;
  12639				goto next_insn;
  12640			}
  12641
  12642			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
  12643				aux = &env->insn_aux_data[i];
  12644				aux->ptr_type = PTR_TO_FUNC;
  12645				goto next_insn;
  12646			}
  12647
  12648			/* In final convert_pseudo_ld_imm64() step, this is
  12649			 * converted into regular 64-bit imm load insn.
  12650			 */
  12651			switch (insn[0].src_reg) {
  12652			case BPF_PSEUDO_MAP_VALUE:
  12653			case BPF_PSEUDO_MAP_IDX_VALUE:
  12654				break;
  12655			case BPF_PSEUDO_MAP_FD:
  12656			case BPF_PSEUDO_MAP_IDX:
  12657				if (insn[1].imm == 0)
  12658					break;
  12659				fallthrough;
  12660			default:
  12661				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
  12662				return -EINVAL;
  12663			}
  12664
  12665			switch (insn[0].src_reg) {
  12666			case BPF_PSEUDO_MAP_IDX_VALUE:
  12667			case BPF_PSEUDO_MAP_IDX:
  12668				if (bpfptr_is_null(env->fd_array)) {
  12669					verbose(env, "fd_idx without fd_array is invalid\n");
  12670					return -EPROTO;
  12671				}
  12672				if (copy_from_bpfptr_offset(&fd, env->fd_array,
  12673							    insn[0].imm * sizeof(fd),
  12674							    sizeof(fd)))
  12675					return -EFAULT;
  12676				break;
  12677			default:
  12678				fd = insn[0].imm;
  12679				break;
  12680			}
  12681
  12682			f = fdget(fd);
  12683			map = __bpf_map_get(f);
  12684			if (IS_ERR(map)) {
  12685				verbose(env, "fd %d is not pointing to valid bpf_map\n",
  12686					insn[0].imm);
  12687				return PTR_ERR(map);
  12688			}
  12689
  12690			err = check_map_prog_compatibility(env, map, env->prog);
  12691			if (err) {
  12692				fdput(f);
  12693				return err;
  12694			}
  12695
  12696			aux = &env->insn_aux_data[i];
  12697			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
  12698			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
  12699				addr = (unsigned long)map;
  12700			} else {
  12701				u32 off = insn[1].imm;
  12702
  12703				if (off >= BPF_MAX_VAR_OFF) {
  12704					verbose(env, "direct value offset of %u is not allowed\n", off);
  12705					fdput(f);
  12706					return -EINVAL;
  12707				}
  12708
  12709				if (!map->ops->map_direct_value_addr) {
  12710					verbose(env, "no direct value access support for this map type\n");
  12711					fdput(f);
  12712					return -EINVAL;
  12713				}
  12714
  12715				err = map->ops->map_direct_value_addr(map, &addr, off);
  12716				if (err) {
  12717					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
  12718						map->value_size, off);
  12719					fdput(f);
  12720					return err;
  12721				}
  12722
  12723				aux->map_off = off;
  12724				addr += off;
  12725			}
  12726
  12727			insn[0].imm = (u32)addr;
  12728			insn[1].imm = addr >> 32;
  12729
  12730			/* check whether we recorded this map already */
  12731			for (j = 0; j < env->used_map_cnt; j++) {
  12732				if (env->used_maps[j] == map) {
  12733					aux->map_index = j;
  12734					fdput(f);
  12735					goto next_insn;
  12736				}
  12737			}
  12738
  12739			if (env->used_map_cnt >= MAX_USED_MAPS) {
  12740				fdput(f);
  12741				return -E2BIG;
  12742			}
  12743
  12744			/* hold the map. If the program is rejected by verifier,
  12745			 * the map will be released by release_maps() or it
  12746			 * will be used by the valid program until it's unloaded
  12747			 * and all maps are released in free_used_maps()
  12748			 */
  12749			bpf_map_inc(map);
  12750
  12751			aux->map_index = env->used_map_cnt;
  12752			env->used_maps[env->used_map_cnt++] = map;
  12753
  12754			if (bpf_map_is_cgroup_storage(map) &&
  12755			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
  12756				verbose(env, "only one cgroup storage of each type is allowed\n");
  12757				fdput(f);
  12758				return -EBUSY;
  12759			}
  12760
  12761			fdput(f);
  12762next_insn:
  12763			insn++;
  12764			i++;
  12765			continue;
  12766		}
  12767
  12768		/* Basic sanity check before we invest more work here. */
  12769		if (!bpf_opcode_in_insntable(insn->code)) {
  12770			verbose(env, "unknown opcode %02x\n", insn->code);
  12771			return -EINVAL;
  12772		}
  12773	}
  12774
  12775	/* now all pseudo BPF_LD_IMM64 instructions load valid
  12776	 * 'struct bpf_map *' into a register instead of user map_fd.
  12777	 * These pointers will be used later by verifier to validate map access.
  12778	 */
  12779	return 0;
  12780}
  12781
  12782/* drop refcnt of maps used by the rejected program */
  12783static void release_maps(struct bpf_verifier_env *env)
  12784{
  12785	__bpf_free_used_maps(env->prog->aux, env->used_maps,
  12786			     env->used_map_cnt);
  12787}
  12788
  12789/* drop refcnt of maps used by the rejected program */
  12790static void release_btfs(struct bpf_verifier_env *env)
  12791{
  12792	__bpf_free_used_btfs(env->prog->aux, env->used_btfs,
  12793			     env->used_btf_cnt);
  12794}
  12795
  12796/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
  12797static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
  12798{
  12799	struct bpf_insn *insn = env->prog->insnsi;
  12800	int insn_cnt = env->prog->len;
  12801	int i;
  12802
  12803	for (i = 0; i < insn_cnt; i++, insn++) {
  12804		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
  12805			continue;
  12806		if (insn->src_reg == BPF_PSEUDO_FUNC)
  12807			continue;
  12808		insn->src_reg = 0;
  12809	}
  12810}
  12811
   12812	/* single env->prog->insnsi[off] instruction was replaced with the range
   12813	 * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
   12814	 * [0, off) and [off, end) to new locations, so the patched range stays zero
  12815 */
  12816static void adjust_insn_aux_data(struct bpf_verifier_env *env,
  12817				 struct bpf_insn_aux_data *new_data,
  12818				 struct bpf_prog *new_prog, u32 off, u32 cnt)
  12819{
  12820	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
  12821	struct bpf_insn *insn = new_prog->insnsi;
  12822	u32 old_seen = old_data[off].seen;
  12823	u32 prog_len;
  12824	int i;
  12825
   12826	/* aux info at OFF always needs adjustment, no matter whether the fast
   12827	 * path (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
  12828	 * original insn at old prog.
  12829	 */
  12830	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
  12831
  12832	if (cnt == 1)
  12833		return;
  12834	prog_len = new_prog->len;
  12835
  12836	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
  12837	memcpy(new_data + off + cnt - 1, old_data + off,
  12838	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
  12839	for (i = off; i < off + cnt - 1; i++) {
   12840		/* Expand insnsi[off]'s seen count to the patched range. */
  12841		new_data[i].seen = old_seen;
  12842		new_data[i].zext_dst = insn_has_def32(env, insn + i);
  12843	}
  12844	env->insn_aux_data = new_data;
  12845	vfree(old_data);
  12846}
  12847
  12848static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
  12849{
  12850	int i;
  12851
  12852	if (len == 1)
  12853		return;
  12854	/* NOTE: fake 'exit' subprog should be updated as well. */
  12855	for (i = 0; i <= env->subprog_cnt; i++) {
  12856		if (env->subprog_info[i].start <= off)
  12857			continue;
  12858		env->subprog_info[i].start += len - 1;
  12859	}
  12860}
  12861
  12862static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
  12863{
  12864	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
  12865	int i, sz = prog->aux->size_poke_tab;
  12866	struct bpf_jit_poke_descriptor *desc;
  12867
  12868	for (i = 0; i < sz; i++) {
  12869		desc = &tab[i];
  12870		if (desc->insn_idx <= off)
  12871			continue;
  12872		desc->insn_idx += len - 1;
  12873	}
  12874}
  12875
  12876static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
  12877					    const struct bpf_insn *patch, u32 len)
  12878{
  12879	struct bpf_prog *new_prog;
  12880	struct bpf_insn_aux_data *new_data = NULL;
  12881
  12882	if (len > 1) {
  12883		new_data = vzalloc(array_size(env->prog->len + len - 1,
  12884					      sizeof(struct bpf_insn_aux_data)));
  12885		if (!new_data)
  12886			return NULL;
  12887	}
  12888
  12889	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
  12890	if (IS_ERR(new_prog)) {
  12891		if (PTR_ERR(new_prog) == -ERANGE)
  12892			verbose(env,
  12893				"insn %d cannot be patched due to 16-bit range\n",
  12894				env->insn_aux_data[off].orig_idx);
  12895		vfree(new_data);
  12896		return NULL;
  12897	}
  12898	adjust_insn_aux_data(env, new_data, new_prog, off, len);
  12899	adjust_subprog_starts(env, off, len);
  12900	adjust_poke_descs(new_prog, off, len);
  12901	return new_prog;
  12902}
  12903
  12904static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
  12905					      u32 off, u32 cnt)
  12906{
  12907	int i, j;
  12908
  12909	/* find first prog starting at or after off (first to remove) */
  12910	for (i = 0; i < env->subprog_cnt; i++)
  12911		if (env->subprog_info[i].start >= off)
  12912			break;
  12913	/* find first prog starting at or after off + cnt (first to stay) */
  12914	for (j = i; j < env->subprog_cnt; j++)
  12915		if (env->subprog_info[j].start >= off + cnt)
  12916			break;
  12917	/* if j doesn't start exactly at off + cnt, we are just removing
  12918	 * the front of previous prog
  12919	 */
  12920	if (env->subprog_info[j].start != off + cnt)
  12921		j--;
  12922
  12923	if (j > i) {
  12924		struct bpf_prog_aux *aux = env->prog->aux;
  12925		int move;
  12926
  12927		/* move fake 'exit' subprog as well */
  12928		move = env->subprog_cnt + 1 - j;
  12929
  12930		memmove(env->subprog_info + i,
  12931			env->subprog_info + j,
  12932			sizeof(*env->subprog_info) * move);
  12933		env->subprog_cnt -= j - i;
  12934
  12935		/* remove func_info */
  12936		if (aux->func_info) {
  12937			move = aux->func_info_cnt - j;
  12938
  12939			memmove(aux->func_info + i,
  12940				aux->func_info + j,
  12941				sizeof(*aux->func_info) * move);
  12942			aux->func_info_cnt -= j - i;
  12943			/* func_info->insn_off is set after all code rewrites,
  12944			 * in adjust_btf_func() - no need to adjust
  12945			 */
  12946		}
  12947	} else {
  12948		/* convert i from "first prog to remove" to "first to adjust" */
  12949		if (env->subprog_info[i].start == off)
  12950			i++;
  12951	}
  12952
  12953	/* update fake 'exit' subprog as well */
  12954	for (; i <= env->subprog_cnt; i++)
  12955		env->subprog_info[i].start -= cnt;
  12956
  12957	return 0;
  12958}
  12959
  12960static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
  12961				      u32 cnt)
  12962{
  12963	struct bpf_prog *prog = env->prog;
  12964	u32 i, l_off, l_cnt, nr_linfo;
  12965	struct bpf_line_info *linfo;
  12966
  12967	nr_linfo = prog->aux->nr_linfo;
  12968	if (!nr_linfo)
  12969		return 0;
  12970
  12971	linfo = prog->aux->linfo;
  12972
  12973	/* find first line info to remove, count lines to be removed */
  12974	for (i = 0; i < nr_linfo; i++)
  12975		if (linfo[i].insn_off >= off)
  12976			break;
  12977
  12978	l_off = i;
  12979	l_cnt = 0;
  12980	for (; i < nr_linfo; i++)
  12981		if (linfo[i].insn_off < off + cnt)
  12982			l_cnt++;
  12983		else
  12984			break;
  12985
   12986	/* If the first live insn doesn't match the first live linfo, it needs to
   12987	 * "inherit" the last removed linfo.  prog is already modified, so prog->len == off
  12988	 * means no live instructions after (tail of the program was removed).
  12989	 */
  12990	if (prog->len != off && l_cnt &&
  12991	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
  12992		l_cnt--;
  12993		linfo[--i].insn_off = off + cnt;
  12994	}
  12995
   12996	/* remove the line info entries which refer to the removed instructions */
  12997	if (l_cnt) {
  12998		memmove(linfo + l_off, linfo + i,
  12999			sizeof(*linfo) * (nr_linfo - i));
  13000
  13001		prog->aux->nr_linfo -= l_cnt;
  13002		nr_linfo = prog->aux->nr_linfo;
  13003	}
  13004
  13005	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
  13006	for (i = l_off; i < nr_linfo; i++)
  13007		linfo[i].insn_off -= cnt;
  13008
  13009	/* fix up all subprogs (incl. 'exit') which start >= off */
  13010	for (i = 0; i <= env->subprog_cnt; i++)
  13011		if (env->subprog_info[i].linfo_idx > l_off) {
  13012			/* program may have started in the removed region but
  13013			 * may not be fully removed
  13014			 */
  13015			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
  13016				env->subprog_info[i].linfo_idx -= l_cnt;
  13017			else
  13018				env->subprog_info[i].linfo_idx = l_off;
  13019		}
  13020
  13021	return 0;
  13022}
  13023
  13024static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
  13025{
  13026	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
  13027	unsigned int orig_prog_len = env->prog->len;
  13028	int err;
  13029
  13030	if (bpf_prog_is_dev_bound(env->prog->aux))
  13031		bpf_prog_offload_remove_insns(env, off, cnt);
  13032
  13033	err = bpf_remove_insns(env->prog, off, cnt);
  13034	if (err)
  13035		return err;
  13036
  13037	err = adjust_subprog_starts_after_remove(env, off, cnt);
  13038	if (err)
  13039		return err;
  13040
  13041	err = bpf_adj_linfo_after_remove(env, off, cnt);
  13042	if (err)
  13043		return err;
  13044
  13045	memmove(aux_data + off,	aux_data + off + cnt,
  13046		sizeof(*aux_data) * (orig_prog_len - off - cnt));
  13047
  13048	return 0;
  13049}
  13050
  13051/* The verifier does more data flow analysis than llvm and will not
  13052 * explore branches that are dead at run time. Malicious programs can
  13053 * have dead code too. Therefore replace all dead at-run-time code
  13054 * with 'ja -1'.
  13055 *
   13056	 * Just nops would not be optimal: if they sat at the end of the
   13057	 * program and, through another bug, we managed to jump there, we
   13058	 * would execute beyond program memory. Returning an exception
  13059 * code also wouldn't work since we can have subprogs where the dead
  13060 * code could be located.
  13061 */
  13062static void sanitize_dead_code(struct bpf_verifier_env *env)
  13063{
  13064	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
  13065	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
  13066	struct bpf_insn *insn = env->prog->insnsi;
  13067	const int insn_cnt = env->prog->len;
  13068	int i;
  13069
  13070	for (i = 0; i < insn_cnt; i++) {
  13071		if (aux_data[i].seen)
  13072			continue;
  13073		memcpy(insn + i, &trap, sizeof(trap));
  13074		aux_data[i].zext_dst = false;
  13075	}
  13076}
  13077
  13078static bool insn_is_cond_jump(u8 code)
  13079{
  13080	u8 op;
  13081
  13082	if (BPF_CLASS(code) == BPF_JMP32)
  13083		return true;
  13084
  13085	if (BPF_CLASS(code) != BPF_JMP)
  13086		return false;
  13087
  13088	op = BPF_OP(code);
  13089	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
  13090}
  13091
  13092static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
  13093{
  13094	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
  13095	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
  13096	struct bpf_insn *insn = env->prog->insnsi;
  13097	const int insn_cnt = env->prog->len;
  13098	int i;
  13099
  13100	for (i = 0; i < insn_cnt; i++, insn++) {
  13101		if (!insn_is_cond_jump(insn->code))
  13102			continue;
  13103
  13104		if (!aux_data[i + 1].seen)
  13105			ja.off = insn->off;
  13106		else if (!aux_data[i + 1 + insn->off].seen)
  13107			ja.off = 0;
  13108		else
  13109			continue;
  13110
  13111		if (bpf_prog_is_dev_bound(env->prog->aux))
  13112			bpf_prog_offload_replace_insn(env, i, &ja);
  13113
  13114		memcpy(insn, &ja, sizeof(ja));
  13115	}
  13116}
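
/*
 * Illustrative sketch, not part of verifier.c: with the liveness information
 * gathered during verification, a conditional jump whose fall-through insn is
 * dead is always taken and becomes an unconditional jump to its target, while
 * one whose target is dead is never taken and becomes 'goto +0' (a nop that
 * opt_remove_nops() strips afterwards).  Roughly:
 *
 *	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5)	(fall-through at i + 1 dead)
 *		becomes BPF_JMP_IMM(BPF_JA, 0, 0, 5)
 *	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5)	(target at i + 1 + 5 dead)
 *		becomes BPF_JMP_IMM(BPF_JA, 0, 0, 0)
 */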
  13117
  13118static int opt_remove_dead_code(struct bpf_verifier_env *env)
  13119{
  13120	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
  13121	int insn_cnt = env->prog->len;
  13122	int i, err;
  13123
  13124	for (i = 0; i < insn_cnt; i++) {
  13125		int j;
  13126
  13127		j = 0;
  13128		while (i + j < insn_cnt && !aux_data[i + j].seen)
  13129			j++;
  13130		if (!j)
  13131			continue;
  13132
  13133		err = verifier_remove_insns(env, i, j);
  13134		if (err)
  13135			return err;
  13136		insn_cnt = env->prog->len;
  13137	}
  13138
  13139	return 0;
  13140}
  13141
  13142static int opt_remove_nops(struct bpf_verifier_env *env)
  13143{
  13144	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
  13145	struct bpf_insn *insn = env->prog->insnsi;
  13146	int insn_cnt = env->prog->len;
  13147	int i, err;
  13148
  13149	for (i = 0; i < insn_cnt; i++) {
  13150		if (memcmp(&insn[i], &ja, sizeof(ja)))
  13151			continue;
  13152
  13153		err = verifier_remove_insns(env, i, 1);
  13154		if (err)
  13155			return err;
  13156		insn_cnt--;
  13157		i--;
  13158	}
  13159
  13160	return 0;
  13161}
  13162
  13163static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
  13164					 const union bpf_attr *attr)
  13165{
  13166	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
  13167	struct bpf_insn_aux_data *aux = env->insn_aux_data;
  13168	int i, patch_len, delta = 0, len = env->prog->len;
  13169	struct bpf_insn *insns = env->prog->insnsi;
  13170	struct bpf_prog *new_prog;
  13171	bool rnd_hi32;
  13172
  13173	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
  13174	zext_patch[1] = BPF_ZEXT_REG(0);
  13175	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
  13176	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
  13177	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
  13178	for (i = 0; i < len; i++) {
  13179		int adj_idx = i + delta;
  13180		struct bpf_insn insn;
  13181		int load_reg;
  13182
  13183		insn = insns[adj_idx];
  13184		load_reg = insn_def_regno(&insn);
  13185		if (!aux[adj_idx].zext_dst) {
  13186			u8 code, class;
  13187			u32 imm_rnd;
  13188
  13189			if (!rnd_hi32)
  13190				continue;
  13191
  13192			code = insn.code;
  13193			class = BPF_CLASS(code);
  13194			if (load_reg == -1)
  13195				continue;
  13196
  13197			/* NOTE: arg "reg" (the fourth one) is only used for
  13198			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
  13199			 *       here.
  13200			 */
  13201			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
  13202				if (class == BPF_LD &&
  13203				    BPF_MODE(code) == BPF_IMM)
  13204					i++;
  13205				continue;
  13206			}
  13207
  13208			/* ctx load could be transformed into wider load. */
  13209			if (class == BPF_LDX &&
  13210			    aux[adj_idx].ptr_type == PTR_TO_CTX)
  13211				continue;
  13212
  13213			imm_rnd = get_random_int();
  13214			rnd_hi32_patch[0] = insn;
  13215			rnd_hi32_patch[1].imm = imm_rnd;
  13216			rnd_hi32_patch[3].dst_reg = load_reg;
  13217			patch = rnd_hi32_patch;
  13218			patch_len = 4;
  13219			goto apply_patch_buffer;
  13220		}
  13221
   13222		/* Add in a zero-extend instruction if a) the JIT has requested
  13223		 * it or b) it's a CMPXCHG.
  13224		 *
  13225		 * The latter is because: BPF_CMPXCHG always loads a value into
  13226		 * R0, therefore always zero-extends. However some archs'
  13227		 * equivalent instruction only does this load when the
  13228		 * comparison is successful. This detail of CMPXCHG is
  13229		 * orthogonal to the general zero-extension behaviour of the
  13230		 * CPU, so it's treated independently of bpf_jit_needs_zext.
  13231		 */
  13232		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
  13233			continue;
  13234
  13235		if (WARN_ON(load_reg == -1)) {
  13236			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
  13237			return -EFAULT;
  13238		}
  13239
  13240		zext_patch[0] = insn;
  13241		zext_patch[1].dst_reg = load_reg;
  13242		zext_patch[1].src_reg = load_reg;
  13243		patch = zext_patch;
  13244		patch_len = 2;
  13245apply_patch_buffer:
  13246		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
  13247		if (!new_prog)
  13248			return -ENOMEM;
  13249		env->prog = new_prog;
  13250		insns = new_prog->insnsi;
  13251		aux = env->insn_aux_data;
  13252		delta += patch_len - 1;
  13253	}
  13254
  13255	return 0;
  13256}
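
/*
 * Illustrative sketch, not part of verifier.c: for a JIT that reports
 * bpf_jit_needs_zext(), a 32-bit def such as a mov32 gets an explicit
 * zero-extension appended, i.e. the zext_patch above turns
 *
 *	BPF_MOV32_REG(BPF_REG_1, BPF_REG_2),
 * into
 *	BPF_MOV32_REG(BPF_REG_1, BPF_REG_2),
 *	BPF_ZEXT_REG(BPF_REG_1),
 *
 * With the BPF_F_TEST_RND_HI32 prog_load flag the opposite is done for
 * testing: 32-bit defs that the verifier did not mark as needing zero
 * extension get their upper 32 bits poisoned with a random value via
 * BPF_REG_AX, so anything that wrongly relies on those bits being zero
 * fails visibly.
 */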
  13257
  13258/* convert load instructions that access fields of a context type into a
  13259 * sequence of instructions that access fields of the underlying structure:
  13260 *     struct __sk_buff    -> struct sk_buff
  13261 *     struct bpf_sock_ops -> struct sock
  13262 */
  13263static int convert_ctx_accesses(struct bpf_verifier_env *env)
  13264{
  13265	const struct bpf_verifier_ops *ops = env->ops;
  13266	int i, cnt, size, ctx_field_size, delta = 0;
  13267	const int insn_cnt = env->prog->len;
  13268	struct bpf_insn insn_buf[16], *insn;
  13269	u32 target_size, size_default, off;
  13270	struct bpf_prog *new_prog;
  13271	enum bpf_access_type type;
  13272	bool is_narrower_load;
  13273
  13274	if (ops->gen_prologue || env->seen_direct_write) {
  13275		if (!ops->gen_prologue) {
  13276			verbose(env, "bpf verifier is misconfigured\n");
  13277			return -EINVAL;
  13278		}
  13279		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
  13280					env->prog);
  13281		if (cnt >= ARRAY_SIZE(insn_buf)) {
  13282			verbose(env, "bpf verifier is misconfigured\n");
  13283			return -EINVAL;
  13284		} else if (cnt) {
  13285			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
  13286			if (!new_prog)
  13287				return -ENOMEM;
  13288
  13289			env->prog = new_prog;
  13290			delta += cnt - 1;
  13291		}
  13292	}
  13293
  13294	if (bpf_prog_is_dev_bound(env->prog->aux))
  13295		return 0;
  13296
  13297	insn = env->prog->insnsi + delta;
  13298
  13299	for (i = 0; i < insn_cnt; i++, insn++) {
  13300		bpf_convert_ctx_access_t convert_ctx_access;
  13301		bool ctx_access;
  13302
  13303		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
  13304		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
  13305		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
  13306		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
  13307			type = BPF_READ;
  13308			ctx_access = true;
  13309		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
  13310			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
  13311			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
  13312			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
  13313			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
  13314			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
  13315			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
  13316			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
  13317			type = BPF_WRITE;
  13318			ctx_access = BPF_CLASS(insn->code) == BPF_STX;
  13319		} else {
  13320			continue;
  13321		}
  13322
  13323		if (type == BPF_WRITE &&
  13324		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
  13325			struct bpf_insn patch[] = {
  13326				*insn,
  13327				BPF_ST_NOSPEC(),
  13328			};
  13329
  13330			cnt = ARRAY_SIZE(patch);
  13331			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
  13332			if (!new_prog)
  13333				return -ENOMEM;
  13334
  13335			delta    += cnt - 1;
  13336			env->prog = new_prog;
  13337			insn      = new_prog->insnsi + i + delta;
  13338			continue;
  13339		}
  13340
  13341		if (!ctx_access)
  13342			continue;
  13343
  13344		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
  13345		case PTR_TO_CTX:
  13346			if (!ops->convert_ctx_access)
  13347				continue;
  13348			convert_ctx_access = ops->convert_ctx_access;
  13349			break;
  13350		case PTR_TO_SOCKET:
  13351		case PTR_TO_SOCK_COMMON:
  13352			convert_ctx_access = bpf_sock_convert_ctx_access;
  13353			break;
  13354		case PTR_TO_TCP_SOCK:
  13355			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
  13356			break;
  13357		case PTR_TO_XDP_SOCK:
  13358			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
  13359			break;
  13360		case PTR_TO_BTF_ID:
  13361		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
  13362			if (type == BPF_READ) {
  13363				insn->code = BPF_LDX | BPF_PROBE_MEM |
  13364					BPF_SIZE((insn)->code);
  13365				env->prog->aux->num_exentries++;
  13366			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
  13367				verbose(env, "Writes through BTF pointers are not allowed\n");
  13368				return -EINVAL;
  13369			}
  13370			continue;
  13371		default:
  13372			continue;
  13373		}
  13374
  13375		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
  13376		size = BPF_LDST_BYTES(insn);
  13377
  13378		/* If the read access is a narrower load of the field,
   13379		 * convert to a 4/8-byte load, to minimize program type specific
  13380		 * convert_ctx_access changes. If conversion is successful,
  13381		 * we will apply proper mask to the result.
  13382		 */
  13383		is_narrower_load = size < ctx_field_size;
  13384		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
  13385		off = insn->off;
  13386		if (is_narrower_load) {
  13387			u8 size_code;
  13388
  13389			if (type == BPF_WRITE) {
  13390				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
  13391				return -EINVAL;
  13392			}
  13393
  13394			size_code = BPF_H;
  13395			if (ctx_field_size == 4)
  13396				size_code = BPF_W;
  13397			else if (ctx_field_size == 8)
  13398				size_code = BPF_DW;
  13399
  13400			insn->off = off & ~(size_default - 1);
  13401			insn->code = BPF_LDX | BPF_MEM | size_code;
  13402		}
  13403
  13404		target_size = 0;
  13405		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
  13406					 &target_size);
  13407		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
  13408		    (ctx_field_size && !target_size)) {
  13409			verbose(env, "bpf verifier is misconfigured\n");
  13410			return -EINVAL;
  13411		}
  13412
  13413		if (is_narrower_load && size < target_size) {
  13414			u8 shift = bpf_ctx_narrow_access_offset(
  13415				off, size, size_default) * 8;
  13416			if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
  13417				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
  13418				return -EINVAL;
  13419			}
  13420			if (ctx_field_size <= 4) {
  13421				if (shift)
  13422					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
  13423									insn->dst_reg,
  13424									shift);
  13425				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
  13426								(1 << size * 8) - 1);
  13427			} else {
  13428				if (shift)
  13429					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
  13430									insn->dst_reg,
  13431									shift);
  13432				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
  13433								(1ULL << size * 8) - 1);
  13434			}
  13435		}
  13436
  13437		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  13438		if (!new_prog)
  13439			return -ENOMEM;
  13440
  13441		delta += cnt - 1;
  13442
  13443		/* keep walking new program and skip insns we just inserted */
  13444		env->prog = new_prog;
  13445		insn      = new_prog->insnsi + i + delta;
  13446	}
  13447
  13448	return 0;
  13449}
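
/*
 * Illustrative sketch, not part of verifier.c: for a narrow read the field is
 * loaded at its natural width and the requested bytes are then extracted with
 * the shift/mask appended above, so the patched sequence behaves like the
 * following (helper name made up):
 *
 *	static u64 narrow_read(u64 wide, u32 off, u32 size, u32 size_default)
 *	{
 *		u32 shift = bpf_ctx_narrow_access_offset(off, size,
 *							 size_default) * 8;
 *
 *		return (wide >> shift) & ((1ULL << size * 8) - 1);
 *	}
 *
 * E.g. a 1-byte read at byte offset 2 of a 4-byte context field on a
 * little-endian machine loads the whole word and returns (wide >> 16) & 0xff.
 */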
  13450
  13451static int jit_subprogs(struct bpf_verifier_env *env)
  13452{
  13453	struct bpf_prog *prog = env->prog, **func, *tmp;
  13454	int i, j, subprog_start, subprog_end = 0, len, subprog;
  13455	struct bpf_map *map_ptr;
  13456	struct bpf_insn *insn;
  13457	void *old_bpf_func;
  13458	int err, num_exentries;
  13459
  13460	if (env->subprog_cnt <= 1)
  13461		return 0;
  13462
  13463	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
  13464		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
  13465			continue;
  13466
  13467		/* Upon error here we cannot fall back to interpreter but
  13468		 * need a hard reject of the program. Thus -EFAULT is
  13469		 * propagated in any case.
  13470		 */
  13471		subprog = find_subprog(env, i + insn->imm + 1);
  13472		if (subprog < 0) {
  13473			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
  13474				  i + insn->imm + 1);
  13475			return -EFAULT;
  13476		}
  13477		/* temporarily remember subprog id inside insn instead of
  13478		 * aux_data, since next loop will split up all insns into funcs
  13479		 */
  13480		insn->off = subprog;
  13481		/* remember original imm in case JIT fails and fallback
  13482		 * to interpreter will be needed
  13483		 */
  13484		env->insn_aux_data[i].call_imm = insn->imm;
  13485		/* point imm to __bpf_call_base+1 from JITs point of view */
  13486		insn->imm = 1;
  13487		if (bpf_pseudo_func(insn))
  13488			/* jit (e.g. x86_64) may emit fewer instructions
  13489			 * if it learns a u32 imm is the same as a u64 imm.
   13490			 * Force a non-zero imm here.
  13491			 */
  13492			insn[1].imm = 1;
  13493	}
  13494
  13495	err = bpf_prog_alloc_jited_linfo(prog);
  13496	if (err)
  13497		goto out_undo_insn;
  13498
  13499	err = -ENOMEM;
  13500	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
  13501	if (!func)
  13502		goto out_undo_insn;
  13503
  13504	for (i = 0; i < env->subprog_cnt; i++) {
  13505		subprog_start = subprog_end;
  13506		subprog_end = env->subprog_info[i + 1].start;
  13507
  13508		len = subprog_end - subprog_start;
  13509		/* bpf_prog_run() doesn't call subprogs directly,
  13510		 * hence main prog stats include the runtime of subprogs.
   13511		 * subprogs don't have IDs and are not reachable via prog_get_next_id;
  13512		 * func[i]->stats will never be accessed and stays NULL
  13513		 */
  13514		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
  13515		if (!func[i])
  13516			goto out_free;
  13517		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
  13518		       len * sizeof(struct bpf_insn));
  13519		func[i]->type = prog->type;
  13520		func[i]->len = len;
  13521		if (bpf_prog_calc_tag(func[i]))
  13522			goto out_free;
  13523		func[i]->is_func = 1;
  13524		func[i]->aux->func_idx = i;
  13525		/* Below members will be freed only at prog->aux */
  13526		func[i]->aux->btf = prog->aux->btf;
  13527		func[i]->aux->func_info = prog->aux->func_info;
  13528		func[i]->aux->poke_tab = prog->aux->poke_tab;
  13529		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
  13530
  13531		for (j = 0; j < prog->aux->size_poke_tab; j++) {
  13532			struct bpf_jit_poke_descriptor *poke;
  13533
  13534			poke = &prog->aux->poke_tab[j];
  13535			if (poke->insn_idx < subprog_end &&
  13536			    poke->insn_idx >= subprog_start)
  13537				poke->aux = func[i]->aux;
  13538		}
  13539
  13540		/* Use bpf_prog_F_tag to indicate functions in stack traces.
  13541		 * Long term would need debug info to populate names
  13542		 */
  13543		func[i]->aux->name[0] = 'F';
  13544		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
  13545		func[i]->jit_requested = 1;
  13546		func[i]->blinding_requested = prog->blinding_requested;
  13547		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
  13548		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
  13549		func[i]->aux->linfo = prog->aux->linfo;
  13550		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
  13551		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
  13552		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
  13553		num_exentries = 0;
  13554		insn = func[i]->insnsi;
  13555		for (j = 0; j < func[i]->len; j++, insn++) {
  13556			if (BPF_CLASS(insn->code) == BPF_LDX &&
  13557			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
  13558				num_exentries++;
  13559		}
  13560		func[i]->aux->num_exentries = num_exentries;
  13561		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
  13562		func[i] = bpf_int_jit_compile(func[i]);
  13563		if (!func[i]->jited) {
  13564			err = -ENOTSUPP;
  13565			goto out_free;
  13566		}
  13567		cond_resched();
  13568	}
  13569
  13570	/* at this point all bpf functions were successfully JITed
  13571	 * now populate all bpf_calls with correct addresses and
  13572	 * run last pass of JIT
  13573	 */
  13574	for (i = 0; i < env->subprog_cnt; i++) {
  13575		insn = func[i]->insnsi;
  13576		for (j = 0; j < func[i]->len; j++, insn++) {
  13577			if (bpf_pseudo_func(insn)) {
  13578				subprog = insn->off;
  13579				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
  13580				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
  13581				continue;
  13582			}
  13583			if (!bpf_pseudo_call(insn))
  13584				continue;
  13585			subprog = insn->off;
  13586			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
  13587		}
  13588
  13589		/* we use the aux data to keep a list of the start addresses
  13590		 * of the JITed images for each function in the program
  13591		 *
  13592		 * for some architectures, such as powerpc64, the imm field
  13593		 * might not be large enough to hold the offset of the start
  13594		 * address of the callee's JITed image from __bpf_call_base
  13595		 *
  13596		 * in such cases, we can lookup the start address of a callee
  13597		 * by using its subprog id, available from the off field of
  13598		 * the call instruction, as an index for this list
  13599		 */
  13600		func[i]->aux->func = func;
  13601		func[i]->aux->func_cnt = env->subprog_cnt;
  13602	}
  13603	for (i = 0; i < env->subprog_cnt; i++) {
  13604		old_bpf_func = func[i]->bpf_func;
  13605		tmp = bpf_int_jit_compile(func[i]);
  13606		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
  13607			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
  13608			err = -ENOTSUPP;
  13609			goto out_free;
  13610		}
  13611		cond_resched();
  13612	}
  13613
  13614	/* finally lock prog and jit images for all functions and
   13615	 * populate kallsyms
  13616	 */
  13617	for (i = 0; i < env->subprog_cnt; i++) {
  13618		bpf_prog_lock_ro(func[i]);
  13619		bpf_prog_kallsyms_add(func[i]);
  13620	}
  13621
  13622	/* Last step: make now unused interpreter insns from main
   13623	 * prog consistent for later dump requests, so they look
   13624	 * the same as if they had only been interpreted.
  13625	 */
  13626	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
  13627		if (bpf_pseudo_func(insn)) {
  13628			insn[0].imm = env->insn_aux_data[i].call_imm;
  13629			insn[1].imm = insn->off;
  13630			insn->off = 0;
  13631			continue;
  13632		}
  13633		if (!bpf_pseudo_call(insn))
  13634			continue;
  13635		insn->off = env->insn_aux_data[i].call_imm;
  13636		subprog = find_subprog(env, i + insn->off + 1);
  13637		insn->imm = subprog;
  13638	}
  13639
  13640	prog->jited = 1;
  13641	prog->bpf_func = func[0]->bpf_func;
  13642	prog->jited_len = func[0]->jited_len;
  13643	prog->aux->func = func;
  13644	prog->aux->func_cnt = env->subprog_cnt;
  13645	bpf_prog_jit_attempt_done(prog);
  13646	return 0;
  13647out_free:
  13648	/* We failed JIT'ing, so at this point we need to unregister poke
  13649	 * descriptors from subprogs, so that kernel is not attempting to
  13650	 * patch it anymore as we're freeing the subprog JIT memory.
  13651	 */
  13652	for (i = 0; i < prog->aux->size_poke_tab; i++) {
  13653		map_ptr = prog->aux->poke_tab[i].tail_call.map;
  13654		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
  13655	}
  13656	/* At this point we're guaranteed that poke descriptors are not
  13657	 * live anymore. We can just unlink its descriptor table as it's
  13658	 * released with the main prog.
  13659	 */
  13660	for (i = 0; i < env->subprog_cnt; i++) {
  13661		if (!func[i])
  13662			continue;
  13663		func[i]->aux->poke_tab = NULL;
  13664		bpf_jit_free(func[i]);
  13665	}
  13666	kfree(func);
  13667out_undo_insn:
  13668	/* cleanup main prog to be interpreted */
  13669	prog->jit_requested = 0;
  13670	prog->blinding_requested = 0;
  13671	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
  13672		if (!bpf_pseudo_call(insn))
  13673			continue;
  13674		insn->off = 0;
  13675		insn->imm = env->insn_aux_data[i].call_imm;
  13676	}
  13677	bpf_prog_jit_attempt_done(prog);
  13678	return err;
  13679}
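
/*
 * Illustrative sketch, not part of verifier.c: inside the JITed subprogs the
 * pseudo-call insns keep the callee's subprog index in insn->off, so a JIT
 * that cannot encode the callee address in the 32-bit imm (see the powerpc64
 * note above) can resolve it through the aux->func list populated here
 * (helper name made up):
 *
 *	static u64 callee_addr(const struct bpf_prog *fp,
 *			       const struct bpf_insn *insn)
 *	{
 *		return (u64)(unsigned long)fp->aux->func[insn->off]->bpf_func;
 *	}
 */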
  13680
  13681static int fixup_call_args(struct bpf_verifier_env *env)
  13682{
  13683#ifndef CONFIG_BPF_JIT_ALWAYS_ON
  13684	struct bpf_prog *prog = env->prog;
  13685	struct bpf_insn *insn = prog->insnsi;
  13686	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
  13687	int i, depth;
  13688#endif
  13689	int err = 0;
  13690
  13691	if (env->prog->jit_requested &&
  13692	    !bpf_prog_is_dev_bound(env->prog->aux)) {
  13693		err = jit_subprogs(env);
  13694		if (err == 0)
  13695			return 0;
  13696		if (err == -EFAULT)
  13697			return err;
  13698	}
  13699#ifndef CONFIG_BPF_JIT_ALWAYS_ON
  13700	if (has_kfunc_call) {
  13701		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
  13702		return -EINVAL;
  13703	}
  13704	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
  13705		/* When JIT fails the progs with bpf2bpf calls and tail_calls
  13706		 * have to be rejected, since interpreter doesn't support them yet.
  13707		 */
  13708		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
  13709		return -EINVAL;
  13710	}
  13711	for (i = 0; i < prog->len; i++, insn++) {
  13712		if (bpf_pseudo_func(insn)) {
  13713			/* When JIT fails the progs with callback calls
  13714			 * have to be rejected, since interpreter doesn't support them yet.
  13715			 */
  13716			verbose(env, "callbacks are not allowed in non-JITed programs\n");
  13717			return -EINVAL;
  13718		}
  13719
  13720		if (!bpf_pseudo_call(insn))
  13721			continue;
  13722		depth = get_callee_stack_depth(env, insn, i);
  13723		if (depth < 0)
  13724			return depth;
  13725		bpf_patch_call_args(insn, depth);
  13726	}
  13727	err = 0;
  13728#endif
  13729	return err;
  13730}
  13731
  13732static int fixup_kfunc_call(struct bpf_verifier_env *env,
  13733			    struct bpf_insn *insn)
  13734{
  13735	const struct bpf_kfunc_desc *desc;
  13736
  13737	if (!insn->imm) {
  13738		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
  13739		return -EINVAL;
  13740	}
  13741
  13742	/* insn->imm has the btf func_id. Replace it with
   13743	 * an address (relative to __bpf_call_base).
  13744	 */
  13745	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
  13746	if (!desc) {
  13747		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
  13748			insn->imm);
  13749		return -EFAULT;
  13750	}
  13751
  13752	insn->imm = desc->imm;
  13753
  13754	return 0;
  13755}
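
/*
 * Illustrative sketch, not part of verifier.c: desc->imm follows the same
 * BPF_CALL_IMM() convention used for ordinary helpers, i.e. it is the target
 * address relative to __bpf_call_base, so a JIT recovers the actual kfunc
 * address roughly as (helper name made up):
 *
 *	static u64 kfunc_addr(const struct bpf_insn *insn)
 *	{
 *		return (u64)(unsigned long)__bpf_call_base + insn->imm;
 *	}
 */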
  13756
  13757/* Do various post-verification rewrites in a single program pass.
  13758 * These rewrites simplify JIT and interpreter implementations.
  13759 */
  13760static int do_misc_fixups(struct bpf_verifier_env *env)
  13761{
  13762	struct bpf_prog *prog = env->prog;
  13763	enum bpf_attach_type eatype = prog->expected_attach_type;
  13764	enum bpf_prog_type prog_type = resolve_prog_type(prog);
  13765	struct bpf_insn *insn = prog->insnsi;
  13766	const struct bpf_func_proto *fn;
  13767	const int insn_cnt = prog->len;
  13768	const struct bpf_map_ops *ops;
  13769	struct bpf_insn_aux_data *aux;
  13770	struct bpf_insn insn_buf[16];
  13771	struct bpf_prog *new_prog;
  13772	struct bpf_map *map_ptr;
  13773	int i, ret, cnt, delta = 0;
  13774
  13775	for (i = 0; i < insn_cnt; i++, insn++) {
  13776		/* Make divide-by-zero exceptions impossible. */
  13777		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
  13778		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
  13779		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
  13780		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
  13781			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
  13782			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
  13783			struct bpf_insn *patchlet;
  13784			struct bpf_insn chk_and_div[] = {
  13785				/* [R,W]x div 0 -> 0 */
  13786				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
  13787					     BPF_JNE | BPF_K, insn->src_reg,
  13788					     0, 2, 0),
  13789				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
  13790				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
  13791				*insn,
  13792			};
  13793			struct bpf_insn chk_and_mod[] = {
  13794				/* [R,W]x mod 0 -> [R,W]x */
  13795				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
  13796					     BPF_JEQ | BPF_K, insn->src_reg,
  13797					     0, 1 + (is64 ? 0 : 1), 0),
  13798				*insn,
  13799				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
  13800				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
  13801			};
  13802
  13803			patchlet = isdiv ? chk_and_div : chk_and_mod;
  13804			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
  13805				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
  13806
  13807			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
  13808			if (!new_prog)
  13809				return -ENOMEM;
  13810
  13811			delta    += cnt - 1;
  13812			env->prog = prog = new_prog;
  13813			insn      = new_prog->insnsi + i + delta;
  13814			continue;
  13815		}
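
		/*
		 * Illustrative sketch, not part of verifier.c: the patchlets
		 * above give eBPF division its defined, exception-free
		 * semantics.  In C terms, for the 64-bit forms:
		 *
		 *	dst = src ? dst / src : 0;	(BPF_DIV)
		 *	dst = src ? dst % src : dst;	(BPF_MOD)
		 *
		 * and likewise for the 32-bit forms with (u32) operands, where
		 * the result (including the untouched-dst mod case, via the
		 * trailing mov32) is zero-extended into the full 64-bit
		 * register.
		 */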
  13816
  13817		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
  13818		if (BPF_CLASS(insn->code) == BPF_LD &&
  13819		    (BPF_MODE(insn->code) == BPF_ABS ||
  13820		     BPF_MODE(insn->code) == BPF_IND)) {
  13821			cnt = env->ops->gen_ld_abs(insn, insn_buf);
  13822			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
  13823				verbose(env, "bpf verifier is misconfigured\n");
  13824				return -EINVAL;
  13825			}
  13826
  13827			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  13828			if (!new_prog)
  13829				return -ENOMEM;
  13830
  13831			delta    += cnt - 1;
  13832			env->prog = prog = new_prog;
  13833			insn      = new_prog->insnsi + i + delta;
  13834			continue;
  13835		}
  13836
  13837		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
  13838		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
  13839		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
  13840			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
  13841			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
  13842			struct bpf_insn *patch = &insn_buf[0];
  13843			bool issrc, isneg, isimm;
  13844			u32 off_reg;
  13845
  13846			aux = &env->insn_aux_data[i + delta];
  13847			if (!aux->alu_state ||
  13848			    aux->alu_state == BPF_ALU_NON_POINTER)
  13849				continue;
  13850
  13851			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
  13852			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
  13853				BPF_ALU_SANITIZE_SRC;
  13854			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
  13855
  13856			off_reg = issrc ? insn->src_reg : insn->dst_reg;
  13857			if (isimm) {
  13858				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
  13859			} else {
  13860				if (isneg)
  13861					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
  13862				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
  13863				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
  13864				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
  13865				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
  13866				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
  13867				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
  13868			}
  13869			if (!issrc)
  13870				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
  13871			insn->src_reg = BPF_REG_AX;
  13872			if (isneg)
  13873				insn->code = insn->code == code_add ?
  13874					     code_sub : code_add;
  13875			*patch++ = *insn;
  13876			if (issrc && isneg && !isimm)
  13877				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
  13878			cnt = patch - insn_buf;
  13879
  13880			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  13881			if (!new_prog)
  13882				return -ENOMEM;
  13883
  13884			delta    += cnt - 1;
  13885			env->prog = prog = new_prog;
  13886			insn      = new_prog->insnsi + i + delta;
  13887			continue;
  13888		}
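
		/*
		 * Illustrative sketch, not part of verifier.c: ignoring the
		 * isneg/issrc permutations, the non-immediate masking sequence
		 * built above computes in BPF_REG_AX a 0 / all-ones mask from
		 * the verifier-computed alu_limit (function name made up):
		 *
		 *	static u64 sanitize_off(u64 off_reg, u32 alu_limit)
		 *	{
		 *		u64 ax = alu_limit;
		 *
		 *		ax -= off_reg;
		 *		ax |= off_reg;
		 *		ax = -ax;
		 *		ax = (u64)((s64)ax >> 63);
		 *		return off_reg & ax;
		 *	}
		 *
		 * The subtraction underflows iff off_reg > alu_limit, so after
		 * the OR the sign bit of ax is set exactly for out-of-range
		 * (or "negative") offsets; NEG + ARSH 63 turn that into the
		 * final mask.  The net effect is that offsets in
		 * [0, alu_limit] pass through unchanged while speculatively
		 * out-of-range offsets collapse to 0.
		 */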
  13889
  13890		if (insn->code != (BPF_JMP | BPF_CALL))
  13891			continue;
  13892		if (insn->src_reg == BPF_PSEUDO_CALL)
  13893			continue;
  13894		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
  13895			ret = fixup_kfunc_call(env, insn);
  13896			if (ret)
  13897				return ret;
  13898			continue;
  13899		}
  13900
  13901		if (insn->imm == BPF_FUNC_get_route_realm)
  13902			prog->dst_needed = 1;
  13903		if (insn->imm == BPF_FUNC_get_prandom_u32)
  13904			bpf_user_rnd_init_once();
  13905		if (insn->imm == BPF_FUNC_override_return)
  13906			prog->kprobe_override = 1;
  13907		if (insn->imm == BPF_FUNC_tail_call) {
  13908			/* If we tail call into other programs, we
  13909			 * cannot make any assumptions since they can
  13910			 * be replaced dynamically during runtime in
  13911			 * the program array.
  13912			 */
  13913			prog->cb_access = 1;
  13914			if (!allow_tail_call_in_subprogs(env))
  13915				prog->aux->stack_depth = MAX_BPF_STACK;
  13916			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
  13917
  13918			/* mark bpf_tail_call as different opcode to avoid
  13919			 * conditional branch in the interpreter for every normal
  13920			 * call and to prevent accidental JITing by JIT compiler
  13921			 * that doesn't support bpf_tail_call yet
  13922			 */
  13923			insn->imm = 0;
  13924			insn->code = BPF_JMP | BPF_TAIL_CALL;
  13925
  13926			aux = &env->insn_aux_data[i + delta];
  13927			if (env->bpf_capable && !prog->blinding_requested &&
  13928			    prog->jit_requested &&
  13929			    !bpf_map_key_poisoned(aux) &&
  13930			    !bpf_map_ptr_poisoned(aux) &&
  13931			    !bpf_map_ptr_unpriv(aux)) {
  13932				struct bpf_jit_poke_descriptor desc = {
  13933					.reason = BPF_POKE_REASON_TAIL_CALL,
  13934					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
  13935					.tail_call.key = bpf_map_key_immediate(aux),
  13936					.insn_idx = i + delta,
  13937				};
  13938
  13939				ret = bpf_jit_add_poke_descriptor(prog, &desc);
  13940				if (ret < 0) {
  13941					verbose(env, "adding tail call poke descriptor failed\n");
  13942					return ret;
  13943				}
  13944
  13945				insn->imm = ret + 1;
  13946				continue;
  13947			}
  13948
  13949			if (!bpf_map_ptr_unpriv(aux))
  13950				continue;
  13951
  13952			/* instead of changing every JIT dealing with tail_call
  13953			 * emit two extra insns:
  13954			 * if (index >= max_entries) goto out;
  13955			 * index &= array->index_mask;
  13956			 * to avoid out-of-bounds cpu speculation
  13957			 */
  13958			if (bpf_map_ptr_poisoned(aux)) {
  13959				verbose(env, "tail_call abusing map_ptr\n");
  13960				return -EINVAL;
  13961			}
  13962
  13963			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
  13964			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
  13965						  map_ptr->max_entries, 2);
  13966			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
  13967						    container_of(map_ptr,
  13968								 struct bpf_array,
  13969								 map)->index_mask);
  13970			insn_buf[2] = *insn;
  13971			cnt = 3;
  13972			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  13973			if (!new_prog)
  13974				return -ENOMEM;
  13975
  13976			delta    += cnt - 1;
  13977			env->prog = prog = new_prog;
  13978			insn      = new_prog->insnsi + i + delta;
  13979			continue;
  13980		}
  13981
  13982		if (insn->imm == BPF_FUNC_timer_set_callback) {
  13983			/* The verifier will process callback_fn as many times as necessary
  13984			 * with different maps and the register states prepared by
  13985			 * set_timer_callback_state will be accurate.
  13986			 *
  13987			 * The following use case is valid:
  13988			 *   map1 is shared by prog1, prog2, prog3.
  13989			 *   prog1 calls bpf_timer_init for some map1 elements
  13990			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
  13991			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
  13992			 *   prog3 calls bpf_timer_start for some map1 elements.
  13993			 *     Those that were not both bpf_timer_init-ed and
  13994			 *     bpf_timer_set_callback-ed will return -EINVAL.
  13995			 */
  13996			struct bpf_insn ld_addrs[2] = {
  13997				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
  13998			};
  13999
  14000			insn_buf[0] = ld_addrs[0];
  14001			insn_buf[1] = ld_addrs[1];
  14002			insn_buf[2] = *insn;
  14003			cnt = 3;
  14004
  14005			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  14006			if (!new_prog)
  14007				return -ENOMEM;
  14008
  14009			delta    += cnt - 1;
  14010			env->prog = prog = new_prog;
  14011			insn      = new_prog->insnsi + i + delta;
  14012			goto patch_call_imm;
  14013		}
  14014
  14015		if (insn->imm == BPF_FUNC_task_storage_get ||
  14016		    insn->imm == BPF_FUNC_sk_storage_get ||
  14017		    insn->imm == BPF_FUNC_inode_storage_get) {
  14018			if (env->prog->aux->sleepable)
  14019				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
  14020			else
  14021				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
  14022			insn_buf[1] = *insn;
  14023			cnt = 2;
  14024
  14025			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  14026			if (!new_prog)
  14027				return -ENOMEM;
  14028
  14029			delta += cnt - 1;
  14030			env->prog = prog = new_prog;
  14031			insn = new_prog->insnsi + i + delta;
  14032			goto patch_call_imm;
  14033		}
  14034
  14035		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
  14036		 * and other inlining handlers are currently limited to 64 bit
  14037		 * only.
  14038		 */
  14039		if (prog->jit_requested && BITS_PER_LONG == 64 &&
  14040		    (insn->imm == BPF_FUNC_map_lookup_elem ||
  14041		     insn->imm == BPF_FUNC_map_update_elem ||
  14042		     insn->imm == BPF_FUNC_map_delete_elem ||
  14043		     insn->imm == BPF_FUNC_map_push_elem   ||
  14044		     insn->imm == BPF_FUNC_map_pop_elem    ||
  14045		     insn->imm == BPF_FUNC_map_peek_elem   ||
  14046		     insn->imm == BPF_FUNC_redirect_map    ||
  14047		     insn->imm == BPF_FUNC_for_each_map_elem ||
  14048		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
  14049			aux = &env->insn_aux_data[i + delta];
  14050			if (bpf_map_ptr_poisoned(aux))
  14051				goto patch_call_imm;
  14052
  14053			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
  14054			ops = map_ptr->ops;
  14055			if (insn->imm == BPF_FUNC_map_lookup_elem &&
  14056			    ops->map_gen_lookup) {
  14057				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
  14058				if (cnt == -EOPNOTSUPP)
  14059					goto patch_map_ops_generic;
  14060				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
  14061					verbose(env, "bpf verifier is misconfigured\n");
  14062					return -EINVAL;
  14063				}
  14064
  14065				new_prog = bpf_patch_insn_data(env, i + delta,
  14066							       insn_buf, cnt);
  14067				if (!new_prog)
  14068					return -ENOMEM;
  14069
  14070				delta    += cnt - 1;
  14071				env->prog = prog = new_prog;
  14072				insn      = new_prog->insnsi + i + delta;
  14073				continue;
  14074			}
  14075
  14076			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
  14077				     (void *(*)(struct bpf_map *map, void *key))NULL));
  14078			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
  14079				     (int (*)(struct bpf_map *map, void *key))NULL));
  14080			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
  14081				     (int (*)(struct bpf_map *map, void *key, void *value,
  14082					      u64 flags))NULL));
  14083			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
  14084				     (int (*)(struct bpf_map *map, void *value,
  14085					      u64 flags))NULL));
  14086			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
  14087				     (int (*)(struct bpf_map *map, void *value))NULL));
  14088			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
  14089				     (int (*)(struct bpf_map *map, void *value))NULL));
  14090			BUILD_BUG_ON(!__same_type(ops->map_redirect,
  14091				     (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
  14092			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
  14093				     (int (*)(struct bpf_map *map,
  14094					      bpf_callback_t callback_fn,
  14095					      void *callback_ctx,
  14096					      u64 flags))NULL));
  14097			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
  14098				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
  14099
  14100patch_map_ops_generic:
  14101			switch (insn->imm) {
  14102			case BPF_FUNC_map_lookup_elem:
  14103				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
  14104				continue;
  14105			case BPF_FUNC_map_update_elem:
  14106				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
  14107				continue;
  14108			case BPF_FUNC_map_delete_elem:
  14109				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
  14110				continue;
  14111			case BPF_FUNC_map_push_elem:
  14112				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
  14113				continue;
  14114			case BPF_FUNC_map_pop_elem:
  14115				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
  14116				continue;
  14117			case BPF_FUNC_map_peek_elem:
  14118				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
  14119				continue;
  14120			case BPF_FUNC_redirect_map:
  14121				insn->imm = BPF_CALL_IMM(ops->map_redirect);
  14122				continue;
  14123			case BPF_FUNC_for_each_map_elem:
  14124				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
  14125				continue;
  14126			case BPF_FUNC_map_lookup_percpu_elem:
  14127				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
  14128				continue;
  14129			}
  14130
  14131			goto patch_call_imm;
  14132		}
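		/* When a map type supplies its own implementation, the generic
		 * helper call is rewritten above into a direct call to that
		 * implementation: BPF_CALL_IMM() appears to encode the callee
		 * as an offset from __bpf_call_base, e.g. (illustrative only):
		 *
		 *	insn->imm = (long)ops->map_lookup_elem -
		 *		    (long)__bpf_call_base;
		 *
		 * so a lookup on a hash map ends up calling the map-specific op
		 * (such as htab_map_lookup_elem()) without going through the
		 * generic bpf_map_lookup_elem() dispatch.
		 */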
  14133
  14134		/* Implement bpf_jiffies64 inline. */
  14135		if (prog->jit_requested && BITS_PER_LONG == 64 &&
  14136		    insn->imm == BPF_FUNC_jiffies64) {
  14137			struct bpf_insn ld_jiffies_addr[2] = {
  14138				BPF_LD_IMM64(BPF_REG_0,
  14139					     (unsigned long)&jiffies),
  14140			};
  14141
  14142			insn_buf[0] = ld_jiffies_addr[0];
  14143			insn_buf[1] = ld_jiffies_addr[1];
  14144			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
  14145						  BPF_REG_0, 0);
  14146			cnt = 3;
  14147
  14148			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
  14149						       cnt);
  14150			if (!new_prog)
  14151				return -ENOMEM;
  14152
  14153			delta    += cnt - 1;
  14154			env->prog = prog = new_prog;
  14155			insn      = new_prog->insnsi + i + delta;
  14156			continue;
  14157		}
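		/* The three patched instructions above amount to reading the
		 * jiffies counter directly, roughly:
		 *
		 *	u64 bpf_jiffies64(void)
		 *	{
		 *		return *(u64 *)&jiffies;
		 *	}
		 *
		 * so no helper call remains in the JITed image for this case.
		 */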
  14158
  14159		/* Implement bpf_get_func_arg inline. */
  14160		if (prog_type == BPF_PROG_TYPE_TRACING &&
  14161		    insn->imm == BPF_FUNC_get_func_arg) {
  14162			/* Load nr_args from ctx - 8 */
  14163			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
  14164			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
  14165			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
  14166			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
  14167			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
  14168			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
  14169			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
  14170			insn_buf[7] = BPF_JMP_A(1);
  14171			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
  14172			cnt = 9;
  14173
  14174			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  14175			if (!new_prog)
  14176				return -ENOMEM;
  14177
  14178			delta    += cnt - 1;
  14179			env->prog = prog = new_prog;
  14180			insn      = new_prog->insnsi + i + delta;
  14181			continue;
  14182		}
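		/* The nine instructions above are roughly equivalent to:
		 *
		 *	long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
		 *	{
		 *		u64 nr_args = *(u64 *)(ctx - 8);
		 *
		 *		if (n >= nr_args)
		 *			return -EINVAL;
		 *		*value = ((u64 *)ctx)[n];
		 *		return 0;
		 *	}
		 *
		 * assuming the trampoline ctx layout used here, where the
		 * argument count sits just below ctx and the saved arguments
		 * follow at ctx[0..nr_args - 1].
		 */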
  14183
  14184		/* Implement bpf_get_func_ret inline. */
  14185		if (prog_type == BPF_PROG_TYPE_TRACING &&
  14186		    insn->imm == BPF_FUNC_get_func_ret) {
  14187			if (eatype == BPF_TRACE_FEXIT ||
  14188			    eatype == BPF_MODIFY_RETURN) {
  14189				/* Load nr_args from ctx - 8 */
  14190				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
  14191				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
  14192				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
  14193				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
  14194				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
  14195				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
  14196				cnt = 6;
  14197			} else {
  14198				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
  14199				cnt = 1;
  14200			}
  14201
  14202			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  14203			if (!new_prog)
  14204				return -ENOMEM;
  14205
  14206			delta    += cnt - 1;
  14207			env->prog = prog = new_prog;
  14208			insn      = new_prog->insnsi + i + delta;
  14209			continue;
  14210		}
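		/* For fexit/fmod_ret the six instructions above are roughly:
		 *
		 *	long bpf_get_func_ret(void *ctx, u64 *value)
		 *	{
		 *		u64 nr_args = *(u64 *)(ctx - 8);
		 *
		 *		*value = ((u64 *)ctx)[nr_args];
		 *		return 0;
		 *	}
		 *
		 * i.e. the return value is assumed to sit in the slot right
		 * after the saved arguments; other attach types simply get
		 * -EOPNOTSUPP.
		 */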
  14211
  14212		/* Implement get_func_arg_cnt inline. */
  14213		if (prog_type == BPF_PROG_TYPE_TRACING &&
  14214		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
  14215			/* Load nr_args from ctx - 8 */
  14216			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
  14217
  14218			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
  14219			if (!new_prog)
  14220				return -ENOMEM;
  14221
  14222			env->prog = prog = new_prog;
  14223			insn      = new_prog->insnsi + i + delta;
  14224			continue;
  14225		}
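		/* A single load replaces the helper here, roughly
		 * return *(u64 *)(ctx - 8), i.e. the argument count that the
		 * trampoline is assumed to store just below ctx.
		 */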
  14226
  14227		/* Implement bpf_get_func_ip inline. */
  14228		if (prog_type == BPF_PROG_TYPE_TRACING &&
  14229		    insn->imm == BPF_FUNC_get_func_ip) {
  14230			/* Load IP address from ctx - 16 */
  14231			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
  14232
  14233			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
  14234			if (!new_prog)
  14235				return -ENOMEM;
  14236
  14237			env->prog = prog = new_prog;
  14238			insn      = new_prog->insnsi + i + delta;
  14239			continue;
  14240		}
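		/* Again a single load, roughly return *(u64 *)(ctx - 16).
		 * Taken together, these inlines rely on the (assumed)
		 * trampoline layout around ctx:
		 *
		 *	ctx - 16: IP of the traced function
		 *	ctx -  8: number of arguments
		 *	ctx[0..nr_args - 1]: saved arguments
		 *	ctx[nr_args]: return value (fexit/fmod_ret only)
		 */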
  14241
  14242patch_call_imm:
  14243		fn = env->ops->get_func_proto(insn->imm, env->prog);
  14244		/* all functions that have a prototype and that the verifier
  14245		 * allowed programs to call must be real in-kernel functions
  14246		 */
  14247		if (!fn->func) {
  14248			verbose(env,
  14249				"kernel subsystem misconfigured func %s#%d\n",
  14250				func_id_name(insn->imm), insn->imm);
  14251			return -EFAULT;
  14252		}
  14253		insn->imm = fn->func - __bpf_call_base;
  14254	}
  14255
  14256	/* Since poke tab is now finalized, publish aux to tracker. */
  14257	for (i = 0; i < prog->aux->size_poke_tab; i++) {
  14258		map_ptr = prog->aux->poke_tab[i].tail_call.map;
  14259		if (!map_ptr->ops->map_poke_track ||
  14260		    !map_ptr->ops->map_poke_untrack ||
  14261		    !map_ptr->ops->map_poke_run) {
  14262			verbose(env, "bpf verifier is misconfigured\n");
  14263			return -EINVAL;
  14264		}
  14265
  14266		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
  14267		if (ret < 0) {
  14268			verbose(env, "tracking tail call prog failed\n");
  14269			return ret;
  14270		}
  14271	}
  14272
  14273	sort_kfunc_descs_by_imm(env->prog);
  14274
  14275	return 0;
  14276}
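/* A note on the patching loop above: bpf_patch_insn_data() returns a new
 * program with 'cnt' instructions spliced in at position i + delta, so
 * 'delta' accumulates (cnt - 1) per patch to keep i + delta pointing at
 * the instruction that originated from original index i.
 */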
  14277
  14278static void free_states(struct bpf_verifier_env *env)
  14279{
  14280	struct bpf_verifier_state_list *sl, *sln;
  14281	int i;
  14282
  14283	sl = env->free_list;
  14284	while (sl) {
  14285		sln = sl->next;
  14286		free_verifier_state(&sl->state, false);
  14287		kfree(sl);
  14288		sl = sln;
  14289	}
  14290	env->free_list = NULL;
  14291
  14292	if (!env->explored_states)
  14293		return;
  14294
  14295	for (i = 0; i < state_htab_size(env); i++) {
  14296		sl = env->explored_states[i];
  14297
  14298		while (sl) {
  14299			sln = sl->next;
  14300			free_verifier_state(&sl->state, false);
  14301			kfree(sl);
  14302			sl = sln;
  14303		}
  14304		env->explored_states[i] = NULL;
  14305	}
  14306}
  14307
  14308static int do_check_common(struct bpf_verifier_env *env, int subprog)
  14309{
  14310	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
  14311	struct bpf_verifier_state *state;
  14312	struct bpf_reg_state *regs;
  14313	int ret, i;
  14314
  14315	env->prev_linfo = NULL;
  14316	env->pass_cnt++;
  14317
  14318	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
  14319	if (!state)
  14320		return -ENOMEM;
  14321	state->curframe = 0;
  14322	state->speculative = false;
  14323	state->branches = 1;
  14324	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
  14325	if (!state->frame[0]) {
  14326		kfree(state);
  14327		return -ENOMEM;
  14328	}
  14329	env->cur_state = state;
  14330	init_func_state(env, state->frame[0],
  14331			BPF_MAIN_FUNC /* callsite */,
  14332			0 /* frameno */,
  14333			subprog);
  14334
  14335	regs = state->frame[state->curframe]->regs;
  14336	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
  14337		ret = btf_prepare_func_args(env, subprog, regs);
  14338		if (ret)
  14339			goto out;
  14340		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
  14341			if (regs[i].type == PTR_TO_CTX)
  14342				mark_reg_known_zero(env, regs, i);
  14343			else if (regs[i].type == SCALAR_VALUE)
  14344				mark_reg_unknown(env, regs, i);
  14345			else if (base_type(regs[i].type) == PTR_TO_MEM) {
  14346				const u32 mem_size = regs[i].mem_size;
  14347
  14348				mark_reg_known_zero(env, regs, i);
  14349				regs[i].mem_size = mem_size;
  14350				regs[i].id = ++env->id_gen;
  14351			}
  14352		}
  14353	} else {
  14354		/* 1st arg to a function */
  14355		regs[BPF_REG_1].type = PTR_TO_CTX;
  14356		mark_reg_known_zero(env, regs, BPF_REG_1);
  14357		ret = btf_check_subprog_arg_match(env, subprog, regs);
  14358		if (ret == -EFAULT)
  14359			/* unlikely verifier bug; abort.
  14360			 * For the main() function, ret == 0 and ret < 0 are sadly
  14361			 * both acceptable due to backward compatibility. For example,
  14362			 * a socket filter program may be written as
  14363			 * int bpf_prog(struct pt_regs *ctx)
  14364			 * and never dereference that ctx in the program.
  14365			 * 'struct pt_regs' is a type mismatch for a socket
  14366			 * filter, which should be using 'struct __sk_buff'.
  14367			 */
  14368			goto out;
  14369	}
  14370
  14371	ret = do_check(env);
  14372out:
  14373	/* check for NULL is necessary, since cur_state can be freed inside
  14374	 * do_check() under memory pressure.
  14375	 */
  14376	if (env->cur_state) {
  14377		free_verifier_state(env->cur_state, true);
  14378		env->cur_state = NULL;
  14379	}
  14380	while (!pop_stack(env, NULL, NULL, false));
  14381	if (!ret && pop_log)
  14382		bpf_vlog_reset(&env->log, 0);
  14383	free_states(env);
  14384	return ret;
  14385}
  14386
  14387/* Verify all global functions in a BPF program one by one based on their BTF.
  14388 * All global functions must pass verification. Otherwise the whole program is rejected.
  14389 * Consider:
  14390 * int bar(int);
  14391 * int foo(int f)
  14392 * {
  14393 *    return bar(f);
  14394 * }
  14395 * int bar(int b)
  14396 * {
  14397 *    ...
  14398 * }
  14399 * foo() will be verified first for R1=any_scalar_value. During verification it
  14400 * will be assumed that bar() already verified successfully and call to bar()
  14401 * from foo() will be checked for type match only. Later bar() will be verified
  14402 * independently to check that it's safe for R1=any_scalar_value.
  14403 */
  14404static int do_check_subprogs(struct bpf_verifier_env *env)
  14405{
  14406	struct bpf_prog_aux *aux = env->prog->aux;
  14407	int i, ret;
  14408
  14409	if (!aux->func_info)
  14410		return 0;
  14411
  14412	for (i = 1; i < env->subprog_cnt; i++) {
  14413		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
  14414			continue;
  14415		env->insn_idx = env->subprog_info[i].start;
  14416		WARN_ON_ONCE(env->insn_idx == 0);
  14417		ret = do_check_common(env, i);
  14418		if (ret) {
  14419			return ret;
  14420		} else if (env->log.level & BPF_LOG_LEVEL) {
  14421			verbose(env,
  14422				"Func#%d is safe for any args that match its prototype\n",
  14423				i);
  14424		}
  14425	}
  14426	return 0;
  14427}
  14428
  14429static int do_check_main(struct bpf_verifier_env *env)
  14430{
  14431	int ret;
  14432
  14433	env->insn_idx = 0;
  14434	ret = do_check_common(env, 0);
  14435	if (!ret)
  14436		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
  14437	return ret;
  14438}
  14439
  14440
  14441static void print_verification_stats(struct bpf_verifier_env *env)
  14442{
  14443	int i;
  14444
  14445	if (env->log.level & BPF_LOG_STATS) {
  14446		verbose(env, "verification time %lld usec\n",
  14447			div_u64(env->verification_time, 1000));
  14448		verbose(env, "stack depth ");
  14449		for (i = 0; i < env->subprog_cnt; i++) {
  14450			u32 depth = env->subprog_info[i].stack_depth;
  14451
  14452			verbose(env, "%d", depth);
  14453			if (i + 1 < env->subprog_cnt)
  14454				verbose(env, "+");
  14455		}
  14456		verbose(env, "\n");
  14457	}
  14458	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
  14459		"total_states %d peak_states %d mark_read %d\n",
  14460		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
  14461		env->max_states_per_insn, env->total_states,
  14462		env->peak_states, env->longest_mark_read_walk);
  14463}
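/* With a log buffer supplied and BPF_LOG_STATS set, the output looks
 * roughly like (values illustrative only):
 *
 *	verification time 1879 usec
 *	stack depth 64+0
 *	processed 512 insns (limit 1000000) max_states_per_insn 4
 *	total_states 32 peak_states 32 mark_read 5
 */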
  14464
  14465static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
  14466{
  14467	const struct btf_type *t, *func_proto;
  14468	const struct bpf_struct_ops *st_ops;
  14469	const struct btf_member *member;
  14470	struct bpf_prog *prog = env->prog;
  14471	u32 btf_id, member_idx;
  14472	const char *mname;
  14473
  14474	if (!prog->gpl_compatible) {
  14475		verbose(env, "struct ops programs must have a GPL compatible license\n");
  14476		return -EINVAL;
  14477	}
  14478
  14479	btf_id = prog->aux->attach_btf_id;
  14480	st_ops = bpf_struct_ops_find(btf_id);
  14481	if (!st_ops) {
  14482		verbose(env, "attach_btf_id %u is not a supported struct\n",
  14483			btf_id);
  14484		return -ENOTSUPP;
  14485	}
  14486
  14487	t = st_ops->type;
  14488	member_idx = prog->expected_attach_type;
  14489	if (member_idx >= btf_type_vlen(t)) {
  14490		verbose(env, "attach to invalid member idx %u of struct %s\n",
  14491			member_idx, st_ops->name);
  14492		return -EINVAL;
  14493	}
  14494
  14495	member = &btf_type_member(t)[member_idx];
  14496	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
  14497	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
  14498					       NULL);
  14499	if (!func_proto) {
  14500		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
  14501			mname, member_idx, st_ops->name);
  14502		return -EINVAL;
  14503	}
  14504
  14505	if (st_ops->check_member) {
  14506		int err = st_ops->check_member(t, member);
  14507
  14508		if (err) {
  14509			verbose(env, "attach to unsupported member %s of struct %s\n",
  14510				mname, st_ops->name);
  14511			return err;
  14512		}
  14513	}
  14514
  14515	prog->aux->attach_func_proto = func_proto;
  14516	prog->aux->attach_func_name = mname;
  14517	env->ops = st_ops->verifier_ops;
  14518
  14519	return 0;
  14520}
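/* Example (assumptions about the caller, not taken from this file): for a
 * BPF_PROG_TYPE_STRUCT_OPS program implementing tcp_congestion_ops,
 * attach_btf_id would name the struct tcp_congestion_ops BTF type and
 * expected_attach_type would carry the member index of the callback being
 * implemented; env->ops is then switched to that struct_ops' verifier_ops.
 */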
  14521#define SECURITY_PREFIX "security_"
  14522
  14523static int check_attach_modify_return(unsigned long addr, const char *func_name)
  14524{
  14525	if (within_error_injection_list(addr) ||
  14526	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
  14527		return 0;
  14528
  14529	return -EINVAL;
  14530}
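/* In other words, an fmod_ret program may attach only to functions on the
 * error-injection allow list (e.g. annotated with ALLOW_ERROR_INJECTION())
 * or whose name starts with "security_", such as the LSM hook wrappers;
 * everything else is rejected with -EINVAL.
 */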
  14531
  14532/* list of non-sleepable functions that are otherwise on
  14533 * ALLOW_ERROR_INJECTION list
  14534 */
  14535BTF_SET_START(btf_non_sleepable_error_inject)
  14536/* Three functions below can be called from sleepable and non-sleepable context.
  14537 * Assume non-sleepable from bpf safety point of view.
  14538 */
  14539BTF_ID(func, __filemap_add_folio)
  14540BTF_ID(func, should_fail_alloc_page)
  14541BTF_ID(func, should_failslab)
  14542BTF_SET_END(btf_non_sleepable_error_inject)
  14543
  14544static int check_non_sleepable_error_inject(u32 btf_id)
  14545{
  14546	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
  14547}
  14548
  14549int bpf_check_attach_target(struct bpf_verifier_log *log,
  14550			    const struct bpf_prog *prog,
  14551			    const struct bpf_prog *tgt_prog,
  14552			    u32 btf_id,
  14553			    struct bpf_attach_target_info *tgt_info)
  14554{
  14555	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
  14556	const char prefix[] = "btf_trace_";
  14557	int ret = 0, subprog = -1, i;
  14558	const struct btf_type *t;
  14559	bool conservative = true;
  14560	const char *tname;
  14561	struct btf *btf;
  14562	long addr = 0;
  14563
  14564	if (!btf_id) {
  14565		bpf_log(log, "Tracing programs must provide btf_id\n");
  14566		return -EINVAL;
  14567	}
  14568	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
  14569	if (!btf) {
  14570		bpf_log(log,
  14571			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
  14572		return -EINVAL;
  14573	}
  14574	t = btf_type_by_id(btf, btf_id);
  14575	if (!t) {
  14576		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
  14577		return -EINVAL;
  14578	}
  14579	tname = btf_name_by_offset(btf, t->name_off);
  14580	if (!tname) {
  14581		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
  14582		return -EINVAL;
  14583	}
  14584	if (tgt_prog) {
  14585		struct bpf_prog_aux *aux = tgt_prog->aux;
  14586
  14587		for (i = 0; i < aux->func_info_cnt; i++)
  14588			if (aux->func_info[i].type_id == btf_id) {
  14589				subprog = i;
  14590				break;
  14591			}
  14592		if (subprog == -1) {
  14593			bpf_log(log, "Subprog %s doesn't exist\n", tname);
  14594			return -EINVAL;
  14595		}
  14596		conservative = aux->func_info_aux[subprog].unreliable;
  14597		if (prog_extension) {
  14598			if (conservative) {
  14599				bpf_log(log,
  14600					"Cannot replace static functions\n");
  14601				return -EINVAL;
  14602			}
  14603			if (!prog->jit_requested) {
  14604				bpf_log(log,
  14605					"Extension programs should be JITed\n");
  14606				return -EINVAL;
  14607			}
  14608		}
  14609		if (!tgt_prog->jited) {
  14610			bpf_log(log, "Can attach to only JITed progs\n");
  14611			return -EINVAL;
  14612		}
  14613		if (tgt_prog->type == prog->type) {
  14614			/* Cannot fentry/fexit another fentry/fexit program.
  14615			 * Cannot attach program extension to another extension.
  14616			 * It's ok to attach fentry/fexit to extension program.
  14617			 */
  14618			bpf_log(log, "Cannot recursively attach\n");
  14619			return -EINVAL;
  14620		}
  14621		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
  14622		    prog_extension &&
  14623		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
  14624		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
  14625			/* Program extensions can extend all program types
  14626			 * except fentry/fexit, for the following reason.
  14627			 * fentry/fexit programs are used for performance
  14628			 * analysis and stats, and can be attached to any
  14629			 * program type except themselves. When an extension
  14630			 * program replaces an XDP function, performance
  14631			 * analysis must remain possible for all functions,
  14632			 * both the original XDP program and its extension.
  14633			 * Hence attaching fentry/fexit to BPF_PROG_TYPE_EXT
  14634			 * is allowed. If extending fentry/fexit itself were
  14635			 * allowed, it would be possible to create a long call
  14636			 * chain fentry->extension->fentry->extension beyond
  14637			 * a reasonable stack size. Hence extending fentry is
  14638			 * not allowed.
  14639			 */
  14640			bpf_log(log, "Cannot extend fentry/fexit\n");
  14641			return -EINVAL;
  14642		}
  14643	} else {
  14644		if (prog_extension) {
  14645			bpf_log(log, "Cannot replace kernel functions\n");
  14646			return -EINVAL;
  14647		}
  14648	}
  14649
  14650	switch (prog->expected_attach_type) {
  14651	case BPF_TRACE_RAW_TP:
  14652		if (tgt_prog) {
  14653			bpf_log(log,
  14654				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
  14655			return -EINVAL;
  14656		}
  14657		if (!btf_type_is_typedef(t)) {
  14658			bpf_log(log, "attach_btf_id %u is not a typedef\n",
  14659				btf_id);
  14660			return -EINVAL;
  14661		}
  14662		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
  14663			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
  14664				btf_id, tname);
  14665			return -EINVAL;
  14666		}
  14667		tname += sizeof(prefix) - 1;
  14668		t = btf_type_by_id(btf, t->type);
  14669		if (!btf_type_is_ptr(t))
  14670			/* should never happen in valid vmlinux build */
  14671			return -EINVAL;
  14672		t = btf_type_by_id(btf, t->type);
  14673		if (!btf_type_is_func_proto(t))
  14674			/* should never happen in valid vmlinux build */
  14675			return -EINVAL;
  14676
  14677		break;
  14678	case BPF_TRACE_ITER:
  14679		if (!btf_type_is_func(t)) {
  14680			bpf_log(log, "attach_btf_id %u is not a function\n",
  14681				btf_id);
  14682			return -EINVAL;
  14683		}
  14684		t = btf_type_by_id(btf, t->type);
  14685		if (!btf_type_is_func_proto(t))
  14686			return -EINVAL;
  14687		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
  14688		if (ret)
  14689			return ret;
  14690		break;
  14691	default:
  14692		if (!prog_extension)
  14693			return -EINVAL;
  14694		fallthrough;
  14695	case BPF_MODIFY_RETURN:
  14696	case BPF_LSM_MAC:
  14697	case BPF_TRACE_FENTRY:
  14698	case BPF_TRACE_FEXIT:
  14699		if (!btf_type_is_func(t)) {
  14700			bpf_log(log, "attach_btf_id %u is not a function\n",
  14701				btf_id);
  14702			return -EINVAL;
  14703		}
  14704		if (prog_extension &&
  14705		    btf_check_type_match(log, prog, btf, t))
  14706			return -EINVAL;
  14707		t = btf_type_by_id(btf, t->type);
  14708		if (!btf_type_is_func_proto(t))
  14709			return -EINVAL;
  14710
  14711		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
  14712		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
  14713		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
  14714			return -EINVAL;
  14715
  14716		if (tgt_prog && conservative)
  14717			t = NULL;
  14718
  14719		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
  14720		if (ret < 0)
  14721			return ret;
  14722
  14723		if (tgt_prog) {
  14724			if (subprog == 0)
  14725				addr = (long) tgt_prog->bpf_func;
  14726			else
  14727				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
  14728		} else {
  14729			addr = kallsyms_lookup_name(tname);
  14730			if (!addr) {
  14731				bpf_log(log,
  14732					"The address of function %s cannot be found\n",
  14733					tname);
  14734				return -ENOENT;
  14735			}
  14736		}
  14737
  14738		if (prog->aux->sleepable) {
  14739			ret = -EINVAL;
  14740			switch (prog->type) {
  14741			case BPF_PROG_TYPE_TRACING:
  14742				/* fentry/fexit/fmod_ret progs can be sleepable only if they are
  14743				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
  14744				 */
  14745				if (!check_non_sleepable_error_inject(btf_id) &&
  14746				    within_error_injection_list(addr))
  14747					ret = 0;
  14748				break;
  14749			case BPF_PROG_TYPE_LSM:
  14750				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
  14751				 * Only some of them are sleepable.
  14752				 */
  14753				if (bpf_lsm_is_sleepable_hook(btf_id))
  14754					ret = 0;
  14755				break;
  14756			default:
  14757				break;
  14758			}
  14759			if (ret) {
  14760				bpf_log(log, "%s is not sleepable\n", tname);
  14761				return ret;
  14762			}
  14763		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
  14764			if (tgt_prog) {
  14765				bpf_log(log, "can't modify return codes of BPF programs\n");
  14766				return -EINVAL;
  14767			}
  14768			ret = check_attach_modify_return(addr, tname);
  14769			if (ret) {
  14770				bpf_log(log, "%s() is not modifiable\n", tname);
  14771				return ret;
  14772			}
  14773		}
  14774
  14775		break;
  14776	}
  14777	tgt_info->tgt_addr = addr;
  14778	tgt_info->tgt_name = tname;
  14779	tgt_info->tgt_type = t;
  14780	return 0;
  14781}
  14782
  14783BTF_SET_START(btf_id_deny)
  14784BTF_ID_UNUSED
  14785#ifdef CONFIG_SMP
  14786BTF_ID(func, migrate_disable)
  14787BTF_ID(func, migrate_enable)
  14788#endif
  14789#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
  14790BTF_ID(func, rcu_read_unlock_strict)
  14791#endif
  14792BTF_SET_END(btf_id_deny)
  14793
  14794static int check_attach_btf_id(struct bpf_verifier_env *env)
  14795{
  14796	struct bpf_prog *prog = env->prog;
  14797	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
  14798	struct bpf_attach_target_info tgt_info = {};
  14799	u32 btf_id = prog->aux->attach_btf_id;
  14800	struct bpf_trampoline *tr;
  14801	int ret;
  14802	u64 key;
  14803
  14804	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
  14805		if (prog->aux->sleepable)
  14806			/* attach_btf_id checked to be zero already */
  14807			return 0;
  14808		verbose(env, "Syscall programs can only be sleepable\n");
  14809		return -EINVAL;
  14810	}
  14811
  14812	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
  14813	    prog->type != BPF_PROG_TYPE_LSM) {
  14814		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
  14815		return -EINVAL;
  14816	}
  14817
  14818	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
  14819		return check_struct_ops_btf_id(env);
  14820
  14821	if (prog->type != BPF_PROG_TYPE_TRACING &&
  14822	    prog->type != BPF_PROG_TYPE_LSM &&
  14823	    prog->type != BPF_PROG_TYPE_EXT)
  14824		return 0;
  14825
  14826	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
  14827	if (ret)
  14828		return ret;
  14829
  14830	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
  14831		/* to make freplace programs equivalent to their targets, they
  14832		 * need to inherit env->ops and expected_attach_type for the
  14833		 * rest of the verification
  14834		 */
  14835		env->ops = bpf_verifier_ops[tgt_prog->type];
  14836		prog->expected_attach_type = tgt_prog->expected_attach_type;
  14837	}
  14838
  14839	/* store info about the attachment target that will be used later */
  14840	prog->aux->attach_func_proto = tgt_info.tgt_type;
  14841	prog->aux->attach_func_name = tgt_info.tgt_name;
  14842
  14843	if (tgt_prog) {
  14844		prog->aux->saved_dst_prog_type = tgt_prog->type;
  14845		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
  14846	}
  14847
  14848	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
  14849		prog->aux->attach_btf_trace = true;
  14850		return 0;
  14851	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
  14852		if (!bpf_iter_prog_supported(prog))
  14853			return -EINVAL;
  14854		return 0;
  14855	}
  14856
  14857	if (prog->type == BPF_PROG_TYPE_LSM) {
  14858		ret = bpf_lsm_verify_prog(&env->log, prog);
  14859		if (ret < 0)
  14860			return ret;
  14861	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
  14862		   btf_id_set_contains(&btf_id_deny, btf_id)) {
  14863		return -EINVAL;
  14864	}
  14865
  14866	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
  14867	tr = bpf_trampoline_get(key, &tgt_info);
  14868	if (!tr)
  14869		return -ENOMEM;
  14870
  14871	prog->aux->dst_trampoline = tr;
  14872	return 0;
  14873}
  14874
  14875struct btf *bpf_get_btf_vmlinux(void)
  14876{
  14877	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
  14878		mutex_lock(&bpf_verifier_lock);
  14879		if (!btf_vmlinux)
  14880			btf_vmlinux = btf_parse_vmlinux();
  14881		mutex_unlock(&bpf_verifier_lock);
  14882	}
  14883	return btf_vmlinux;
  14884}
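/* This is the usual double-checked lazy init: the unlocked check avoids
 * taking bpf_verifier_lock on the fast path, and the re-check under the
 * lock ensures btf_parse_vmlinux() runs at most once.
 */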
  14885
  14886int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
  14887{
  14888	u64 start_time = ktime_get_ns();
  14889	struct bpf_verifier_env *env;
  14890	struct bpf_verifier_log *log;
  14891	int i, len, ret = -EINVAL;
  14892	bool is_priv;
  14893
  14894	/* no program is valid */
  14895	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
  14896		return -EINVAL;
  14897
  14898	/* 'struct bpf_verifier_env' can be global, but since it's not small,
  14899	 * allocate/free it every time bpf_check() is called
  14900	 */
  14901	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
  14902	if (!env)
  14903		return -ENOMEM;
  14904	log = &env->log;
  14905
  14906	len = (*prog)->len;
  14907	env->insn_aux_data =
  14908		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
  14909	ret = -ENOMEM;
  14910	if (!env->insn_aux_data)
  14911		goto err_free_env;
  14912	for (i = 0; i < len; i++)
  14913		env->insn_aux_data[i].orig_idx = i;
  14914	env->prog = *prog;
  14915	env->ops = bpf_verifier_ops[env->prog->type];
  14916	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
  14917	is_priv = bpf_capable();
  14918
  14919	bpf_get_btf_vmlinux();
  14920
  14921	/* grab the mutex to protect few globals used by verifier */
  14922	if (!is_priv)
  14923		mutex_lock(&bpf_verifier_lock);
  14924
  14925	if (attr->log_level || attr->log_buf || attr->log_size) {
  14926		/* user requested verbose verifier output
  14927		 * and supplied buffer to store the verification trace
  14928		 */
  14929		log->level = attr->log_level;
  14930		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
  14931		log->len_total = attr->log_size;
  14932
  14933		/* log attributes have to be sane */
  14934		if (!bpf_verifier_log_attr_valid(log)) {
  14935			ret = -EINVAL;
  14936			goto err_unlock;
  14937		}
  14938	}
  14939
  14940	mark_verifier_state_clean(env);
  14941
  14942	if (IS_ERR(btf_vmlinux)) {
  14943		/* Either gcc, pahole or the kernel is broken. */
  14944		verbose(env, "in-kernel BTF is malformed\n");
  14945		ret = PTR_ERR(btf_vmlinux);
  14946		goto skip_full_check;
  14947	}
  14948
  14949	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
  14950	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
  14951		env->strict_alignment = true;
  14952	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
  14953		env->strict_alignment = false;
  14954
  14955	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
  14956	env->allow_uninit_stack = bpf_allow_uninit_stack();
  14957	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
  14958	env->bypass_spec_v1 = bpf_bypass_spec_v1();
  14959	env->bypass_spec_v4 = bpf_bypass_spec_v4();
  14960	env->bpf_capable = bpf_capable();
  14961
  14962	if (is_priv)
  14963		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
  14964
  14965	env->explored_states = kvcalloc(state_htab_size(env),
  14966				       sizeof(struct bpf_verifier_state_list *),
  14967				       GFP_USER);
  14968	ret = -ENOMEM;
  14969	if (!env->explored_states)
  14970		goto skip_full_check;
  14971
  14972	ret = add_subprog_and_kfunc(env);
  14973	if (ret < 0)
  14974		goto skip_full_check;
  14975
  14976	ret = check_subprogs(env);
  14977	if (ret < 0)
  14978		goto skip_full_check;
  14979
  14980	ret = check_btf_info(env, attr, uattr);
  14981	if (ret < 0)
  14982		goto skip_full_check;
  14983
  14984	ret = check_attach_btf_id(env);
  14985	if (ret)
  14986		goto skip_full_check;
  14987
  14988	ret = resolve_pseudo_ldimm64(env);
  14989	if (ret < 0)
  14990		goto skip_full_check;
  14991
  14992	if (bpf_prog_is_dev_bound(env->prog->aux)) {
  14993		ret = bpf_prog_offload_verifier_prep(env->prog);
  14994		if (ret)
  14995			goto skip_full_check;
  14996	}
  14997
  14998	ret = check_cfg(env);
  14999	if (ret < 0)
  15000		goto skip_full_check;
  15001
  15002	ret = do_check_subprogs(env);
  15003	ret = ret ?: do_check_main(env);
  15004
  15005	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
  15006		ret = bpf_prog_offload_finalize(env);
  15007
  15008skip_full_check:
  15009	kvfree(env->explored_states);
  15010
  15011	if (ret == 0)
  15012		ret = check_max_stack_depth(env);
  15013
  15014	/* instruction rewrites happen after this point */
  15015	if (is_priv) {
  15016		if (ret == 0)
  15017			opt_hard_wire_dead_code_branches(env);
  15018		if (ret == 0)
  15019			ret = opt_remove_dead_code(env);
  15020		if (ret == 0)
  15021			ret = opt_remove_nops(env);
  15022	} else {
  15023		if (ret == 0)
  15024			sanitize_dead_code(env);
  15025	}
  15026
  15027	if (ret == 0)
  15028		/* program is valid, convert *(u32*)(ctx + off) accesses */
  15029		ret = convert_ctx_accesses(env);
  15030
  15031	if (ret == 0)
  15032		ret = do_misc_fixups(env);
  15033
  15034	/* do 32-bit optimization after insn patching has done so those patched
  15035	 * insns could be handled correctly.
  15036	 */
  15037	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
  15038		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
  15039		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
  15040								     : false;
  15041	}
  15042
  15043	if (ret == 0)
  15044		ret = fixup_call_args(env);
  15045
  15046	env->verification_time = ktime_get_ns() - start_time;
  15047	print_verification_stats(env);
  15048	env->prog->aux->verified_insns = env->insn_processed;
  15049
  15050	if (log->level && bpf_verifier_log_full(log))
  15051		ret = -ENOSPC;
  15052	if (log->level && !log->ubuf) {
  15053		ret = -EFAULT;
  15054		goto err_release_maps;
  15055	}
  15056
  15057	if (ret)
  15058		goto err_release_maps;
  15059
  15060	if (env->used_map_cnt) {
  15061		/* if program passed verifier, update used_maps in bpf_prog_info */
  15062		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
  15063							  sizeof(env->used_maps[0]),
  15064							  GFP_KERNEL);
  15065
  15066		if (!env->prog->aux->used_maps) {
  15067			ret = -ENOMEM;
  15068			goto err_release_maps;
  15069		}
  15070
  15071		memcpy(env->prog->aux->used_maps, env->used_maps,
  15072		       sizeof(env->used_maps[0]) * env->used_map_cnt);
  15073		env->prog->aux->used_map_cnt = env->used_map_cnt;
  15074	}
  15075	if (env->used_btf_cnt) {
  15076		/* if program passed verifier, update used_btfs in bpf_prog_aux */
  15077		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
  15078							  sizeof(env->used_btfs[0]),
  15079							  GFP_KERNEL);
  15080		if (!env->prog->aux->used_btfs) {
  15081			ret = -ENOMEM;
  15082			goto err_release_maps;
  15083		}
  15084
  15085		memcpy(env->prog->aux->used_btfs, env->used_btfs,
  15086		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
  15087		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
  15088	}
  15089	if (env->used_map_cnt || env->used_btf_cnt) {
  15090		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
  15091		 * bpf_ld_imm64 instructions
  15092		 */
  15093		convert_pseudo_ld_imm64(env);
  15094	}
  15095
  15096	adjust_btf_func(env);
  15097
  15098err_release_maps:
  15099	if (!env->prog->aux->used_maps)
  15100		/* if we didn't copy map pointers into bpf_prog_info, release
  15101		 * them now. Otherwise free_used_maps() will release them.
  15102		 */
  15103		release_maps(env);
  15104	if (!env->prog->aux->used_btfs)
  15105		release_btfs(env);
  15106
  15107	/* extension progs temporarily inherit the attach_type of their targets
  15108	 * for verification purposes, so set it back to zero before returning
  15109	 */
  15110	if (env->prog->type == BPF_PROG_TYPE_EXT)
  15111		env->prog->expected_attach_type = 0;
  15112
  15113	*prog = env->prog;
  15114err_unlock:
  15115	if (!is_priv)
  15116		mutex_unlock(&bpf_verifier_lock);
  15117	vfree(env->insn_aux_data);
  15118err_free_env:
  15119	kfree(env);
  15120	return ret;
  15121}
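/* bpf_check() is the verifier entry point used by the BPF_PROG_LOAD path
 * (bpf_prog_load() in kernel/bpf/syscall.c). Note that on return *prog may
 * point to a different bpf_prog than the one passed in, since the fixup
 * passes above can replace the instruction array via bpf_patch_insn_data().
 */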