bpf_trace.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
bpf_trace.c (69377B)
      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
      3 * Copyright (c) 2016 Facebook
      4 */
      5#include <linux/kernel.h>
      6#include <linux/types.h>
      7#include <linux/slab.h>
      8#include <linux/bpf.h>
      9#include <linux/bpf_perf_event.h>
     10#include <linux/btf.h>
     11#include <linux/filter.h>
     12#include <linux/uaccess.h>
     13#include <linux/ctype.h>
     14#include <linux/kprobes.h>
     15#include <linux/spinlock.h>
     16#include <linux/syscalls.h>
     17#include <linux/error-injection.h>
     18#include <linux/btf_ids.h>
     19#include <linux/bpf_lsm.h>
     20#include <linux/fprobe.h>
     21#include <linux/bsearch.h>
     22#include <linux/sort.h>
     23
     24#include <net/bpf_sk_storage.h>
     25
     26#include <uapi/linux/bpf.h>
     27#include <uapi/linux/btf.h>
     28
     29#include <asm/tlb.h>
     30
     31#include "trace_probe.h"
     32#include "trace.h"
     33
     34#define CREATE_TRACE_POINTS
     35#include "bpf_trace.h"
     36
     37#define bpf_event_rcu_dereference(p)					\
     38	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
     39
     40#ifdef CONFIG_MODULES
     41struct bpf_trace_module {
     42	struct module *module;
     43	struct list_head list;
     44};
     45
     46static LIST_HEAD(bpf_trace_modules);
     47static DEFINE_MUTEX(bpf_module_mutex);
     48
     49static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
     50{
     51	struct bpf_raw_event_map *btp, *ret = NULL;
     52	struct bpf_trace_module *btm;
     53	unsigned int i;
     54
     55	mutex_lock(&bpf_module_mutex);
     56	list_for_each_entry(btm, &bpf_trace_modules, list) {
     57		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
     58			btp = &btm->module->bpf_raw_events[i];
     59			if (!strcmp(btp->tp->name, name)) {
     60				if (try_module_get(btm->module))
     61					ret = btp;
     62				goto out;
     63			}
     64		}
     65	}
     66out:
     67	mutex_unlock(&bpf_module_mutex);
     68	return ret;
     69}
     70#else
     71static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
     72{
     73	return NULL;
     74}
     75#endif /* CONFIG_MODULES */
     76
     77u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
     78u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
     79
     80static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
     81				  u64 flags, const struct btf **btf,
     82				  s32 *btf_id);
     83static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
     84static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
     85
     86/**
     87 * trace_call_bpf - invoke BPF program
     88 * @call: tracepoint event
     89 * @ctx: opaque context pointer
     90 *
     91 * kprobe handlers execute BPF programs via this helper.
     92 * Can be used from static tracepoints in the future.
     93 *
     94 * Return: BPF programs always return an integer which is interpreted by
     95 * kprobe handler as:
     96 * 0 - return from kprobe (event is filtered out)
     97 * 1 - store kprobe event into ring buffer
     98 * Other values are reserved and currently alias to 1
     99 */
    100unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
    101{
    102	unsigned int ret;
    103
    104	cant_sleep();
    105
    106	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
    107		/*
    108		 * since some bpf program is already running on this cpu,
    109		 * don't call into another bpf program (same or different)
    110		 * and don't send kprobe event into ring-buffer,
    111		 * so return zero here
    112		 */
    113		ret = 0;
    114		goto out;
    115	}
    116
    117	/*
    118	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
    119	 * to all call sites, we did a bpf_prog_array_valid() there to check
    120	 * whether call->prog_array is empty or not, which is
    121	 * a heuristic to speed up execution.
    122	 *
    123	 * If bpf_prog_array_valid() fetched prog_array was
    124	 * non-NULL, we go into trace_call_bpf() and do the actual
    125	 * proper rcu_dereference() under RCU lock.
    126	 * If it turns out that prog_array is NULL then, we bail out.
    127	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
    128	 * was NULL, you'll skip the prog_array with the risk of missing
    129	 * out of events when it was updated in between this and the
    130	 * rcu_dereference() which is accepted risk.
    131	 */
    132	rcu_read_lock();
    133	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
    134				 ctx, bpf_prog_run);
    135	rcu_read_unlock();
    136
    137 out:
    138	__this_cpu_dec(bpf_prog_active);
    139
    140	return ret;
    141}
    142
    143#ifdef CONFIG_BPF_KPROBE_OVERRIDE
    144BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
    145{
    146	regs_set_return_value(regs, rc);
    147	override_function_with_return(regs);
    148	return 0;
    149}
    150
    151static const struct bpf_func_proto bpf_override_return_proto = {
    152	.func		= bpf_override_return,
    153	.gpl_only	= true,
    154	.ret_type	= RET_INTEGER,
    155	.arg1_type	= ARG_PTR_TO_CTX,
    156	.arg2_type	= ARG_ANYTHING,
    157};
    158#endif
    159
    160static __always_inline int
    161bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
    162{
    163	int ret;
    164
    165	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
    166	if (unlikely(ret < 0))
    167		memset(dst, 0, size);
    168	return ret;
    169}
    170
    171BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
    172	   const void __user *, unsafe_ptr)
    173{
    174	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
    175}
    176
    177const struct bpf_func_proto bpf_probe_read_user_proto = {
    178	.func		= bpf_probe_read_user,
    179	.gpl_only	= true,
    180	.ret_type	= RET_INTEGER,
    181	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    182	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    183	.arg3_type	= ARG_ANYTHING,
    184};
    185
    186static __always_inline int
    187bpf_probe_read_user_str_common(void *dst, u32 size,
    188			       const void __user *unsafe_ptr)
    189{
    190	int ret;
    191
    192	/*
    193	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
    194	 * terminator into `dst`.
    195	 *
    196	 * strncpy_from_user() does long-sized strides in the fast path. If the
    197	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
    198	 * then there could be junk after the NUL in `dst`. If user takes `dst`
    199	 * and keys a hash map with it, then semantically identical strings can
    200	 * occupy multiple entries in the map.
    201	 */
    202	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
    203	if (unlikely(ret < 0))
    204		memset(dst, 0, size);
    205	return ret;
    206}
    207
    208BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
    209	   const void __user *, unsafe_ptr)
    210{
    211	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
    212}
    213
    214const struct bpf_func_proto bpf_probe_read_user_str_proto = {
    215	.func		= bpf_probe_read_user_str,
    216	.gpl_only	= true,
    217	.ret_type	= RET_INTEGER,
    218	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    219	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    220	.arg3_type	= ARG_ANYTHING,
    221};
    222
    223static __always_inline int
    224bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
    225{
    226	int ret;
    227
    228	ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
    229	if (unlikely(ret < 0))
    230		memset(dst, 0, size);
    231	return ret;
    232}
    233
    234BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
    235	   const void *, unsafe_ptr)
    236{
    237	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
    238}
    239
    240const struct bpf_func_proto bpf_probe_read_kernel_proto = {
    241	.func		= bpf_probe_read_kernel,
    242	.gpl_only	= true,
    243	.ret_type	= RET_INTEGER,
    244	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    245	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    246	.arg3_type	= ARG_ANYTHING,
    247};
    248
    249static __always_inline int
    250bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
    251{
    252	int ret;
    253
    254	/*
    255	 * The strncpy_from_kernel_nofault() call will likely not fill the
    256	 * entire buffer, but that's okay in this circumstance as we're probing
    257	 * arbitrary memory anyway similar to bpf_probe_read_*() and might
    258	 * as well probe the stack. Thus, memory is explicitly cleared
    259	 * only in error case, so that improper users ignoring return
    260	 * code altogether don't copy garbage; otherwise length of string
    261	 * is returned that can be used for bpf_perf_event_output() et al.
    262	 */
    263	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
    264	if (unlikely(ret < 0))
    265		memset(dst, 0, size);
    266	return ret;
    267}
    268
    269BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
    270	   const void *, unsafe_ptr)
    271{
    272	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
    273}
    274
    275const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
    276	.func		= bpf_probe_read_kernel_str,
    277	.gpl_only	= true,
    278	.ret_type	= RET_INTEGER,
    279	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    280	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    281	.arg3_type	= ARG_ANYTHING,
    282};
    283
    284#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
    285BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
    286	   const void *, unsafe_ptr)
    287{
    288	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
    289		return bpf_probe_read_user_common(dst, size,
    290				(__force void __user *)unsafe_ptr);
    291	}
    292	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
    293}
    294
    295static const struct bpf_func_proto bpf_probe_read_compat_proto = {
    296	.func		= bpf_probe_read_compat,
    297	.gpl_only	= true,
    298	.ret_type	= RET_INTEGER,
    299	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    300	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    301	.arg3_type	= ARG_ANYTHING,
    302};
    303
    304BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
    305	   const void *, unsafe_ptr)
    306{
    307	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
    308		return bpf_probe_read_user_str_common(dst, size,
    309				(__force void __user *)unsafe_ptr);
    310	}
    311	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
    312}
    313
    314static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
    315	.func		= bpf_probe_read_compat_str,
    316	.gpl_only	= true,
    317	.ret_type	= RET_INTEGER,
    318	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    319	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    320	.arg3_type	= ARG_ANYTHING,
    321};
    322#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
    323
    324BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
    325	   u32, size)
    326{
    327	/*
    328	 * Ensure we're in user context which is safe for the helper to
    329	 * run. This helper has no business in a kthread.
    330	 *
    331	 * access_ok() should prevent writing to non-user memory, but in
    332	 * some situations (nommu, temporary switch, etc) access_ok() does
    333	 * not provide enough validation, hence the check on KERNEL_DS.
    334	 *
    335	 * nmi_uaccess_okay() ensures the probe is not run in an interim
    336	 * state, when the task or mm are switched. This is specifically
    337	 * required to prevent the use of temporary mm.
    338	 */
    339
    340	if (unlikely(in_interrupt() ||
    341		     current->flags & (PF_KTHREAD | PF_EXITING)))
    342		return -EPERM;
    343	if (unlikely(!nmi_uaccess_okay()))
    344		return -EPERM;
    345
    346	return copy_to_user_nofault(unsafe_ptr, src, size);
    347}
    348
    349static const struct bpf_func_proto bpf_probe_write_user_proto = {
    350	.func		= bpf_probe_write_user,
    351	.gpl_only	= true,
    352	.ret_type	= RET_INTEGER,
    353	.arg1_type	= ARG_ANYTHING,
    354	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    355	.arg3_type	= ARG_CONST_SIZE,
    356};
    357
    358static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
    359{
    360	if (!capable(CAP_SYS_ADMIN))
    361		return NULL;
    362
    363	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
    364			    current->comm, task_pid_nr(current));
    365
    366	return &bpf_probe_write_user_proto;
    367}
    368
    369static DEFINE_RAW_SPINLOCK(trace_printk_lock);
    370
    371#define MAX_TRACE_PRINTK_VARARGS	3
    372#define BPF_TRACE_PRINTK_SIZE		1024
    373
    374BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
    375	   u64, arg2, u64, arg3)
    376{
    377	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
    378	u32 *bin_args;
    379	static char buf[BPF_TRACE_PRINTK_SIZE];
    380	unsigned long flags;
    381	int ret;
    382
    383	ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
    384				  MAX_TRACE_PRINTK_VARARGS);
    385	if (ret < 0)
    386		return ret;
    387
    388	raw_spin_lock_irqsave(&trace_printk_lock, flags);
    389	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
    390
    391	trace_bpf_trace_printk(buf);
    392	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
    393
    394	bpf_bprintf_cleanup();
    395
    396	return ret;
    397}
    398
    399static const struct bpf_func_proto bpf_trace_printk_proto = {
    400	.func		= bpf_trace_printk,
    401	.gpl_only	= true,
    402	.ret_type	= RET_INTEGER,
    403	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    404	.arg2_type	= ARG_CONST_SIZE,
    405};
    406
    407static void __set_printk_clr_event(void)
    408{
    409	/*
    410	 * This program might be calling bpf_trace_printk,
    411	 * so enable the associated bpf_trace/bpf_trace_printk event.
    412	 * Repeat this each time as it is possible a user has
    413	 * disabled bpf_trace_printk events.  By loading a program
    414	 * calling bpf_trace_printk() however the user has expressed
    415	 * the intent to see such events.
    416	 */
    417	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
    418		pr_warn_ratelimited("could not enable bpf_trace_printk events");
    419}
    420
    421const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
    422{
    423	__set_printk_clr_event();
    424	return &bpf_trace_printk_proto;
    425}
    426
    427BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
    428	   u32, data_len)
    429{
    430	static char buf[BPF_TRACE_PRINTK_SIZE];
    431	unsigned long flags;
    432	int ret, num_args;
    433	u32 *bin_args;
    434
    435	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
    436	    (data_len && !data))
    437		return -EINVAL;
    438	num_args = data_len / 8;
    439
    440	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
    441	if (ret < 0)
    442		return ret;
    443
    444	raw_spin_lock_irqsave(&trace_printk_lock, flags);
    445	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
    446
    447	trace_bpf_trace_printk(buf);
    448	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
    449
    450	bpf_bprintf_cleanup();
    451
    452	return ret;
    453}
    454
    455static const struct bpf_func_proto bpf_trace_vprintk_proto = {
    456	.func		= bpf_trace_vprintk,
    457	.gpl_only	= true,
    458	.ret_type	= RET_INTEGER,
    459	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    460	.arg2_type	= ARG_CONST_SIZE,
    461	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
    462	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
    463};
    464
    465const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
    466{
    467	__set_printk_clr_event();
    468	return &bpf_trace_vprintk_proto;
    469}
    470
    471BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
    472	   const void *, data, u32, data_len)
    473{
    474	int err, num_args;
    475	u32 *bin_args;
    476
    477	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
    478	    (data_len && !data))
    479		return -EINVAL;
    480	num_args = data_len / 8;
    481
    482	err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
    483	if (err < 0)
    484		return err;
    485
    486	seq_bprintf(m, fmt, bin_args);
    487
    488	bpf_bprintf_cleanup();
    489
    490	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
    491}
    492
    493BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
    494
    495static const struct bpf_func_proto bpf_seq_printf_proto = {
    496	.func		= bpf_seq_printf,
    497	.gpl_only	= true,
    498	.ret_type	= RET_INTEGER,
    499	.arg1_type	= ARG_PTR_TO_BTF_ID,
    500	.arg1_btf_id	= &btf_seq_file_ids[0],
    501	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    502	.arg3_type	= ARG_CONST_SIZE,
    503	.arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
    504	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
    505};
    506
    507BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
    508{
    509	return seq_write(m, data, len) ? -EOVERFLOW : 0;
    510}
    511
    512static const struct bpf_func_proto bpf_seq_write_proto = {
    513	.func		= bpf_seq_write,
    514	.gpl_only	= true,
    515	.ret_type	= RET_INTEGER,
    516	.arg1_type	= ARG_PTR_TO_BTF_ID,
    517	.arg1_btf_id	= &btf_seq_file_ids[0],
    518	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    519	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
    520};
    521
    522BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
    523	   u32, btf_ptr_size, u64, flags)
    524{
    525	const struct btf *btf;
    526	s32 btf_id;
    527	int ret;
    528
    529	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
    530	if (ret)
    531		return ret;
    532
    533	return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
    534}
    535
    536static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
    537	.func		= bpf_seq_printf_btf,
    538	.gpl_only	= true,
    539	.ret_type	= RET_INTEGER,
    540	.arg1_type	= ARG_PTR_TO_BTF_ID,
    541	.arg1_btf_id	= &btf_seq_file_ids[0],
    542	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    543	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
    544	.arg4_type	= ARG_ANYTHING,
    545};
    546
    547static __always_inline int
    548get_map_perf_counter(struct bpf_map *map, u64 flags,
    549		     u64 *value, u64 *enabled, u64 *running)
    550{
    551	struct bpf_array *array = container_of(map, struct bpf_array, map);
    552	unsigned int cpu = smp_processor_id();
    553	u64 index = flags & BPF_F_INDEX_MASK;
    554	struct bpf_event_entry *ee;
    555
    556	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
    557		return -EINVAL;
    558	if (index == BPF_F_CURRENT_CPU)
    559		index = cpu;
    560	if (unlikely(index >= array->map.max_entries))
    561		return -E2BIG;
    562
    563	ee = READ_ONCE(array->ptrs[index]);
    564	if (!ee)
    565		return -ENOENT;
    566
    567	return perf_event_read_local(ee->event, value, enabled, running);
    568}
    569
    570BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
    571{
    572	u64 value = 0;
    573	int err;
    574
    575	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
    576	/*
    577	 * this api is ugly since we miss [-22..-2] range of valid
    578	 * counter values, but that's uapi
    579	 */
    580	if (err)
    581		return err;
    582	return value;
    583}
    584
    585static const struct bpf_func_proto bpf_perf_event_read_proto = {
    586	.func		= bpf_perf_event_read,
    587	.gpl_only	= true,
    588	.ret_type	= RET_INTEGER,
    589	.arg1_type	= ARG_CONST_MAP_PTR,
    590	.arg2_type	= ARG_ANYTHING,
    591};
    592
    593BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
    594	   struct bpf_perf_event_value *, buf, u32, size)
    595{
    596	int err = -EINVAL;
    597
    598	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
    599		goto clear;
    600	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
    601				   &buf->running);
    602	if (unlikely(err))
    603		goto clear;
    604	return 0;
    605clear:
    606	memset(buf, 0, size);
    607	return err;
    608}
    609
    610static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
    611	.func		= bpf_perf_event_read_value,
    612	.gpl_only	= true,
    613	.ret_type	= RET_INTEGER,
    614	.arg1_type	= ARG_CONST_MAP_PTR,
    615	.arg2_type	= ARG_ANYTHING,
    616	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
    617	.arg4_type	= ARG_CONST_SIZE,
    618};
    619
    620static __always_inline u64
    621__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
    622			u64 flags, struct perf_sample_data *sd)
    623{
    624	struct bpf_array *array = container_of(map, struct bpf_array, map);
    625	unsigned int cpu = smp_processor_id();
    626	u64 index = flags & BPF_F_INDEX_MASK;
    627	struct bpf_event_entry *ee;
    628	struct perf_event *event;
    629
    630	if (index == BPF_F_CURRENT_CPU)
    631		index = cpu;
    632	if (unlikely(index >= array->map.max_entries))
    633		return -E2BIG;
    634
    635	ee = READ_ONCE(array->ptrs[index]);
    636	if (!ee)
    637		return -ENOENT;
    638
    639	event = ee->event;
    640	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
    641		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
    642		return -EINVAL;
    643
    644	if (unlikely(event->oncpu != cpu))
    645		return -EOPNOTSUPP;
    646
    647	return perf_event_output(event, sd, regs);
    648}
    649
    650/*
    651 * Support executing tracepoints in normal, irq, and nmi context that each call
    652 * bpf_perf_event_output
    653 */
    654struct bpf_trace_sample_data {
    655	struct perf_sample_data sds[3];
    656};
    657
    658static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
    659static DEFINE_PER_CPU(int, bpf_trace_nest_level);
    660BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
    661	   u64, flags, void *, data, u64, size)
    662{
    663	struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
    664	int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
    665	struct perf_raw_record raw = {
    666		.frag = {
    667			.size = size,
    668			.data = data,
    669		},
    670	};
    671	struct perf_sample_data *sd;
    672	int err;
    673
    674	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
    675		err = -EBUSY;
    676		goto out;
    677	}
    678
    679	sd = &sds->sds[nest_level - 1];
    680
    681	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
    682		err = -EINVAL;
    683		goto out;
    684	}
    685
    686	perf_sample_data_init(sd, 0, 0);
    687	sd->raw = &raw;
    688
    689	err = __bpf_perf_event_output(regs, map, flags, sd);
    690
    691out:
    692	this_cpu_dec(bpf_trace_nest_level);
    693	return err;
    694}
    695
    696static const struct bpf_func_proto bpf_perf_event_output_proto = {
    697	.func		= bpf_perf_event_output,
    698	.gpl_only	= true,
    699	.ret_type	= RET_INTEGER,
    700	.arg1_type	= ARG_PTR_TO_CTX,
    701	.arg2_type	= ARG_CONST_MAP_PTR,
    702	.arg3_type	= ARG_ANYTHING,
    703	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    704	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
    705};
    706
    707static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
    708struct bpf_nested_pt_regs {
    709	struct pt_regs regs[3];
    710};
    711static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
    712static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
    713
    714u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
    715		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
    716{
    717	int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
    718	struct perf_raw_frag frag = {
    719		.copy		= ctx_copy,
    720		.size		= ctx_size,
    721		.data		= ctx,
    722	};
    723	struct perf_raw_record raw = {
    724		.frag = {
    725			{
    726				.next	= ctx_size ? &frag : NULL,
    727			},
    728			.size	= meta_size,
    729			.data	= meta,
    730		},
    731	};
    732	struct perf_sample_data *sd;
    733	struct pt_regs *regs;
    734	u64 ret;
    735
    736	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
    737		ret = -EBUSY;
    738		goto out;
    739	}
    740	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
    741	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
    742
    743	perf_fetch_caller_regs(regs);
    744	perf_sample_data_init(sd, 0, 0);
    745	sd->raw = &raw;
    746
    747	ret = __bpf_perf_event_output(regs, map, flags, sd);
    748out:
    749	this_cpu_dec(bpf_event_output_nest_level);
    750	return ret;
    751}
    752
    753BPF_CALL_0(bpf_get_current_task)
    754{
    755	return (long) current;
    756}
    757
    758const struct bpf_func_proto bpf_get_current_task_proto = {
    759	.func		= bpf_get_current_task,
    760	.gpl_only	= true,
    761	.ret_type	= RET_INTEGER,
    762};
    763
    764BPF_CALL_0(bpf_get_current_task_btf)
    765{
    766	return (unsigned long) current;
    767}
    768
    769const struct bpf_func_proto bpf_get_current_task_btf_proto = {
    770	.func		= bpf_get_current_task_btf,
    771	.gpl_only	= true,
    772	.ret_type	= RET_PTR_TO_BTF_ID,
    773	.ret_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
    774};
    775
    776BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
    777{
    778	return (unsigned long) task_pt_regs(task);
    779}
    780
    781BTF_ID_LIST(bpf_task_pt_regs_ids)
    782BTF_ID(struct, pt_regs)
    783
    784const struct bpf_func_proto bpf_task_pt_regs_proto = {
    785	.func		= bpf_task_pt_regs,
    786	.gpl_only	= true,
    787	.arg1_type	= ARG_PTR_TO_BTF_ID,
    788	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
    789	.ret_type	= RET_PTR_TO_BTF_ID,
    790	.ret_btf_id	= &bpf_task_pt_regs_ids[0],
    791};
    792
    793BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
    794{
    795	struct bpf_array *array = container_of(map, struct bpf_array, map);
    796	struct cgroup *cgrp;
    797
    798	if (unlikely(idx >= array->map.max_entries))
    799		return -E2BIG;
    800
    801	cgrp = READ_ONCE(array->ptrs[idx]);
    802	if (unlikely(!cgrp))
    803		return -EAGAIN;
    804
    805	return task_under_cgroup_hierarchy(current, cgrp);
    806}
    807
    808static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
    809	.func           = bpf_current_task_under_cgroup,
    810	.gpl_only       = false,
    811	.ret_type       = RET_INTEGER,
    812	.arg1_type      = ARG_CONST_MAP_PTR,
    813	.arg2_type      = ARG_ANYTHING,
    814};
    815
    816struct send_signal_irq_work {
    817	struct irq_work irq_work;
    818	struct task_struct *task;
    819	u32 sig;
    820	enum pid_type type;
    821};
    822
    823static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
    824
    825static void do_bpf_send_signal(struct irq_work *entry)
    826{
    827	struct send_signal_irq_work *work;
    828
    829	work = container_of(entry, struct send_signal_irq_work, irq_work);
    830	group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
    831}
    832
    833static int bpf_send_signal_common(u32 sig, enum pid_type type)
    834{
    835	struct send_signal_irq_work *work = NULL;
    836
    837	/* Similar to bpf_probe_write_user, task needs to be
    838	 * in a sound condition and kernel memory access be
    839	 * permitted in order to send signal to the current
    840	 * task.
    841	 */
    842	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
    843		return -EPERM;
    844	if (unlikely(!nmi_uaccess_okay()))
    845		return -EPERM;
    846
    847	if (irqs_disabled()) {
    848		/* Do an early check on signal validity. Otherwise,
    849		 * the error is lost in deferred irq_work.
    850		 */
    851		if (unlikely(!valid_signal(sig)))
    852			return -EINVAL;
    853
    854		work = this_cpu_ptr(&send_signal_work);
    855		if (irq_work_is_busy(&work->irq_work))
    856			return -EBUSY;
    857
    858		/* Add the current task, which is the target of sending signal,
    859		 * to the irq_work. The current task may change when queued
    860		 * irq works get executed.
    861		 */
    862		work->task = current;
    863		work->sig = sig;
    864		work->type = type;
    865		irq_work_queue(&work->irq_work);
    866		return 0;
    867	}
    868
    869	return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
    870}
    871
    872BPF_CALL_1(bpf_send_signal, u32, sig)
    873{
    874	return bpf_send_signal_common(sig, PIDTYPE_TGID);
    875}
    876
    877static const struct bpf_func_proto bpf_send_signal_proto = {
    878	.func		= bpf_send_signal,
    879	.gpl_only	= false,
    880	.ret_type	= RET_INTEGER,
    881	.arg1_type	= ARG_ANYTHING,
    882};
    883
    884BPF_CALL_1(bpf_send_signal_thread, u32, sig)
    885{
    886	return bpf_send_signal_common(sig, PIDTYPE_PID);
    887}
    888
    889static const struct bpf_func_proto bpf_send_signal_thread_proto = {
    890	.func		= bpf_send_signal_thread,
    891	.gpl_only	= false,
    892	.ret_type	= RET_INTEGER,
    893	.arg1_type	= ARG_ANYTHING,
    894};
    895
    896BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
    897{
    898	long len;
    899	char *p;
    900
    901	if (!sz)
    902		return 0;
    903
    904	p = d_path(path, buf, sz);
    905	if (IS_ERR(p)) {
    906		len = PTR_ERR(p);
    907	} else {
    908		len = buf + sz - p;
    909		memmove(buf, p, len);
    910	}
    911
    912	return len;
    913}
    914
    915BTF_SET_START(btf_allowlist_d_path)
    916#ifdef CONFIG_SECURITY
    917BTF_ID(func, security_file_permission)
    918BTF_ID(func, security_inode_getattr)
    919BTF_ID(func, security_file_open)
    920#endif
    921#ifdef CONFIG_SECURITY_PATH
    922BTF_ID(func, security_path_truncate)
    923#endif
    924BTF_ID(func, vfs_truncate)
    925BTF_ID(func, vfs_fallocate)
    926BTF_ID(func, dentry_open)
    927BTF_ID(func, vfs_getattr)
    928BTF_ID(func, filp_close)
    929BTF_SET_END(btf_allowlist_d_path)
    930
    931static bool bpf_d_path_allowed(const struct bpf_prog *prog)
    932{
    933	if (prog->type == BPF_PROG_TYPE_TRACING &&
    934	    prog->expected_attach_type == BPF_TRACE_ITER)
    935		return true;
    936
    937	if (prog->type == BPF_PROG_TYPE_LSM)
    938		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
    939
    940	return btf_id_set_contains(&btf_allowlist_d_path,
    941				   prog->aux->attach_btf_id);
    942}
    943
    944BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
    945
/*
 * Prototype for bpf_d_path(): arg1 is a BTF pointer typed by
 * bpf_d_path_btf_ids[0] ('struct path'), arg2/arg3 are the buffer and its
 * size (zero permitted). The .allowed callback is bpf_d_path_allowed().
 */
static const struct bpf_func_proto bpf_d_path_proto = {
	.func		= bpf_d_path,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_d_path_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.allowed	= bpf_d_path_allowed,
};
    956
/*
 * Union of the BTF_F_* flag bits accepted by bpf_btf_printf_prepare();
 * any bit outside this mask makes that function return -EINVAL.
 */
#define BTF_F_ALL	(BTF_F_COMPACT  | BTF_F_NONAME | \
			 BTF_F_PTR_RAW | BTF_F_ZERO)
    959
    960static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
    961				  u64 flags, const struct btf **btf,
    962				  s32 *btf_id)
    963{
    964	const struct btf_type *t;
    965
    966	if (unlikely(flags & ~(BTF_F_ALL)))
    967		return -EINVAL;
    968
    969	if (btf_ptr_size != sizeof(struct btf_ptr))
    970		return -EINVAL;
    971
    972	*btf = bpf_get_btf_vmlinux();
    973
    974	if (IS_ERR_OR_NULL(*btf))
    975		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
    976
    977	if (ptr->type_id > 0)
    978		*btf_id = ptr->type_id;
    979	else
    980		return -EINVAL;
    981
    982	if (*btf_id > 0)
    983		t = btf_type_by_id(*btf, *btf_id);
    984	if (*btf_id <= 0 || !t)
    985		return -ENOENT;
    986
    987	return 0;
    988}
    989
    990BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
    991	   u32, btf_ptr_size, u64, flags)
    992{
    993	const struct btf *btf;
    994	s32 btf_id;
    995	int ret;
    996
    997	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
    998	if (ret)
    999		return ret;
   1000
   1001	return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
   1002				      flags);
   1003}
   1004
/*
 * Prototype for bpf_snprintf_btf(): (output buffer, its size, read-only
 * struct btf_ptr memory, its size, flags). Non-static, so it is visible
 * outside this file.
 */
const struct bpf_func_proto bpf_snprintf_btf_proto = {
	.func		= bpf_snprintf_btf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};
   1015
/*
 * BPF helper: return the u64 stored two slots below @ctx (index -2 when
 * @ctx is viewed as a u64 array).
 * NOTE(review): going by the helper's name this slot holds the traced
 * function's address; the code that fills it is outside this chunk.
 */
BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-2];
}
   1021
/* Prototype for bpf_get_func_ip_tracing(): GPL-only, takes the program context. */
static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
	.func		= bpf_get_func_ip_tracing,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1028
   1029BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
   1030{
   1031	struct kprobe *kp = kprobe_running();
   1032
   1033	return kp ? (uintptr_t)kp->addr : 0;
   1034}
   1035
/* Prototype for bpf_get_func_ip_kprobe(): GPL-only, takes the program context. */
static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
	.func		= bpf_get_func_ip_kprobe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1042
/*
 * BPF helper: return bpf_kprobe_multi_entry_ip() evaluated on the current
 * task's bpf_ctx. @regs is not used.
 */
BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
}
   1047
/* Prototype for bpf_get_func_ip_kprobe_multi(): not GPL-only, takes the program context. */
static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
	.func		= bpf_get_func_ip_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1054
/*
 * BPF helper: return bpf_kprobe_multi_cookie() evaluated on the current
 * task's bpf_ctx. @regs is not used.
 */
BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
{
	return bpf_kprobe_multi_cookie(current->bpf_ctx);
}
   1059
/* Prototype for bpf_get_attach_cookie_kprobe_multi(): not GPL-only, takes the program context. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
	.func		= bpf_get_attach_cookie_kprobe_multi,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1066
   1067BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
   1068{
   1069	struct bpf_trace_run_ctx *run_ctx;
   1070
   1071	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
   1072	return run_ctx->bpf_cookie;
   1073}
   1074
/* Prototype for bpf_get_attach_cookie_trace(): not GPL-only, takes the program context. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
	.func		= bpf_get_attach_cookie_trace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1081
/* BPF helper: return the bpf_cookie stored in the perf event referenced by @ctx. */
BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
{
	return ctx->event->bpf_cookie;
}
   1086
/* Prototype for bpf_get_attach_cookie_pe(): not GPL-only, takes the program context. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
	.func		= bpf_get_attach_cookie_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1093
   1094BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
   1095{
   1096	struct bpf_trace_run_ctx *run_ctx;
   1097
   1098	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
   1099	return run_ctx->bpf_cookie;
   1100}
   1101
/* Prototype for bpf_get_attach_cookie_tracing(): not GPL-only, takes the program context. */
static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
	.func		= bpf_get_attach_cookie_tracing,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1108
   1109BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
   1110{
   1111#ifndef CONFIG_X86
   1112	return -ENOENT;
   1113#else
   1114	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
   1115	u32 entry_cnt = size / br_entry_size;
   1116
   1117	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);
   1118
   1119	if (unlikely(flags))
   1120		return -EINVAL;
   1121
   1122	if (!entry_cnt)
   1123		return -ENOENT;
   1124
   1125	return entry_cnt * br_entry_size;
   1126#endif
   1127}
   1128
/*
 * Prototype for bpf_get_branch_snapshot(): GPL-only; arg1/arg2 are the
 * (possibly uninitialized) output buffer and its size, zero permitted.
 * NOTE(review): the helper also takes a third 'flags' argument, but no
 * arg3_type is declared here - confirm this is intentional.
 */
static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
	.func		= bpf_get_branch_snapshot,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
};
   1136
   1137BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
   1138{
   1139	/* This helper call is inlined by verifier. */
   1140	u64 nr_args = ((u64 *)ctx)[-1];
   1141
   1142	if ((u64) n >= nr_args)
   1143		return -EINVAL;
   1144	*value = ((u64 *)ctx)[n];
   1145	return 0;
   1146}
   1147
/* Prototype for get_func_arg(): (ctx, index, pointer receiving the value). */
static const struct bpf_func_proto bpf_get_func_arg_proto = {
	.func		= get_func_arg,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_LONG,
};
   1155
   1156BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
   1157{
   1158	/* This helper call is inlined by verifier. */
   1159	u64 nr_args = ((u64 *)ctx)[-1];
   1160
   1161	*value = ((u64 *)ctx)[nr_args];
   1162	return 0;
   1163}
   1164
/* Prototype for get_func_ret(): (ctx, pointer receiving the value). */
static const struct bpf_func_proto bpf_get_func_ret_proto = {
	.func		= get_func_ret,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_LONG,
};
   1171
/*
 * BPF helper: return the u64 stored immediately below @ctx - the same
 * value get_func_arg() uses as its argument count.
 */
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
{
	/* This helper call is inlined by verifier. */
	return ((u64 *)ctx)[-1];
}
   1177
/* Prototype for get_func_arg_cnt(): takes only the program context. */
static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
	.func		= get_func_arg_cnt,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
   1183
/*
 * Common helper lookup for the tracing program types in this file: the
 * kprobe, tracepoint, perf_event and raw tracepoint lookups all fall back
 * to it from their default case.
 *
 * Returns the prototype for @func_id, or NULL when the helper is refused
 * (a security_locked_down() check failed, or a copy_from_user* helper was
 * requested by a non-sleepable program). IDs not listed here are delegated
 * to bpf_base_func_proto().
 */
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_map_lookup_percpu_elem:
		return &bpf_map_lookup_percpu_elem_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_ktime_get_boot_ns:
		return &bpf_ktime_get_boot_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_task_btf:
		return &bpf_get_current_task_btf_proto;
	case BPF_FUNC_task_pt_regs:
		return &bpf_task_pt_regs_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	/* Refused (NULL) when the LOCKDOWN_BPF_WRITE_USER check fails. */
	case BPF_FUNC_probe_write_user:
		return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
		       NULL : bpf_get_probe_write_proto();
	case BPF_FUNC_probe_read_user:
		return &bpf_probe_read_user_proto;
	/* Kernel-memory readers are refused when LOCKDOWN_BPF_READ_KERNEL fails. */
	case BPF_FUNC_probe_read_kernel:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_proto;
	case BPF_FUNC_probe_read_user_str:
		return &bpf_probe_read_user_str_proto;
	case BPF_FUNC_probe_read_kernel_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	/* Compat readers: only built for this config, same lockdown gate. */
	case BPF_FUNC_probe_read:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_proto;
	case BPF_FUNC_probe_read_str:
		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
		       NULL : &bpf_probe_read_compat_str_proto;
#endif
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_get_current_ancestor_cgroup_id:
		return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
	case BPF_FUNC_send_signal:
		return &bpf_send_signal_proto;
	case BPF_FUNC_send_signal_thread:
		return &bpf_send_signal_thread_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
	case BPF_FUNC_get_ns_current_pid_tgid:
		return &bpf_get_ns_current_pid_tgid_proto;
	case BPF_FUNC_ringbuf_output:
		return &bpf_ringbuf_output_proto;
	case BPF_FUNC_ringbuf_reserve:
		return &bpf_ringbuf_reserve_proto;
	case BPF_FUNC_ringbuf_submit:
		return &bpf_ringbuf_submit_proto;
	case BPF_FUNC_ringbuf_discard:
		return &bpf_ringbuf_discard_proto;
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_get_task_stack:
		return &bpf_get_task_stack_proto;
	/* The copy_from_user* helpers are only handed to sleepable programs. */
	case BPF_FUNC_copy_from_user:
		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
	case BPF_FUNC_copy_from_user_task:
		return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
	case BPF_FUNC_snprintf_btf:
		return &bpf_snprintf_btf_proto;
	case BPF_FUNC_per_cpu_ptr:
		return &bpf_per_cpu_ptr_proto;
	case BPF_FUNC_this_cpu_ptr:
		return &bpf_this_cpu_ptr_proto;
	case BPF_FUNC_task_storage_get:
		return &bpf_task_storage_get_proto;
	case BPF_FUNC_task_storage_delete:
		return &bpf_task_storage_delete_proto;
	case BPF_FUNC_for_each_map_elem:
		return &bpf_for_each_map_elem_proto;
	case BPF_FUNC_snprintf:
		return &bpf_snprintf_proto;
	case BPF_FUNC_get_func_ip:
		return &bpf_get_func_ip_proto_tracing;
	case BPF_FUNC_get_branch_snapshot:
		return &bpf_get_branch_snapshot_proto;
	case BPF_FUNC_find_vma:
		return &bpf_find_vma_proto;
	case BPF_FUNC_trace_vprintk:
		return bpf_get_trace_vprintk_proto();
	default:
		return bpf_base_func_proto(func_id);
	}
}
   1311
   1312static const struct bpf_func_proto *
   1313kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   1314{
   1315	switch (func_id) {
   1316	case BPF_FUNC_perf_event_output:
   1317		return &bpf_perf_event_output_proto;
   1318	case BPF_FUNC_get_stackid:
   1319		return &bpf_get_stackid_proto;
   1320	case BPF_FUNC_get_stack:
   1321		return &bpf_get_stack_proto;
   1322#ifdef CONFIG_BPF_KPROBE_OVERRIDE
   1323	case BPF_FUNC_override_return:
   1324		return &bpf_override_return_proto;
   1325#endif
   1326	case BPF_FUNC_get_func_ip:
   1327		return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
   1328			&bpf_get_func_ip_proto_kprobe_multi :
   1329			&bpf_get_func_ip_proto_kprobe;
   1330	case BPF_FUNC_get_attach_cookie:
   1331		return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
   1332			&bpf_get_attach_cookie_proto_kmulti :
   1333			&bpf_get_attach_cookie_proto_trace;
   1334	default:
   1335		return bpf_tracing_func_proto(func_id, prog);
   1336	}
   1337}
   1338
   1339/* bpf+kprobe programs can access fields of 'struct pt_regs' */
   1340static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
   1341					const struct bpf_prog *prog,
   1342					struct bpf_insn_access_aux *info)
   1343{
   1344	if (off < 0 || off >= sizeof(struct pt_regs))
   1345		return false;
   1346	if (type != BPF_READ)
   1347		return false;
   1348	if (off % size != 0)
   1349		return false;
   1350	/*
   1351	 * Assertion for 32 bit to make sure last 8 byte access
   1352	 * (BPF_DW) to the last 4 byte member is disallowed.
   1353	 */
   1354	if (off + size > sizeof(struct pt_regs))
   1355		return false;
   1356
   1357	return true;
   1358}
   1359
/* Verifier callbacks for kprobe programs: helper lookup and context access check. */
const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

/* Program ops for kprobe programs; no callbacks are set. */
const struct bpf_prog_ops kprobe_prog_ops = {
};
   1367
/*
 * Tracepoint flavour of bpf_perf_event_output(): reads the 'struct pt_regs'
 * pointer stored at the start of @tp_buff and forwards it, together with
 * the remaining arguments, to ____bpf_perf_event_output().
 */
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
	 * from there and call the same bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}
   1380
/*
 * Prototype for bpf_perf_event_output_tp(): GPL-only;
 * (ctx, map, flags, read-only data, data size with zero permitted).
 */
static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
   1391
/*
 * Tracepoint flavour of bpf_get_stackid(): reads the 'struct pt_regs'
 * pointer stored at the start of @tp_buff and calls bpf_get_stackid() with
 * it, @map and @flags; the two trailing arguments are passed as 0.
 */
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}
   1405
/* Prototype for bpf_get_stackid_tp(): GPL-only; (ctx, map, flags). */
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
   1414
/*
 * Tracepoint flavour of bpf_get_stack(): reads the 'struct pt_regs' pointer
 * stored at the start of @tp_buff and calls bpf_get_stack() with it, @buf,
 * @size and @flags; the trailing argument is passed as 0.
 */
BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}
   1423
/*
 * Prototype for bpf_get_stack_tp(): GPL-only; (ctx, possibly uninitialized
 * output buffer, buffer size with zero permitted, flags).
 */
static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};
   1433
   1434static const struct bpf_func_proto *
   1435tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   1436{
   1437	switch (func_id) {
   1438	case BPF_FUNC_perf_event_output:
   1439		return &bpf_perf_event_output_proto_tp;
   1440	case BPF_FUNC_get_stackid:
   1441		return &bpf_get_stackid_proto_tp;
   1442	case BPF_FUNC_get_stack:
   1443		return &bpf_get_stack_proto_tp;
   1444	case BPF_FUNC_get_attach_cookie:
   1445		return &bpf_get_attach_cookie_proto_trace;
   1446	default:
   1447		return bpf_tracing_func_proto(func_id, prog);
   1448	}
   1449}
   1450
   1451static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
   1452				    const struct bpf_prog *prog,
   1453				    struct bpf_insn_access_aux *info)
   1454{
   1455	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
   1456		return false;
   1457	if (type != BPF_READ)
   1458		return false;
   1459	if (off % size != 0)
   1460		return false;
   1461
   1462	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
   1463	return true;
   1464}
   1465
/* Verifier callbacks for tracepoint programs: helper lookup and context access check. */
const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto  = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

/* Program ops for tracepoint programs; no callbacks are set. */
const struct bpf_prog_ops tracepoint_prog_ops = {
};
   1473
   1474BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
   1475	   struct bpf_perf_event_value *, buf, u32, size)
   1476{
   1477	int err = -EINVAL;
   1478
   1479	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
   1480		goto clear;
   1481	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
   1482				    &buf->running);
   1483	if (unlikely(err))
   1484		goto clear;
   1485	return 0;
   1486clear:
   1487	memset(buf, 0, size);
   1488	return err;
   1489}
   1490
/*
 * Prototype for bpf_perf_prog_read_value(): GPL-only; (ctx, possibly
 * uninitialized output buffer, buffer size).
 */
static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
         .func           = bpf_perf_prog_read_value,
         .gpl_only       = true,
         .ret_type       = RET_INTEGER,
         .arg1_type      = ARG_PTR_TO_CTX,
         .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
         .arg3_type      = ARG_CONST_SIZE,
};
   1499
   1500BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
   1501	   void *, buf, u32, size, u64, flags)
   1502{
   1503	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
   1504	struct perf_branch_stack *br_stack = ctx->data->br_stack;
   1505	u32 to_copy;
   1506
   1507	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
   1508		return -EINVAL;
   1509
   1510	if (unlikely(!br_stack))
   1511		return -ENOENT;
   1512
   1513	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
   1514		return br_stack->nr * br_entry_size;
   1515
   1516	if (!buf || (size % br_entry_size != 0))
   1517		return -EINVAL;
   1518
   1519	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
   1520	memcpy(buf, br_stack->entries, to_copy);
   1521
   1522	return to_copy;
   1523}
   1524
/*
 * Prototype for bpf_read_branch_records(): GPL-only; (ctx, buffer that may
 * be NULL, buffer size with zero permitted, flags).
 */
static const struct bpf_func_proto bpf_read_branch_records_proto = {
	.func           = bpf_read_branch_records,
	.gpl_only       = true,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL,
	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type      = ARG_ANYTHING,
};
   1534
   1535static const struct bpf_func_proto *
   1536pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   1537{
   1538	switch (func_id) {
   1539	case BPF_FUNC_perf_event_output:
   1540		return &bpf_perf_event_output_proto_tp;
   1541	case BPF_FUNC_get_stackid:
   1542		return &bpf_get_stackid_proto_pe;
   1543	case BPF_FUNC_get_stack:
   1544		return &bpf_get_stack_proto_pe;
   1545	case BPF_FUNC_perf_prog_read_value:
   1546		return &bpf_perf_prog_read_value_proto;
   1547	case BPF_FUNC_read_branch_records:
   1548		return &bpf_read_branch_records_proto;
   1549	case BPF_FUNC_get_attach_cookie:
   1550		return &bpf_get_attach_cookie_proto_pe;
   1551	default:
   1552		return bpf_tracing_func_proto(func_id, prog);
   1553	}
   1554}
   1555
/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
 *
 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
 * in normal, irq, and nmi context.
 */
struct bpf_raw_tp_regs {
	/* One scratch pt_regs per nesting level; three levels are available. */
	struct pt_regs regs[3];
};
/* Per-CPU scratch register sets handed out by get_bpf_raw_tp_regs(). */
static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
/* Per-CPU count of regs[] slots currently in use on this CPU. */
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
   1569static struct pt_regs *get_bpf_raw_tp_regs(void)
   1570{
   1571	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
   1572	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
   1573
   1574	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
   1575		this_cpu_dec(bpf_raw_tp_nest_level);
   1576		return ERR_PTR(-EBUSY);
   1577	}
   1578
   1579	return &tp_regs->regs[nest_level - 1];
   1580}
   1581
/*
 * Release the slot taken by a successful get_bpf_raw_tp_regs() by
 * decrementing this CPU's nesting level.
 */
static void put_bpf_raw_tp_regs(void)
{
	this_cpu_dec(bpf_raw_tp_nest_level);
}
   1586
/*
 * Raw tracepoint flavour of bpf_perf_event_output(): borrows a per-CPU
 * scratch pt_regs, fills it with perf_fetch_caller_regs() and passes it to
 * ____bpf_perf_event_output(). Returns the error from
 * get_bpf_raw_tp_regs() if no scratch slot is available; @args is unused.
 */
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = ____bpf_perf_event_output(regs, map, flags, data, size);

	/* Hand the scratch slot back only after the output call is done with it. */
	put_bpf_raw_tp_regs();
	return ret;
}
   1602
/*
 * Prototype for bpf_perf_event_output_raw_tp(): GPL-only;
 * (ctx, map, flags, read-only data, data size with zero permitted).
 */
static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
	.func		= bpf_perf_event_output_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
   1613
/* Helper prototypes defined outside this file; returned by tracing_prog_func_proto(). */
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
   1617
/*
 * Raw tracepoint flavour of bpf_get_stackid(): borrows a per-CPU scratch
 * pt_regs, fills it with perf_fetch_caller_regs() and calls
 * bpf_get_stackid() with it. Returns the error from get_bpf_raw_tp_regs()
 * if no scratch slot is available; @args is unused.
 */
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			      flags, 0, 0);
	put_bpf_raw_tp_regs();
	return ret;
}
   1634
/* Prototype for bpf_get_stackid_raw_tp(): GPL-only; (ctx, map, flags). */
static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
	.func		= bpf_get_stackid_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};
   1643
/*
 * Raw tracepoint flavour of bpf_get_stack(): borrows a per-CPU scratch
 * pt_regs, fills it with perf_fetch_caller_regs() and calls bpf_get_stack()
 * with it, @buf, @size and @flags. Returns the error from
 * get_bpf_raw_tp_regs() if no scratch slot is available; @args is unused.
 */
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			    (unsigned long) size, flags, 0);
	put_bpf_raw_tp_regs();
	return ret;
}
   1659
   1660static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
   1661	.func		= bpf_get_stack_raw_tp,
   1662	.gpl_only	= true,
   1663	.ret_type	= RET_INTEGER,
   1664	.arg1_type	= ARG_PTR_TO_CTX,
   1665	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
   1666	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
   1667	.arg4_type	= ARG_ANYTHING,
   1668};
   1669
   1670static const struct bpf_func_proto *
   1671raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   1672{
   1673	switch (func_id) {
   1674	case BPF_FUNC_perf_event_output:
   1675		return &bpf_perf_event_output_proto_raw_tp;
   1676	case BPF_FUNC_get_stackid:
   1677		return &bpf_get_stackid_proto_raw_tp;
   1678	case BPF_FUNC_get_stack:
   1679		return &bpf_get_stack_proto_raw_tp;
   1680	default:
   1681		return bpf_tracing_func_proto(func_id, prog);
   1682	}
   1683}
   1684
/*
 * Helper lookup for tracing programs (installed in tracing_verifier_ops).
 *
 * Handles, in order: networking helpers (only with CONFIG_NET), the seq_*
 * helpers (only for BPF_TRACE_ITER programs, NULL otherwise), d_path, and
 * the helpers that require bpf_prog_has_trampoline() (NULL otherwise).
 * Any other ID is looked up via raw_tp_prog_func_proto(); if that yields
 * nothing and the program is an iterator, bpf_iter_get_func_proto() is
 * tried as a last resort.
 */
const struct bpf_func_proto *
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;

	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_skb_output:
		return &bpf_skb_output_proto;
	case BPF_FUNC_xdp_output:
		return &bpf_xdp_output_proto;
	case BPF_FUNC_skc_to_tcp6_sock:
		return &bpf_skc_to_tcp6_sock_proto;
	case BPF_FUNC_skc_to_tcp_sock:
		return &bpf_skc_to_tcp_sock_proto;
	case BPF_FUNC_skc_to_tcp_timewait_sock:
		return &bpf_skc_to_tcp_timewait_sock_proto;
	case BPF_FUNC_skc_to_tcp_request_sock:
		return &bpf_skc_to_tcp_request_sock_proto;
	case BPF_FUNC_skc_to_udp6_sock:
		return &bpf_skc_to_udp6_sock_proto;
	case BPF_FUNC_skc_to_unix_sock:
		return &bpf_skc_to_unix_sock_proto;
	case BPF_FUNC_skc_to_mptcp_sock:
		return &bpf_skc_to_mptcp_sock_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_tracing_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_tracing_proto;
	case BPF_FUNC_sock_from_file:
		return &bpf_sock_from_file_proto;
	case BPF_FUNC_get_socket_cookie:
		return &bpf_get_socket_ptr_cookie_proto;
	case BPF_FUNC_xdp_get_buff_len:
		return &bpf_xdp_get_buff_len_trace_proto;
#endif
	/* The seq_* helpers are only offered to iterator programs. */
	case BPF_FUNC_seq_printf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_proto :
		       NULL;
	case BPF_FUNC_seq_write:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_write_proto :
		       NULL;
	case BPF_FUNC_seq_printf_btf:
		return prog->expected_attach_type == BPF_TRACE_ITER ?
		       &bpf_seq_printf_btf_proto :
		       NULL;
	case BPF_FUNC_d_path:
		return &bpf_d_path_proto;
	/* The following helpers are only offered when the program has a trampoline. */
	case BPF_FUNC_get_func_arg:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL;
	case BPF_FUNC_get_func_ret:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
	case BPF_FUNC_get_func_arg_cnt:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
	case BPF_FUNC_get_attach_cookie:
		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
	default:
		fn = raw_tp_prog_func_proto(func_id, prog);
		/* Iterator programs get one more chance via the iterator lookup. */
		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
			fn = bpf_iter_get_func_proto(func_id, prog);
		return fn;
	}
}
   1750
/*
 * Context access check for raw tracepoint programs: defers entirely to
 * bpf_tracing_ctx_access(); @prog and @info are not consulted.
 */
static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	return bpf_tracing_ctx_access(off, size, type);
}
   1758
/*
 * Context access check for tracing programs: defers entirely to
 * bpf_tracing_btf_ctx_access(), passing all arguments through.
 */
static bool tracing_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}
   1766
/*
 * Weak default for the tracing test-run hook: always fails with -ENOTSUPP.
 * NOTE(review): being __weak, this is presumably replaced by a strong
 * definition elsewhere when test-run support is built - confirm.
 */
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}
   1773
/* Verifier callbacks for raw tracepoint programs: helper lookup and context access check. */
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_prog_is_valid_access,
};

/* Program ops for raw tracepoint programs; test_run is only wired up with CONFIG_NET. */
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_prog_test_run_raw_tp,
#endif
};
   1784
/* Verifier callbacks for tracing programs: helper lookup and context access check. */
const struct bpf_verifier_ops tracing_verifier_ops = {
	.get_func_proto  = tracing_prog_func_proto,
	.is_valid_access = tracing_prog_is_valid_access,
};

/* Program ops for tracing programs; test_run goes to bpf_prog_test_run_tracing(). */
const struct bpf_prog_ops tracing_prog_ops = {
	.test_run = bpf_prog_test_run_tracing,
};
   1793
   1794static bool raw_tp_writable_prog_is_valid_access(int off, int size,
   1795						 enum bpf_access_type type,
   1796						 const struct bpf_prog *prog,
   1797						 struct bpf_insn_access_aux *info)
   1798{
   1799	if (off == 0) {
   1800		if (size != sizeof(u64) || type != BPF_READ)
   1801			return false;
   1802		info->reg_type = PTR_TO_TP_BUFFER;
   1803	}
   1804	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
   1805}
   1806
/*
 * Verifier callbacks for writable raw tracepoint programs: same helper
 * lookup as plain raw tracepoints, with the writable-aware access check.
 */
const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_writable_prog_is_valid_access,
};

/* Program ops for writable raw tracepoint programs; no callbacks are set. */
const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};
   1814
   1815static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
   1816				    const struct bpf_prog *prog,
   1817				    struct bpf_insn_access_aux *info)
   1818{
   1819	const int size_u64 = sizeof(u64);
   1820
   1821	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
   1822		return false;
   1823	if (type != BPF_READ)
   1824		return false;
   1825	if (off % size != 0) {
   1826		if (sizeof(unsigned long) != 4)
   1827			return false;
   1828		if (size != 8)
   1829			return false;
   1830		if (off % size != 4)
   1831			return false;
   1832	}
   1833
   1834	switch (off) {
   1835	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
   1836		bpf_ctx_record_field_size(info, size_u64);
   1837		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
   1838			return false;
   1839		break;
   1840	case bpf_ctx_range(struct bpf_perf_event_data, addr):
   1841		bpf_ctx_record_field_size(info, size_u64);
   1842		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
   1843			return false;
   1844		break;
   1845	default:
   1846		if (size != sizeof(long))
   1847			return false;
   1848	}
   1849
   1850	return true;
   1851}
   1852
   1853static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
   1854				      const struct bpf_insn *si,
   1855				      struct bpf_insn *insn_buf,
   1856				      struct bpf_prog *prog, u32 *target_size)
   1857{
   1858	struct bpf_insn *insn = insn_buf;
   1859
   1860	switch (si->off) {
   1861	case offsetof(struct bpf_perf_event_data, sample_period):
   1862		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
   1863						       data), si->dst_reg, si->src_reg,
   1864				      offsetof(struct bpf_perf_event_data_kern, data));
   1865		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
   1866				      bpf_target_off(struct perf_sample_data, period, 8,
   1867						     target_size));
   1868		break;
   1869	case offsetof(struct bpf_perf_event_data, addr):
   1870		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
   1871						       data), si->dst_reg, si->src_reg,
   1872				      offsetof(struct bpf_perf_event_data_kern, data));
   1873		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
   1874				      bpf_target_off(struct perf_sample_data, addr, 8,
   1875						     target_size));
   1876		break;
   1877	default:
   1878		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
   1879						       regs), si->dst_reg, si->src_reg,
   1880				      offsetof(struct bpf_perf_event_data_kern, regs));
   1881		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
   1882				      si->off);
   1883		break;
   1884	}
   1885
   1886	return insn - insn_buf;
   1887}
   1888
   1889const struct bpf_verifier_ops perf_event_verifier_ops = {
   1890	.get_func_proto		= pe_prog_func_proto,
   1891	.is_valid_access	= pe_prog_is_valid_access,
   1892	.convert_ctx_access	= pe_prog_convert_ctx_access,
   1893};
   1894
   1895const struct bpf_prog_ops perf_event_prog_ops = {
   1896};
   1897
   1898static DEFINE_MUTEX(bpf_event_mutex);
   1899
   1900#define BPF_TRACE_MAX_PROGS 64
   1901
   1902int perf_event_attach_bpf_prog(struct perf_event *event,
   1903			       struct bpf_prog *prog,
   1904			       u64 bpf_cookie)
   1905{
   1906	struct bpf_prog_array *old_array;
   1907	struct bpf_prog_array *new_array;
   1908	int ret = -EEXIST;
   1909
   1910	/*
   1911	 * Kprobe override only works if they are on the function entry,
   1912	 * and only if they are on the opt-in list.
   1913	 */
   1914	if (prog->kprobe_override &&
   1915	    (!trace_kprobe_on_func_entry(event->tp_event) ||
   1916	     !trace_kprobe_error_injectable(event->tp_event)))
   1917		return -EINVAL;
   1918
   1919	mutex_lock(&bpf_event_mutex);
   1920
   1921	if (event->prog)
   1922		goto unlock;
   1923
   1924	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
   1925	if (old_array &&
   1926	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
   1927		ret = -E2BIG;
   1928		goto unlock;
   1929	}
   1930
   1931	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
   1932	if (ret < 0)
   1933		goto unlock;
   1934
   1935	/* set the new array to event->tp_event and set event->prog */
   1936	event->prog = prog;
   1937	event->bpf_cookie = bpf_cookie;
   1938	rcu_assign_pointer(event->tp_event->prog_array, new_array);
   1939	bpf_prog_array_free(old_array);
   1940
   1941unlock:
   1942	mutex_unlock(&bpf_event_mutex);
   1943	return ret;
   1944}
   1945
   1946void perf_event_detach_bpf_prog(struct perf_event *event)
   1947{
   1948	struct bpf_prog_array *old_array;
   1949	struct bpf_prog_array *new_array;
   1950	int ret;
   1951
   1952	mutex_lock(&bpf_event_mutex);
   1953
   1954	if (!event->prog)
   1955		goto unlock;
   1956
   1957	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
   1958	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
   1959	if (ret == -ENOENT)
   1960		goto unlock;
   1961	if (ret < 0) {
   1962		bpf_prog_array_delete_safe(old_array, event->prog);
   1963	} else {
   1964		rcu_assign_pointer(event->tp_event->prog_array, new_array);
   1965		bpf_prog_array_free(old_array);
   1966	}
   1967
   1968	bpf_prog_put(event->prog);
   1969	event->prog = NULL;
   1970
   1971unlock:
   1972	mutex_unlock(&bpf_event_mutex);
   1973}
   1974
   1975int perf_event_query_prog_array(struct perf_event *event, void __user *info)
   1976{
   1977	struct perf_event_query_bpf __user *uquery = info;
   1978	struct perf_event_query_bpf query = {};
   1979	struct bpf_prog_array *progs;
   1980	u32 *ids, prog_cnt, ids_len;
   1981	int ret;
   1982
   1983	if (!perfmon_capable())
   1984		return -EPERM;
   1985	if (event->attr.type != PERF_TYPE_TRACEPOINT)
   1986		return -EINVAL;
   1987	if (copy_from_user(&query, uquery, sizeof(query)))
   1988		return -EFAULT;
   1989
   1990	ids_len = query.ids_len;
   1991	if (ids_len > BPF_TRACE_MAX_PROGS)
   1992		return -E2BIG;
   1993	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
   1994	if (!ids)
   1995		return -ENOMEM;
   1996	/*
   1997	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
   1998	 * is required when user only wants to check for uquery->prog_cnt.
   1999	 * There is no need to check for it since the case is handled
   2000	 * gracefully in bpf_prog_array_copy_info.
   2001	 */
   2002
   2003	mutex_lock(&bpf_event_mutex);
   2004	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
   2005	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
   2006	mutex_unlock(&bpf_event_mutex);
   2007
   2008	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
   2009	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
   2010		ret = -EFAULT;
   2011
   2012	kfree(ids);
   2013	return ret;
   2014}
   2015
   2016extern struct bpf_raw_event_map __start__bpf_raw_tp[];
   2017extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
   2018
   2019struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
   2020{
   2021	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
   2022
   2023	for (; btp < __stop__bpf_raw_tp; btp++) {
   2024		if (!strcmp(btp->tp->name, name))
   2025			return btp;
   2026	}
   2027
   2028	return bpf_get_raw_tracepoint_module(name);
   2029}
   2030
   2031void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
   2032{
   2033	struct module *mod;
   2034
   2035	preempt_disable();
   2036	mod = __module_address((unsigned long)btp);
   2037	module_put(mod);
   2038	preempt_enable();
   2039}
   2040
   2041static __always_inline
   2042void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
   2043{
   2044	cant_sleep();
   2045	rcu_read_lock();
   2046	(void) bpf_prog_run(prog, args);
   2047	rcu_read_unlock();
   2048}
   2049
   2050#define UNPACK(...)			__VA_ARGS__
   2051#define REPEAT_1(FN, DL, X, ...)	FN(X)
   2052#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
   2053#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
   2054#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
   2055#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
   2056#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
   2057#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
   2058#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
   2059#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
   2060#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
   2061#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
   2062#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
   2063#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)
   2064
   2065#define SARG(X)		u64 arg##X
   2066#define COPY(X)		args[X] = arg##X
   2067
   2068#define __DL_COM	(,)
   2069#define __DL_SEM	(;)
   2070
   2071#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
   2072
   2073#define BPF_TRACE_DEFN_x(x)						\
   2074	void bpf_trace_run##x(struct bpf_prog *prog,			\
   2075			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
   2076	{								\
   2077		u64 args[x];						\
   2078		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
   2079		__bpf_trace_run(prog, args);				\
   2080	}								\
   2081	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
   2082BPF_TRACE_DEFN_x(1);
   2083BPF_TRACE_DEFN_x(2);
   2084BPF_TRACE_DEFN_x(3);
   2085BPF_TRACE_DEFN_x(4);
   2086BPF_TRACE_DEFN_x(5);
   2087BPF_TRACE_DEFN_x(6);
   2088BPF_TRACE_DEFN_x(7);
   2089BPF_TRACE_DEFN_x(8);
   2090BPF_TRACE_DEFN_x(9);
   2091BPF_TRACE_DEFN_x(10);
   2092BPF_TRACE_DEFN_x(11);
   2093BPF_TRACE_DEFN_x(12);
   2094
   2095static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
   2096{
   2097	struct tracepoint *tp = btp->tp;
   2098
   2099	/*
   2100	 * check that program doesn't access arguments beyond what's
   2101	 * available in this tracepoint
   2102	 */
   2103	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
   2104		return -EINVAL;
   2105
   2106	if (prog->aux->max_tp_access > btp->writable_size)
   2107		return -EINVAL;
   2108
   2109	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
   2110						   prog);
   2111}
   2112
   2113int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
   2114{
   2115	return __bpf_probe_register(btp, prog);
   2116}
   2117
   2118int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
   2119{
   2120	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
   2121}
   2122
   2123int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
   2124			    u32 *fd_type, const char **buf,
   2125			    u64 *probe_offset, u64 *probe_addr)
   2126{
   2127	bool is_tracepoint, is_syscall_tp;
   2128	struct bpf_prog *prog;
   2129	int flags, err = 0;
   2130
   2131	prog = event->prog;
   2132	if (!prog)
   2133		return -ENOENT;
   2134
   2135	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
   2136	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
   2137		return -EOPNOTSUPP;
   2138
   2139	*prog_id = prog->aux->id;
   2140	flags = event->tp_event->flags;
   2141	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
   2142	is_syscall_tp = is_syscall_trace_event(event->tp_event);
   2143
   2144	if (is_tracepoint || is_syscall_tp) {
   2145		*buf = is_tracepoint ? event->tp_event->tp->name
   2146				     : event->tp_event->name;
   2147		*fd_type = BPF_FD_TYPE_TRACEPOINT;
   2148		*probe_offset = 0x0;
   2149		*probe_addr = 0x0;
   2150	} else {
   2151		/* kprobe/uprobe */
   2152		err = -EOPNOTSUPP;
   2153#ifdef CONFIG_KPROBE_EVENTS
   2154		if (flags & TRACE_EVENT_FL_KPROBE)
   2155			err = bpf_get_kprobe_info(event, fd_type, buf,
   2156						  probe_offset, probe_addr,
   2157						  event->attr.type == PERF_TYPE_TRACEPOINT);
   2158#endif
   2159#ifdef CONFIG_UPROBE_EVENTS
   2160		if (flags & TRACE_EVENT_FL_UPROBE)
   2161			err = bpf_get_uprobe_info(event, fd_type, buf,
   2162						  probe_offset,
   2163						  event->attr.type == PERF_TYPE_TRACEPOINT);
   2164#endif
   2165	}
   2166
   2167	return err;
   2168}
   2169
   2170static int __init send_signal_irq_work_init(void)
   2171{
   2172	int cpu;
   2173	struct send_signal_irq_work *work;
   2174
   2175	for_each_possible_cpu(cpu) {
   2176		work = per_cpu_ptr(&send_signal_work, cpu);
   2177		init_irq_work(&work->irq_work, do_bpf_send_signal);
   2178	}
   2179	return 0;
   2180}
   2181
   2182subsys_initcall(send_signal_irq_work_init);
   2183
   2184#ifdef CONFIG_MODULES
   2185static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
   2186			    void *module)
   2187{
   2188	struct bpf_trace_module *btm, *tmp;
   2189	struct module *mod = module;
   2190	int ret = 0;
   2191
   2192	if (mod->num_bpf_raw_events == 0 ||
   2193	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
   2194		goto out;
   2195
   2196	mutex_lock(&bpf_module_mutex);
   2197
   2198	switch (op) {
   2199	case MODULE_STATE_COMING:
   2200		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
   2201		if (btm) {
   2202			btm->module = module;
   2203			list_add(&btm->list, &bpf_trace_modules);
   2204		} else {
   2205			ret = -ENOMEM;
   2206		}
   2207		break;
   2208	case MODULE_STATE_GOING:
   2209		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
   2210			if (btm->module == module) {
   2211				list_del(&btm->list);
   2212				kfree(btm);
   2213				break;
   2214			}
   2215		}
   2216		break;
   2217	}
   2218
   2219	mutex_unlock(&bpf_module_mutex);
   2220
   2221out:
   2222	return notifier_from_errno(ret);
   2223}
   2224
   2225static struct notifier_block bpf_module_nb = {
   2226	.notifier_call = bpf_event_notify,
   2227};
   2228
   2229static int __init bpf_event_init(void)
   2230{
   2231	register_module_notifier(&bpf_module_nb);
   2232	return 0;
   2233}
   2234
   2235fs_initcall(bpf_event_init);
   2236#endif /* CONFIG_MODULES */
   2237
   2238#ifdef CONFIG_FPROBE
   2239struct bpf_kprobe_multi_link {
   2240	struct bpf_link link;
   2241	struct fprobe fp;
   2242	unsigned long *addrs;
   2243	u64 *cookies;
   2244	u32 cnt;
   2245};
   2246
   2247struct bpf_kprobe_multi_run_ctx {
   2248	struct bpf_run_ctx run_ctx;
   2249	struct bpf_kprobe_multi_link *link;
   2250	unsigned long entry_ip;
   2251};
   2252
   2253struct user_syms {
   2254	const char **syms;
   2255	char *buf;
   2256};
   2257
   2258static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
   2259{
   2260	unsigned long __user usymbol;
   2261	const char **syms = NULL;
   2262	char *buf = NULL, *p;
   2263	int err = -ENOMEM;
   2264	unsigned int i;
   2265
   2266	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
   2267	if (!syms)
   2268		goto error;
   2269
   2270	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
   2271	if (!buf)
   2272		goto error;
   2273
   2274	for (p = buf, i = 0; i < cnt; i++) {
   2275		if (__get_user(usymbol, usyms + i)) {
   2276			err = -EFAULT;
   2277			goto error;
   2278		}
   2279		err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN);
   2280		if (err == KSYM_NAME_LEN)
   2281			err = -E2BIG;
   2282		if (err < 0)
   2283			goto error;
   2284		syms[i] = p;
   2285		p += err + 1;
   2286	}
   2287
   2288	us->syms = syms;
   2289	us->buf = buf;
   2290	return 0;
   2291
   2292error:
   2293	if (err) {
   2294		kvfree(syms);
   2295		kvfree(buf);
   2296	}
   2297	return err;
   2298}
   2299
   2300static void free_user_syms(struct user_syms *us)
   2301{
   2302	kvfree(us->syms);
   2303	kvfree(us->buf);
   2304}
   2305
   2306static void bpf_kprobe_multi_link_release(struct bpf_link *link)
   2307{
   2308	struct bpf_kprobe_multi_link *kmulti_link;
   2309
   2310	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
   2311	unregister_fprobe(&kmulti_link->fp);
   2312}
   2313
   2314static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
   2315{
   2316	struct bpf_kprobe_multi_link *kmulti_link;
   2317
   2318	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
   2319	kvfree(kmulti_link->addrs);
   2320	kvfree(kmulti_link->cookies);
   2321	kfree(kmulti_link);
   2322}
   2323
   2324static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
   2325	.release = bpf_kprobe_multi_link_release,
   2326	.dealloc = bpf_kprobe_multi_link_dealloc,
   2327};
   2328
   2329static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
   2330{
   2331	const struct bpf_kprobe_multi_link *link = priv;
   2332	unsigned long *addr_a = a, *addr_b = b;
   2333	u64 *cookie_a, *cookie_b;
   2334
   2335	cookie_a = link->cookies + (addr_a - link->addrs);
   2336	cookie_b = link->cookies + (addr_b - link->addrs);
   2337
   2338	/* swap addr_a/addr_b and cookie_a/cookie_b values */
   2339	swap(*addr_a, *addr_b);
   2340	swap(*cookie_a, *cookie_b);
   2341}
   2342
   2343static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b)
   2344{
   2345	const unsigned long *addr_a = a, *addr_b = b;
   2346
   2347	if (*addr_a == *addr_b)
   2348		return 0;
   2349	return *addr_a < *addr_b ? -1 : 1;
   2350}
   2351
   2352static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
   2353{
   2354	return __bpf_kprobe_multi_cookie_cmp(a, b);
   2355}
   2356
   2357static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
   2358{
   2359	struct bpf_kprobe_multi_run_ctx *run_ctx;
   2360	struct bpf_kprobe_multi_link *link;
   2361	u64 *cookie, entry_ip;
   2362	unsigned long *addr;
   2363
   2364	if (WARN_ON_ONCE(!ctx))
   2365		return 0;
   2366	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
   2367	link = run_ctx->link;
   2368	if (!link->cookies)
   2369		return 0;
   2370	entry_ip = run_ctx->entry_ip;
   2371	addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
   2372		       __bpf_kprobe_multi_cookie_cmp);
   2373	if (!addr)
   2374		return 0;
   2375	cookie = link->cookies + (addr - link->addrs);
   2376	return *cookie;
   2377}
   2378
   2379static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
   2380{
   2381	struct bpf_kprobe_multi_run_ctx *run_ctx;
   2382
   2383	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
   2384	return run_ctx->entry_ip;
   2385}
   2386
   2387static int
   2388kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
   2389			   unsigned long entry_ip, struct pt_regs *regs)
   2390{
   2391	struct bpf_kprobe_multi_run_ctx run_ctx = {
   2392		.link = link,
   2393		.entry_ip = entry_ip,
   2394	};
   2395	struct bpf_run_ctx *old_run_ctx;
   2396	int err;
   2397
   2398	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
   2399		err = 0;
   2400		goto out;
   2401	}
   2402
   2403	migrate_disable();
   2404	rcu_read_lock();
   2405	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
   2406	err = bpf_prog_run(link->link.prog, regs);
   2407	bpf_reset_run_ctx(old_run_ctx);
   2408	rcu_read_unlock();
   2409	migrate_enable();
   2410
   2411 out:
   2412	__this_cpu_dec(bpf_prog_active);
   2413	return err;
   2414}
   2415
   2416static void
   2417kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,
   2418			  struct pt_regs *regs)
   2419{
   2420	struct bpf_kprobe_multi_link *link;
   2421
   2422	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
   2423	kprobe_multi_link_prog_run(link, entry_ip, regs);
   2424}
   2425
   2426static int symbols_cmp_r(const void *a, const void *b, const void *priv)
   2427{
   2428	const char **str_a = (const char **) a;
   2429	const char **str_b = (const char **) b;
   2430
   2431	return strcmp(*str_a, *str_b);
   2432}
   2433
   2434struct multi_symbols_sort {
   2435	const char **funcs;
   2436	u64 *cookies;
   2437};
   2438
   2439static void symbols_swap_r(void *a, void *b, int size, const void *priv)
   2440{
   2441	const struct multi_symbols_sort *data = priv;
   2442	const char **name_a = a, **name_b = b;
   2443
   2444	swap(*name_a, *name_b);
   2445
   2446	/* If defined, swap also related cookies. */
   2447	if (data->cookies) {
   2448		u64 *cookie_a, *cookie_b;
   2449
   2450		cookie_a = data->cookies + (name_a - data->funcs);
   2451		cookie_b = data->cookies + (name_b - data->funcs);
   2452		swap(*cookie_a, *cookie_b);
   2453	}
   2454}
   2455
   2456int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
   2457{
   2458	struct bpf_kprobe_multi_link *link = NULL;
   2459	struct bpf_link_primer link_primer;
   2460	void __user *ucookies;
   2461	unsigned long *addrs;
   2462	u32 flags, cnt, size;
   2463	void __user *uaddrs;
   2464	u64 *cookies = NULL;
   2465	void __user *usyms;
   2466	int err;
   2467
   2468	/* no support for 32bit archs yet */
   2469	if (sizeof(u64) != sizeof(void *))
   2470		return -EOPNOTSUPP;
   2471
   2472	if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI)
   2473		return -EINVAL;
   2474
   2475	flags = attr->link_create.kprobe_multi.flags;
   2476	if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
   2477		return -EINVAL;
   2478
   2479	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
   2480	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
   2481	if (!!uaddrs == !!usyms)
   2482		return -EINVAL;
   2483
   2484	cnt = attr->link_create.kprobe_multi.cnt;
   2485	if (!cnt)
   2486		return -EINVAL;
   2487
   2488	size = cnt * sizeof(*addrs);
   2489	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
   2490	if (!addrs)
   2491		return -ENOMEM;
   2492
   2493	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
   2494	if (ucookies) {
   2495		cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
   2496		if (!cookies) {
   2497			err = -ENOMEM;
   2498			goto error;
   2499		}
   2500		if (copy_from_user(cookies, ucookies, size)) {
   2501			err = -EFAULT;
   2502			goto error;
   2503		}
   2504	}
   2505
   2506	if (uaddrs) {
   2507		if (copy_from_user(addrs, uaddrs, size)) {
   2508			err = -EFAULT;
   2509			goto error;
   2510		}
   2511	} else {
   2512		struct multi_symbols_sort data = {
   2513			.cookies = cookies,
   2514		};
   2515		struct user_syms us;
   2516
   2517		err = copy_user_syms(&us, usyms, cnt);
   2518		if (err)
   2519			goto error;
   2520
   2521		if (cookies)
   2522			data.funcs = us.syms;
   2523
   2524		sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
   2525		       symbols_swap_r, &data);
   2526
   2527		err = ftrace_lookup_symbols(us.syms, cnt, addrs);
   2528		free_user_syms(&us);
   2529		if (err)
   2530			goto error;
   2531	}
   2532
   2533	link = kzalloc(sizeof(*link), GFP_KERNEL);
   2534	if (!link) {
   2535		err = -ENOMEM;
   2536		goto error;
   2537	}
   2538
   2539	bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
   2540		      &bpf_kprobe_multi_link_lops, prog);
   2541
   2542	err = bpf_link_prime(&link->link, &link_primer);
   2543	if (err)
   2544		goto error;
   2545
   2546	if (flags & BPF_F_KPROBE_MULTI_RETURN)
   2547		link->fp.exit_handler = kprobe_multi_link_handler;
   2548	else
   2549		link->fp.entry_handler = kprobe_multi_link_handler;
   2550
   2551	link->addrs = addrs;
   2552	link->cookies = cookies;
   2553	link->cnt = cnt;
   2554
   2555	if (cookies) {
   2556		/*
   2557		 * Sorting addresses will trigger sorting cookies as well
   2558		 * (check bpf_kprobe_multi_cookie_swap). This way we can
   2559		 * find cookie based on the address in bpf_get_attach_cookie
   2560		 * helper.
   2561		 */
   2562		sort_r(addrs, cnt, sizeof(*addrs),
   2563		       bpf_kprobe_multi_cookie_cmp,
   2564		       bpf_kprobe_multi_cookie_swap,
   2565		       link);
   2566	}
   2567
   2568	err = register_fprobe_ips(&link->fp, addrs, cnt);
   2569	if (err) {
   2570		bpf_link_cleanup(&link_primer);
   2571		return err;
   2572	}
   2573
   2574	return bpf_link_settle(&link_primer);
   2575
   2576error:
   2577	kfree(link);
   2578	kvfree(addrs);
   2579	kvfree(cookies);
   2580	return err;
   2581}
   2582#else /* !CONFIG_FPROBE */
   2583int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
   2584{
   2585	return -EOPNOTSUPP;
   2586}
   2587static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
   2588{
   2589	return 0;
   2590}
   2591static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
   2592{
   2593	return 0;
   2594}
   2595#endif