cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

helpers.c (42723B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
      3 */
      4#include <linux/bpf.h>
      5#include <linux/btf.h>
      6#include <linux/bpf-cgroup.h>
      7#include <linux/rcupdate.h>
      8#include <linux/random.h>
      9#include <linux/smp.h>
     10#include <linux/topology.h>
     11#include <linux/ktime.h>
     12#include <linux/sched.h>
     13#include <linux/uidgid.h>
     14#include <linux/filter.h>
     15#include <linux/ctype.h>
     16#include <linux/jiffies.h>
     17#include <linux/pid_namespace.h>
     18#include <linux/proc_ns.h>
     19#include <linux/security.h>
     20#include <linux/btf_ids.h>
     21
     22#include "../../lib/kstrtox.h"
     23
      24/* If a kernel subsystem allows eBPF programs to call this function, its
      25 * verifier_ops->get_func_proto() callback should return
      26 * bpf_map_lookup_elem_proto, so that the verifier can properly check the arguments.
      27 *
      28 * Different map implementations rely on RCU in their lookup/update/delete
      29 * map methods, therefore eBPF programs must run under an RCU read lock
      30 * whenever a program is allowed to access maps; hence the
      31 * rcu_read_lock_held() checks in all three functions.
      32 */
     33BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
     34{
     35	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
     36	return (unsigned long) map->ops->map_lookup_elem(map, key);
     37}
     38
     39const struct bpf_func_proto bpf_map_lookup_elem_proto = {
     40	.func		= bpf_map_lookup_elem,
     41	.gpl_only	= false,
     42	.pkt_access	= true,
     43	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
     44	.arg1_type	= ARG_CONST_MAP_PTR,
     45	.arg2_type	= ARG_PTR_TO_MAP_KEY,
     46};
     47
     48BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
     49	   void *, value, u64, flags)
     50{
     51	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
     52	return map->ops->map_update_elem(map, key, value, flags);
     53}
     54
     55const struct bpf_func_proto bpf_map_update_elem_proto = {
     56	.func		= bpf_map_update_elem,
     57	.gpl_only	= false,
     58	.pkt_access	= true,
     59	.ret_type	= RET_INTEGER,
     60	.arg1_type	= ARG_CONST_MAP_PTR,
     61	.arg2_type	= ARG_PTR_TO_MAP_KEY,
     62	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
     63	.arg4_type	= ARG_ANYTHING,
     64};
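/*
 * Example (editor's sketch, not part of helpers.c): a minimal BPF program
 * exercising bpf_map_lookup_elem()/bpf_map_update_elem() from the program
 * side; the verifier resolves these calls to the protos above through the
 * subsystem's get_func_proto() callback. Map, section and license names
 * are illustrative.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} counts SEC(".maps");
 *
 *	SEC("tracepoint/syscalls/sys_enter_execve")
 *	int count_execve(void *ctx)
 *	{
 *		__u32 key = 0;
 *		__u64 init = 1, *val;
 *
 *		// lookups run under RCU; the returned pointer may be NULL
 *		val = bpf_map_lookup_elem(&counts, &key);
 *		if (val)
 *			__sync_fetch_and_add(val, 1);
 *		else
 *			bpf_map_update_elem(&counts, &key, &init, BPF_ANY);
 *		return 0;
 *	}
 *
 *	char LICENSE[] SEC("license") = "GPL";
 */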
     65
     66BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
     67{
     68	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
     69	return map->ops->map_delete_elem(map, key);
     70}
     71
     72const struct bpf_func_proto bpf_map_delete_elem_proto = {
     73	.func		= bpf_map_delete_elem,
     74	.gpl_only	= false,
     75	.pkt_access	= true,
     76	.ret_type	= RET_INTEGER,
     77	.arg1_type	= ARG_CONST_MAP_PTR,
     78	.arg2_type	= ARG_PTR_TO_MAP_KEY,
     79};
     80
     81BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
     82{
     83	return map->ops->map_push_elem(map, value, flags);
     84}
     85
     86const struct bpf_func_proto bpf_map_push_elem_proto = {
     87	.func		= bpf_map_push_elem,
     88	.gpl_only	= false,
     89	.pkt_access	= true,
     90	.ret_type	= RET_INTEGER,
     91	.arg1_type	= ARG_CONST_MAP_PTR,
     92	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
     93	.arg3_type	= ARG_ANYTHING,
     94};
     95
     96BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
     97{
     98	return map->ops->map_pop_elem(map, value);
     99}
    100
    101const struct bpf_func_proto bpf_map_pop_elem_proto = {
    102	.func		= bpf_map_pop_elem,
    103	.gpl_only	= false,
    104	.ret_type	= RET_INTEGER,
    105	.arg1_type	= ARG_CONST_MAP_PTR,
    106	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
    107};
    108
    109BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
    110{
    111	return map->ops->map_peek_elem(map, value);
    112}
    113
    114const struct bpf_func_proto bpf_map_peek_elem_proto = {
    115	.func		= bpf_map_peek_elem,
    116	.gpl_only	= false,
    117	.ret_type	= RET_INTEGER,
    118	.arg1_type	= ARG_CONST_MAP_PTR,
    119	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
    120};
    121
    122BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
    123{
    124	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
    125	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
    126}
    127
    128const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
    129	.func		= bpf_map_lookup_percpu_elem,
    130	.gpl_only	= false,
    131	.pkt_access	= true,
    132	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
    133	.arg1_type	= ARG_CONST_MAP_PTR,
    134	.arg2_type	= ARG_PTR_TO_MAP_KEY,
    135	.arg3_type	= ARG_ANYTHING,
    136};
    137
    138const struct bpf_func_proto bpf_get_prandom_u32_proto = {
    139	.func		= bpf_user_rnd_u32,
    140	.gpl_only	= false,
    141	.ret_type	= RET_INTEGER,
    142};
    143
    144BPF_CALL_0(bpf_get_smp_processor_id)
    145{
    146	return smp_processor_id();
    147}
    148
    149const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
    150	.func		= bpf_get_smp_processor_id,
    151	.gpl_only	= false,
    152	.ret_type	= RET_INTEGER,
    153};
    154
    155BPF_CALL_0(bpf_get_numa_node_id)
    156{
    157	return numa_node_id();
    158}
    159
    160const struct bpf_func_proto bpf_get_numa_node_id_proto = {
    161	.func		= bpf_get_numa_node_id,
    162	.gpl_only	= false,
    163	.ret_type	= RET_INTEGER,
    164};
    165
    166BPF_CALL_0(bpf_ktime_get_ns)
    167{
    168	/* NMI safe access to clock monotonic */
    169	return ktime_get_mono_fast_ns();
    170}
    171
    172const struct bpf_func_proto bpf_ktime_get_ns_proto = {
    173	.func		= bpf_ktime_get_ns,
    174	.gpl_only	= false,
    175	.ret_type	= RET_INTEGER,
    176};
    177
    178BPF_CALL_0(bpf_ktime_get_boot_ns)
    179{
    180	/* NMI safe access to clock boottime */
    181	return ktime_get_boot_fast_ns();
    182}
    183
    184const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
    185	.func		= bpf_ktime_get_boot_ns,
    186	.gpl_only	= false,
    187	.ret_type	= RET_INTEGER,
    188};
    189
    190BPF_CALL_0(bpf_ktime_get_coarse_ns)
    191{
    192	return ktime_get_coarse_ns();
    193}
    194
    195const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
    196	.func		= bpf_ktime_get_coarse_ns,
    197	.gpl_only	= false,
    198	.ret_type	= RET_INTEGER,
    199};
    200
    201BPF_CALL_0(bpf_get_current_pid_tgid)
    202{
    203	struct task_struct *task = current;
    204
    205	if (unlikely(!task))
    206		return -EINVAL;
    207
    208	return (u64) task->tgid << 32 | task->pid;
    209}
    210
    211const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
    212	.func		= bpf_get_current_pid_tgid,
    213	.gpl_only	= false,
    214	.ret_type	= RET_INTEGER,
    215};
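/*
 * Example (editor's sketch, not part of helpers.c): decoding the packed
 * return value of bpf_get_current_pid_tgid() on the BPF program side.
 * The tgid (the "PID" seen by userspace) sits in the upper 32 bits and
 * the per-thread pid (the "TID") in the lower 32 bits, matching the
 * shift in the helper above. Assumes <bpf/bpf_helpers.h>.
 *
 *	static __always_inline void get_ids(__u32 *tgid, __u32 *tid)
 *	{
 *		__u64 pid_tgid = bpf_get_current_pid_tgid();
 *
 *		*tgid = pid_tgid >> 32;		// process (thread group) id
 *		*tid = (__u32)pid_tgid;		// thread id
 *	}
 */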
    216
    217BPF_CALL_0(bpf_get_current_uid_gid)
    218{
    219	struct task_struct *task = current;
    220	kuid_t uid;
    221	kgid_t gid;
    222
    223	if (unlikely(!task))
    224		return -EINVAL;
    225
    226	current_uid_gid(&uid, &gid);
    227	return (u64) from_kgid(&init_user_ns, gid) << 32 |
    228		     from_kuid(&init_user_ns, uid);
    229}
    230
    231const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
    232	.func		= bpf_get_current_uid_gid,
    233	.gpl_only	= false,
    234	.ret_type	= RET_INTEGER,
    235};
    236
    237BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
    238{
    239	struct task_struct *task = current;
    240
    241	if (unlikely(!task))
    242		goto err_clear;
    243
    244	/* Verifier guarantees that size > 0 */
    245	strscpy(buf, task->comm, size);
    246	return 0;
    247err_clear:
    248	memset(buf, 0, size);
    249	return -EINVAL;
    250}
    251
    252const struct bpf_func_proto bpf_get_current_comm_proto = {
    253	.func		= bpf_get_current_comm,
    254	.gpl_only	= false,
    255	.ret_type	= RET_INTEGER,
    256	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    257	.arg2_type	= ARG_CONST_SIZE,
    258};
    259
    260#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
    261
    262static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
    263{
    264	arch_spinlock_t *l = (void *)lock;
    265	union {
    266		__u32 val;
    267		arch_spinlock_t lock;
    268	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
    269
    270	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
    271	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
    272	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
    273	arch_spin_lock(l);
    274}
    275
    276static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
    277{
    278	arch_spinlock_t *l = (void *)lock;
    279
    280	arch_spin_unlock(l);
    281}
    282
    283#else
    284
    285static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
    286{
    287	atomic_t *l = (void *)lock;
    288
    289	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
    290	do {
    291		atomic_cond_read_relaxed(l, !VAL);
    292	} while (atomic_xchg(l, 1));
    293}
    294
    295static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
    296{
    297	atomic_t *l = (void *)lock;
    298
    299	atomic_set_release(l, 0);
    300}
    301
    302#endif
    303
    304static DEFINE_PER_CPU(unsigned long, irqsave_flags);
    305
    306static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
    307{
    308	unsigned long flags;
    309
    310	local_irq_save(flags);
    311	__bpf_spin_lock(lock);
    312	__this_cpu_write(irqsave_flags, flags);
    313}
    314
    315notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
    316{
    317	__bpf_spin_lock_irqsave(lock);
    318	return 0;
    319}
    320
    321const struct bpf_func_proto bpf_spin_lock_proto = {
    322	.func		= bpf_spin_lock,
    323	.gpl_only	= false,
    324	.ret_type	= RET_VOID,
    325	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
    326};
    327
    328static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
    329{
    330	unsigned long flags;
    331
    332	flags = __this_cpu_read(irqsave_flags);
    333	__bpf_spin_unlock(lock);
    334	local_irq_restore(flags);
    335}
    336
    337notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
    338{
    339	__bpf_spin_unlock_irqrestore(lock);
    340	return 0;
    341}
    342
    343const struct bpf_func_proto bpf_spin_unlock_proto = {
    344	.func		= bpf_spin_unlock,
    345	.gpl_only	= false,
    346	.ret_type	= RET_VOID,
    347	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
    348};
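/*
 * Example (editor's sketch, not part of helpers.c): a BTF-defined map value
 * embedding struct bpf_spin_lock, protected by bpf_spin_lock()/
 * bpf_spin_unlock(). The verifier enforces that the lock is released on
 * every program path. Assumes <linux/bpf.h> and <bpf/bpf_helpers.h>; names
 * are illustrative.
 *
 *	struct counter_val {
 *		struct bpf_spin_lock lock;
 *		__u64 packets;
 *		__u64 bytes;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, struct counter_val);
 *	} stats SEC(".maps");
 *
 *	SEC("tc")
 *	int account(struct __sk_buff *skb)
 *	{
 *		__u32 key = 0;
 *		struct counter_val *v;
 *
 *		v = bpf_map_lookup_elem(&stats, &key);
 *		if (!v)
 *			return 0;
 *
 *		bpf_spin_lock(&v->lock);
 *		v->packets++;
 *		v->bytes += skb->len;
 *		bpf_spin_unlock(&v->lock);
 *		return 0;
 *	}
 */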
    349
    350void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
    351			   bool lock_src)
    352{
    353	struct bpf_spin_lock *lock;
    354
    355	if (lock_src)
    356		lock = src + map->spin_lock_off;
    357	else
    358		lock = dst + map->spin_lock_off;
    359	preempt_disable();
    360	__bpf_spin_lock_irqsave(lock);
    361	copy_map_value(map, dst, src);
    362	__bpf_spin_unlock_irqrestore(lock);
    363	preempt_enable();
    364}
    365
    366BPF_CALL_0(bpf_jiffies64)
    367{
    368	return get_jiffies_64();
    369}
    370
    371const struct bpf_func_proto bpf_jiffies64_proto = {
    372	.func		= bpf_jiffies64,
    373	.gpl_only	= false,
    374	.ret_type	= RET_INTEGER,
    375};
    376
    377#ifdef CONFIG_CGROUPS
    378BPF_CALL_0(bpf_get_current_cgroup_id)
    379{
    380	struct cgroup *cgrp;
    381	u64 cgrp_id;
    382
    383	rcu_read_lock();
    384	cgrp = task_dfl_cgroup(current);
    385	cgrp_id = cgroup_id(cgrp);
    386	rcu_read_unlock();
    387
    388	return cgrp_id;
    389}
    390
    391const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
    392	.func		= bpf_get_current_cgroup_id,
    393	.gpl_only	= false,
    394	.ret_type	= RET_INTEGER,
    395};
    396
    397BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
    398{
    399	struct cgroup *cgrp;
    400	struct cgroup *ancestor;
    401	u64 cgrp_id;
    402
    403	rcu_read_lock();
    404	cgrp = task_dfl_cgroup(current);
    405	ancestor = cgroup_ancestor(cgrp, ancestor_level);
    406	cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
    407	rcu_read_unlock();
    408
    409	return cgrp_id;
    410}
    411
    412const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
    413	.func		= bpf_get_current_ancestor_cgroup_id,
    414	.gpl_only	= false,
    415	.ret_type	= RET_INTEGER,
    416	.arg1_type	= ARG_ANYTHING,
    417};
    418
    419#ifdef CONFIG_CGROUP_BPF
    420
    421BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
    422{
    423	/* flags argument is not used now,
    424	 * but provides an ability to extend the API.
    425	 * verifier checks that its value is correct.
    426	 */
    427	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
    428	struct bpf_cgroup_storage *storage;
    429	struct bpf_cg_run_ctx *ctx;
    430	void *ptr;
    431
    432	/* get current cgroup storage from BPF run context */
    433	ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
    434	storage = ctx->prog_item->cgroup_storage[stype];
    435
    436	if (stype == BPF_CGROUP_STORAGE_SHARED)
    437		ptr = &READ_ONCE(storage->buf)->data[0];
    438	else
    439		ptr = this_cpu_ptr(storage->percpu_buf);
    440
    441	return (unsigned long)ptr;
    442}
    443
    444const struct bpf_func_proto bpf_get_local_storage_proto = {
    445	.func		= bpf_get_local_storage,
    446	.gpl_only	= false,
    447	.ret_type	= RET_PTR_TO_MAP_VALUE,
    448	.arg1_type	= ARG_CONST_MAP_PTR,
    449	.arg2_type	= ARG_ANYTHING,
    450};
    451#endif
    452
    453#define BPF_STRTOX_BASE_MASK 0x1F
    454
    455static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
    456			  unsigned long long *res, bool *is_negative)
    457{
    458	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
    459	const char *cur_buf = buf;
    460	size_t cur_len = buf_len;
    461	unsigned int consumed;
    462	size_t val_len;
    463	char str[64];
    464
    465	if (!buf || !buf_len || !res || !is_negative)
    466		return -EINVAL;
    467
    468	if (base != 0 && base != 8 && base != 10 && base != 16)
    469		return -EINVAL;
    470
    471	if (flags & ~BPF_STRTOX_BASE_MASK)
    472		return -EINVAL;
    473
    474	while (cur_buf < buf + buf_len && isspace(*cur_buf))
    475		++cur_buf;
    476
    477	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
    478	if (*is_negative)
    479		++cur_buf;
    480
    481	consumed = cur_buf - buf;
    482	cur_len -= consumed;
    483	if (!cur_len)
    484		return -EINVAL;
    485
    486	cur_len = min(cur_len, sizeof(str) - 1);
    487	memcpy(str, cur_buf, cur_len);
    488	str[cur_len] = '\0';
    489	cur_buf = str;
    490
    491	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
    492	val_len = _parse_integer(cur_buf, base, res);
    493
    494	if (val_len & KSTRTOX_OVERFLOW)
    495		return -ERANGE;
    496
    497	if (val_len == 0)
    498		return -EINVAL;
    499
    500	cur_buf += val_len;
    501	consumed += cur_buf - str;
    502
    503	return consumed;
    504}
    505
    506static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
    507			 long long *res)
    508{
    509	unsigned long long _res;
    510	bool is_negative;
    511	int err;
    512
    513	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
    514	if (err < 0)
    515		return err;
    516	if (is_negative) {
    517		if ((long long)-_res > 0)
    518			return -ERANGE;
    519		*res = -_res;
    520	} else {
    521		if ((long long)_res < 0)
    522			return -ERANGE;
    523		*res = _res;
    524	}
    525	return err;
    526}
    527
    528BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
    529	   long *, res)
    530{
    531	long long _res;
    532	int err;
    533
    534	err = __bpf_strtoll(buf, buf_len, flags, &_res);
    535	if (err < 0)
    536		return err;
    537	if (_res != (long)_res)
    538		return -ERANGE;
    539	*res = _res;
    540	return err;
    541}
    542
    543const struct bpf_func_proto bpf_strtol_proto = {
    544	.func		= bpf_strtol,
    545	.gpl_only	= false,
    546	.ret_type	= RET_INTEGER,
    547	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    548	.arg2_type	= ARG_CONST_SIZE,
    549	.arg3_type	= ARG_ANYTHING,
    550	.arg4_type	= ARG_PTR_TO_LONG,
    551};
    552
    553BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
    554	   unsigned long *, res)
    555{
    556	unsigned long long _res;
    557	bool is_negative;
    558	int err;
    559
    560	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
    561	if (err < 0)
    562		return err;
    563	if (is_negative)
    564		return -EINVAL;
    565	if (_res != (unsigned long)_res)
    566		return -ERANGE;
    567	*res = _res;
    568	return err;
    569}
    570
    571const struct bpf_func_proto bpf_strtoul_proto = {
    572	.func		= bpf_strtoul,
    573	.gpl_only	= false,
    574	.ret_type	= RET_INTEGER,
    575	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
    576	.arg2_type	= ARG_CONST_SIZE,
    577	.arg3_type	= ARG_ANYTHING,
    578	.arg4_type	= ARG_PTR_TO_LONG,
    579};
    580#endif
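/*
 * Example (editor's sketch): calling bpf_strtol() from a BPF program. The
 * low five bits of 'flags' select the numeric base (0 for auto-detection,
 * or 8/10/16); all other flag bits must be zero, as enforced by
 * __bpf_strtoull() above. 'buf' must point to readable memory of at least
 * 'buf_len' bytes.
 *
 *	static __always_inline long parse_decimal(const char *buf, __u32 buf_len)
 *	{
 *		long val = 0;
 *
 *		if (bpf_strtol(buf, buf_len, 10, &val) < 0)	// base 10
 *			return -1;
 *		return val;
 *	}
 */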
    581
    582BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
    583{
    584	return strncmp(s1, s2, s1_sz);
    585}
    586
    587const struct bpf_func_proto bpf_strncmp_proto = {
    588	.func		= bpf_strncmp,
    589	.gpl_only	= false,
    590	.ret_type	= RET_INTEGER,
    591	.arg1_type	= ARG_PTR_TO_MEM,
    592	.arg2_type	= ARG_CONST_SIZE,
    593	.arg3_type	= ARG_PTR_TO_CONST_STR,
    594};
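/*
 * Example (editor's sketch): bpf_strncmp() compares at most s1_sz bytes of
 * s1 against a read-only constant string s2, so a typical pattern is a
 * fixed-size buffer filled by another helper compared against a string
 * kept in .rodata. Assumes a GPL-licensed program for bpf_printk().
 *
 *	static const char target[16] = "bash";
 *
 *	SEC("tracepoint/syscalls/sys_enter_openat")
 *	int match_comm(void *ctx)
 *	{
 *		char comm[16];
 *
 *		bpf_get_current_comm(comm, sizeof(comm));
 *		if (bpf_strncmp(comm, sizeof(comm), target) == 0)
 *			bpf_printk("openat() from bash");
 *		return 0;
 *	}
 */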
    595
    596BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
    597	   struct bpf_pidns_info *, nsdata, u32, size)
    598{
    599	struct task_struct *task = current;
    600	struct pid_namespace *pidns;
    601	int err = -EINVAL;
    602
    603	if (unlikely(size != sizeof(struct bpf_pidns_info)))
    604		goto clear;
    605
    606	if (unlikely((u64)(dev_t)dev != dev))
    607		goto clear;
    608
    609	if (unlikely(!task))
    610		goto clear;
    611
    612	pidns = task_active_pid_ns(task);
    613	if (unlikely(!pidns)) {
    614		err = -ENOENT;
    615		goto clear;
    616	}
    617
    618	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
    619		goto clear;
    620
    621	nsdata->pid = task_pid_nr_ns(task, pidns);
    622	nsdata->tgid = task_tgid_nr_ns(task, pidns);
    623	return 0;
    624clear:
    625	memset((void *)nsdata, 0, (size_t) size);
    626	return err;
    627}
    628
    629const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
    630	.func		= bpf_get_ns_current_pid_tgid,
    631	.gpl_only	= false,
    632	.ret_type	= RET_INTEGER,
    633	.arg1_type	= ARG_ANYTHING,
    634	.arg2_type	= ARG_ANYTHING,
    635	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
    636	.arg4_type      = ARG_CONST_SIZE,
    637};
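/*
 * Example (editor's sketch): the dev/ino pair that bpf_get_ns_current_pid_tgid()
 * matches against with ns_match() is normally obtained by the loader in
 * userspace by stat()ing the pid namespace of interest and handing the two
 * values to the program (e.g. via .rodata). Hypothetical userspace snippet:
 *
 *	#include <sys/stat.h>
 *
 *	static int get_pidns_dev_ino(unsigned long long *dev, unsigned long long *ino)
 *	{
 *		struct stat st;
 *
 *		if (stat("/proc/self/ns/pid", &st))
 *			return -1;
 *		*dev = st.st_dev;
 *		*ino = st.st_ino;
 *		return 0;
 *	}
 */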
    638
    639static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
    640	.func		= bpf_get_raw_cpu_id,
    641	.gpl_only	= false,
    642	.ret_type	= RET_INTEGER,
    643};
    644
    645BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
    646	   u64, flags, void *, data, u64, size)
    647{
    648	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
    649		return -EINVAL;
    650
    651	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
    652}
    653
    654const struct bpf_func_proto bpf_event_output_data_proto =  {
    655	.func		= bpf_event_output_data,
    656	.gpl_only       = true,
    657	.ret_type       = RET_INTEGER,
    658	.arg1_type      = ARG_PTR_TO_CTX,
    659	.arg2_type      = ARG_CONST_MAP_PTR,
    660	.arg3_type      = ARG_ANYTHING,
    661	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
    662	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
    663};
    664
    665BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
    666	   const void __user *, user_ptr)
    667{
    668	int ret = copy_from_user(dst, user_ptr, size);
    669
    670	if (unlikely(ret)) {
    671		memset(dst, 0, size);
    672		ret = -EFAULT;
    673	}
    674
    675	return ret;
    676}
    677
    678const struct bpf_func_proto bpf_copy_from_user_proto = {
    679	.func		= bpf_copy_from_user,
    680	.gpl_only	= false,
    681	.ret_type	= RET_INTEGER,
    682	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    683	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    684	.arg3_type	= ARG_ANYTHING,
    685};
    686
    687BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
    688	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
    689{
    690	int ret;
    691
    692	/* flags is not used yet */
    693	if (unlikely(flags))
    694		return -EINVAL;
    695
    696	if (unlikely(!size))
    697		return 0;
    698
    699	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
    700	if (ret == size)
    701		return 0;
    702
    703	memset(dst, 0, size);
    704	/* Return -EFAULT for partial read */
    705	return ret < 0 ? ret : -EFAULT;
    706}
    707
    708const struct bpf_func_proto bpf_copy_from_user_task_proto = {
    709	.func		= bpf_copy_from_user_task,
    710	.gpl_only	= true,
    711	.ret_type	= RET_INTEGER,
    712	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
    713	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
    714	.arg3_type	= ARG_ANYTHING,
    715	.arg4_type	= ARG_PTR_TO_BTF_ID,
    716	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
    717	.arg5_type	= ARG_ANYTHING
    718};
    719
    720BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
    721{
    722	if (cpu >= nr_cpu_ids)
    723		return (unsigned long)NULL;
    724
    725	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
    726}
    727
    728const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
    729	.func		= bpf_per_cpu_ptr,
    730	.gpl_only	= false,
    731	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
    732	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
    733	.arg2_type	= ARG_ANYTHING,
    734};
    735
    736BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
    737{
    738	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
    739}
    740
    741const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
    742	.func		= bpf_this_cpu_ptr,
    743	.gpl_only	= false,
    744	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
    745	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
    746};
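/*
 * Example (editor's sketch): bpf_per_cpu_ptr()/bpf_this_cpu_ptr() take a
 * percpu kernel symbol (ARG_PTR_TO_PERCPU_BTF_ID), declared in BPF C as an
 * extern with the __ksym attribute; this requires a kernel built with BTF.
 * 'bpf_prog_active' is used purely as an illustration of a percpu int.
 *
 *	extern const int bpf_prog_active __ksym;
 *
 *	SEC("raw_tp/sys_enter")
 *	int show_percpu(const void *ctx)
 *	{
 *		const int *cur, *on_cpu0;
 *
 *		cur = bpf_this_cpu_ptr(&bpf_prog_active);	// never NULL
 *		on_cpu0 = bpf_per_cpu_ptr(&bpf_prog_active, 0);	// NULL if cpu invalid
 *		if (on_cpu0)
 *			bpf_printk("active: this=%d cpu0=%d", *cur, *on_cpu0);
 *		return 0;
 *	}
 */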
    747
    748static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
    749		size_t bufsz)
    750{
    751	void __user *user_ptr = (__force void __user *)unsafe_ptr;
    752
    753	buf[0] = 0;
    754
    755	switch (fmt_ptype) {
    756	case 's':
    757#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
    758		if ((unsigned long)unsafe_ptr < TASK_SIZE)
    759			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
    760		fallthrough;
    761#endif
    762	case 'k':
    763		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
    764	case 'u':
    765		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
    766	}
    767
    768	return -EINVAL;
    769}
    770
    771/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
    772 * arguments representation.
    773 */
    774#define MAX_BPRINTF_BUF_LEN	512
    775
    776/* Support executing three nested bprintf helper calls on a given CPU */
    777#define MAX_BPRINTF_NEST_LEVEL	3
    778struct bpf_bprintf_buffers {
    779	char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
    780};
    781static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
    782static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
    783
    784static int try_get_fmt_tmp_buf(char **tmp_buf)
    785{
    786	struct bpf_bprintf_buffers *bufs;
    787	int nest_level;
    788
    789	preempt_disable();
    790	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
    791	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
    792		this_cpu_dec(bpf_bprintf_nest_level);
    793		preempt_enable();
    794		return -EBUSY;
    795	}
    796	bufs = this_cpu_ptr(&bpf_bprintf_bufs);
    797	*tmp_buf = bufs->tmp_bufs[nest_level - 1];
    798
    799	return 0;
    800}
    801
    802void bpf_bprintf_cleanup(void)
    803{
    804	if (this_cpu_read(bpf_bprintf_nest_level)) {
    805		this_cpu_dec(bpf_bprintf_nest_level);
    806		preempt_enable();
    807	}
    808}
    809
    810/*
    811 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
    812 *
    813 * Returns a negative value if fmt is an invalid format string or 0 otherwise.
    814 *
    815 * This can be used in two ways:
    816 * - Format string verification only: when bin_args is NULL
    817 * - Arguments preparation: in addition to the above verification, it writes in
    818 *   bin_args a binary representation of arguments usable by bstr_printf where
    819 *   pointers from BPF have been sanitized.
    820 *
    821 * In argument preparation mode, if 0 is returned, safe temporary buffers are
    822 * allocated and bpf_bprintf_cleanup should be called to free them after use.
    823 */
    824int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
    825			u32 **bin_args, u32 num_args)
    826{
    827	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
    828	size_t sizeof_cur_arg, sizeof_cur_ip;
    829	int err, i, num_spec = 0;
    830	u64 cur_arg;
    831	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
    832
    833	fmt_end = strnchr(fmt, fmt_size, 0);
    834	if (!fmt_end)
    835		return -EINVAL;
    836	fmt_size = fmt_end - fmt;
    837
    838	if (bin_args) {
    839		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
    840			return -EBUSY;
    841
    842		tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
    843		*bin_args = (u32 *)tmp_buf;
    844	}
    845
    846	for (i = 0; i < fmt_size; i++) {
    847		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
    848			err = -EINVAL;
    849			goto out;
    850		}
    851
    852		if (fmt[i] != '%')
    853			continue;
    854
    855		if (fmt[i + 1] == '%') {
    856			i++;
    857			continue;
    858		}
    859
    860		if (num_spec >= num_args) {
    861			err = -EINVAL;
    862			goto out;
    863		}
    864
    865		/* The string is zero-terminated so if fmt[i] != 0, we can
    866		 * always access fmt[i + 1], in the worst case it will be a 0
    867		 */
    868		i++;
    869
    870		/* skip optional "[0 +-][num]" width formatting field */
    871		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
    872		       fmt[i] == ' ')
    873			i++;
    874		if (fmt[i] >= '1' && fmt[i] <= '9') {
    875			i++;
    876			while (fmt[i] >= '0' && fmt[i] <= '9')
    877				i++;
    878		}
    879
    880		if (fmt[i] == 'p') {
    881			sizeof_cur_arg = sizeof(long);
    882
    883			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
    884			    fmt[i + 2] == 's') {
    885				fmt_ptype = fmt[i + 1];
    886				i += 2;
    887				goto fmt_str;
    888			}
    889
    890			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
    891			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
    892			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
    893			    fmt[i + 1] == 'S') {
    894				/* just kernel pointers */
    895				if (tmp_buf)
    896					cur_arg = raw_args[num_spec];
    897				i++;
    898				goto nocopy_fmt;
    899			}
    900
    901			if (fmt[i + 1] == 'B') {
    902				if (tmp_buf)  {
    903					err = snprintf(tmp_buf,
    904						       (tmp_buf_end - tmp_buf),
    905						       "%pB",
    906						       (void *)(long)raw_args[num_spec]);
    907					tmp_buf += (err + 1);
    908				}
    909
    910				i++;
    911				num_spec++;
    912				continue;
    913			}
    914
    915			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
    916			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
    917			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
    918				err = -EINVAL;
    919				goto out;
    920			}
    921
    922			i += 2;
    923			if (!tmp_buf)
    924				goto nocopy_fmt;
    925
    926			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
    927			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
    928				err = -ENOSPC;
    929				goto out;
    930			}
    931
    932			unsafe_ptr = (char *)(long)raw_args[num_spec];
    933			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
    934						       sizeof_cur_ip);
    935			if (err < 0)
    936				memset(cur_ip, 0, sizeof_cur_ip);
    937
    938			/* hack: bstr_printf expects IP addresses to be
    939			 * pre-formatted as strings, ironically, the easiest way
    940			 * to do that is to call snprintf.
    941			 */
    942			ip_spec[2] = fmt[i - 1];
    943			ip_spec[3] = fmt[i];
    944			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
    945				       ip_spec, &cur_ip);
    946
    947			tmp_buf += err + 1;
    948			num_spec++;
    949
    950			continue;
    951		} else if (fmt[i] == 's') {
    952			fmt_ptype = fmt[i];
    953fmt_str:
    954			if (fmt[i + 1] != 0 &&
    955			    !isspace(fmt[i + 1]) &&
    956			    !ispunct(fmt[i + 1])) {
    957				err = -EINVAL;
    958				goto out;
    959			}
    960
    961			if (!tmp_buf)
    962				goto nocopy_fmt;
    963
    964			if (tmp_buf_end == tmp_buf) {
    965				err = -ENOSPC;
    966				goto out;
    967			}
    968
    969			unsafe_ptr = (char *)(long)raw_args[num_spec];
    970			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
    971						    fmt_ptype,
    972						    tmp_buf_end - tmp_buf);
    973			if (err < 0) {
    974				tmp_buf[0] = '\0';
    975				err = 1;
    976			}
    977
    978			tmp_buf += err;
    979			num_spec++;
    980
    981			continue;
    982		} else if (fmt[i] == 'c') {
    983			if (!tmp_buf)
    984				goto nocopy_fmt;
    985
    986			if (tmp_buf_end == tmp_buf) {
    987				err = -ENOSPC;
    988				goto out;
    989			}
    990
    991			*tmp_buf = raw_args[num_spec];
    992			tmp_buf++;
    993			num_spec++;
    994
    995			continue;
    996		}
    997
    998		sizeof_cur_arg = sizeof(int);
    999
   1000		if (fmt[i] == 'l') {
   1001			sizeof_cur_arg = sizeof(long);
   1002			i++;
   1003		}
   1004		if (fmt[i] == 'l') {
   1005			sizeof_cur_arg = sizeof(long long);
   1006			i++;
   1007		}
   1008
   1009		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
   1010		    fmt[i] != 'x' && fmt[i] != 'X') {
   1011			err = -EINVAL;
   1012			goto out;
   1013		}
   1014
   1015		if (tmp_buf)
   1016			cur_arg = raw_args[num_spec];
   1017nocopy_fmt:
   1018		if (tmp_buf) {
   1019			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
   1020			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
   1021				err = -ENOSPC;
   1022				goto out;
   1023			}
   1024
   1025			if (sizeof_cur_arg == 8) {
   1026				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
   1027				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
   1028			} else {
   1029				*(u32 *)tmp_buf = (u32)(long)cur_arg;
   1030			}
   1031			tmp_buf += sizeof_cur_arg;
   1032		}
   1033		num_spec++;
   1034	}
   1035
   1036	err = 0;
   1037out:
   1038	if (err)
   1039		bpf_bprintf_cleanup();
   1040	return err;
   1041}
   1042
   1043BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
   1044	   const void *, data, u32, data_len)
   1045{
   1046	int err, num_args;
   1047	u32 *bin_args;
   1048
   1049	if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
   1050	    (data_len && !data))
   1051		return -EINVAL;
   1052	num_args = data_len / 8;
   1053
   1054	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
   1055	 * can safely give an unbounded size.
   1056	 */
   1057	err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
   1058	if (err < 0)
   1059		return err;
   1060
   1061	err = bstr_printf(str, str_size, fmt, bin_args);
   1062
   1063	bpf_bprintf_cleanup();
   1064
   1065	return err + 1;
   1066}
   1067
   1068const struct bpf_func_proto bpf_snprintf_proto = {
   1069	.func		= bpf_snprintf,
   1070	.gpl_only	= true,
   1071	.ret_type	= RET_INTEGER,
   1072	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
   1073	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
   1074	.arg3_type	= ARG_PTR_TO_CONST_STR,
   1075	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
   1076	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
   1077};
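/*
 * Example (editor's sketch): bpf_snprintf() takes its arguments as an array
 * of u64 ("data"), which bpf_bprintf_prepare() above turns into the binary
 * form expected by bstr_printf(). The format string must live in .rodata
 * (ARG_PTR_TO_CONST_STR) and the return value includes the terminating NUL,
 * mirroring the 'err + 1' above. Names are illustrative.
 *
 *	static const char fmt[] = "pid=%u t=%llu";
 *
 *	SEC("tracepoint/syscalls/sys_enter_execve")
 *	int fmt_example(void *ctx)
 *	{
 *		char out[64];
 *		__u64 args[2];
 *		long n;
 *
 *		args[0] = bpf_get_current_pid_tgid() >> 32;
 *		args[1] = bpf_ktime_get_ns();
 *		n = bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 *		if (n < 0)
 *			return 0;
 *		// 'out' now holds the formatted, NUL-terminated string
 *		return 0;
 *	}
 */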
   1078
   1079/* BPF map elements can contain 'struct bpf_timer'.
   1080 * Such map owns all of its BPF timers.
   1081 * 'struct bpf_timer' is allocated as part of map element allocation
   1082 * and it's zero initialized.
   1083 * That space is used to keep 'struct bpf_timer_kern'.
   1084 * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
   1085 * remembers 'struct bpf_map *' pointer it's part of.
    1086 * bpf_timer_set_callback() increments the prog refcnt and assigns the bpf callback_fn.
    1087 * bpf_timer_start() arms the timer.
    1088 * If the user space reference to a map goes to zero at this point,
    1089 * the ops->map_release_uref callback is responsible for cancelling the timers,
    1090 * freeing their memory, and decrementing the progs' refcnts.
    1091 * bpf_timer_cancel() cancels the timer and decrements the prog's refcnt.
    1092 * Inner maps can contain bpf timers as well; ops->map_release_uref
    1093 * frees the timers when an inner map is replaced or deleted by user space.
   1094 */
   1095struct bpf_hrtimer {
   1096	struct hrtimer timer;
   1097	struct bpf_map *map;
   1098	struct bpf_prog *prog;
   1099	void __rcu *callback_fn;
   1100	void *value;
   1101};
   1102
   1103/* the actual struct hidden inside uapi struct bpf_timer */
   1104struct bpf_timer_kern {
   1105	struct bpf_hrtimer *timer;
   1106	/* bpf_spin_lock is used here instead of spinlock_t to make
   1107	 * sure that it always fits into space reserved by struct bpf_timer
   1108	 * regardless of LOCKDEP and spinlock debug flags.
   1109	 */
   1110	struct bpf_spin_lock lock;
   1111} __attribute__((aligned(8)));
   1112
   1113static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
   1114
   1115static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
   1116{
   1117	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
   1118	struct bpf_map *map = t->map;
   1119	void *value = t->value;
   1120	bpf_callback_t callback_fn;
   1121	void *key;
   1122	u32 idx;
   1123
   1124	BTF_TYPE_EMIT(struct bpf_timer);
   1125	callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
   1126	if (!callback_fn)
   1127		goto out;
   1128
   1129	/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
   1130	 * cannot be preempted by another bpf_timer_cb() on the same cpu.
   1131	 * Remember the timer this callback is servicing to prevent
   1132	 * deadlock if callback_fn() calls bpf_timer_cancel() or
   1133	 * bpf_map_delete_elem() on the same timer.
   1134	 */
   1135	this_cpu_write(hrtimer_running, t);
   1136	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
   1137		struct bpf_array *array = container_of(map, struct bpf_array, map);
   1138
   1139		/* compute the key */
   1140		idx = ((char *)value - array->value) / array->elem_size;
   1141		key = &idx;
   1142	} else { /* hash or lru */
   1143		key = value - round_up(map->key_size, 8);
   1144	}
   1145
   1146	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
   1147	/* The verifier checked that return value is zero. */
   1148
   1149	this_cpu_write(hrtimer_running, NULL);
   1150out:
   1151	return HRTIMER_NORESTART;
   1152}
   1153
   1154BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
   1155	   u64, flags)
   1156{
   1157	clockid_t clockid = flags & (MAX_CLOCKS - 1);
   1158	struct bpf_hrtimer *t;
   1159	int ret = 0;
   1160
   1161	BUILD_BUG_ON(MAX_CLOCKS != 16);
   1162	BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
   1163	BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
   1164
   1165	if (in_nmi())
   1166		return -EOPNOTSUPP;
   1167
   1168	if (flags >= MAX_CLOCKS ||
   1169	    /* similar to timerfd except _ALARM variants are not supported */
   1170	    (clockid != CLOCK_MONOTONIC &&
   1171	     clockid != CLOCK_REALTIME &&
   1172	     clockid != CLOCK_BOOTTIME))
   1173		return -EINVAL;
   1174	__bpf_spin_lock_irqsave(&timer->lock);
   1175	t = timer->timer;
   1176	if (t) {
   1177		ret = -EBUSY;
   1178		goto out;
   1179	}
   1180	if (!atomic64_read(&map->usercnt)) {
   1181		/* maps with timers must be either held by user space
   1182		 * or pinned in bpffs.
   1183		 */
   1184		ret = -EPERM;
   1185		goto out;
   1186	}
   1187	/* allocate hrtimer via map_kmalloc to use memcg accounting */
   1188	t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
   1189	if (!t) {
   1190		ret = -ENOMEM;
   1191		goto out;
   1192	}
   1193	t->value = (void *)timer - map->timer_off;
   1194	t->map = map;
   1195	t->prog = NULL;
   1196	rcu_assign_pointer(t->callback_fn, NULL);
   1197	hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
   1198	t->timer.function = bpf_timer_cb;
   1199	timer->timer = t;
   1200out:
   1201	__bpf_spin_unlock_irqrestore(&timer->lock);
   1202	return ret;
   1203}
   1204
   1205static const struct bpf_func_proto bpf_timer_init_proto = {
   1206	.func		= bpf_timer_init,
   1207	.gpl_only	= true,
   1208	.ret_type	= RET_INTEGER,
   1209	.arg1_type	= ARG_PTR_TO_TIMER,
   1210	.arg2_type	= ARG_CONST_MAP_PTR,
   1211	.arg3_type	= ARG_ANYTHING,
   1212};
   1213
   1214BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
   1215	   struct bpf_prog_aux *, aux)
   1216{
   1217	struct bpf_prog *prev, *prog = aux->prog;
   1218	struct bpf_hrtimer *t;
   1219	int ret = 0;
   1220
   1221	if (in_nmi())
   1222		return -EOPNOTSUPP;
   1223	__bpf_spin_lock_irqsave(&timer->lock);
   1224	t = timer->timer;
   1225	if (!t) {
   1226		ret = -EINVAL;
   1227		goto out;
   1228	}
   1229	if (!atomic64_read(&t->map->usercnt)) {
   1230		/* maps with timers must be either held by user space
   1231		 * or pinned in bpffs. Otherwise timer might still be
   1232		 * running even when bpf prog is detached and user space
   1233		 * is gone, since map_release_uref won't ever be called.
   1234		 */
   1235		ret = -EPERM;
   1236		goto out;
   1237	}
   1238	prev = t->prog;
   1239	if (prev != prog) {
   1240		/* Bump prog refcnt once. Every bpf_timer_set_callback()
   1241		 * can pick different callback_fn-s within the same prog.
   1242		 */
   1243		prog = bpf_prog_inc_not_zero(prog);
   1244		if (IS_ERR(prog)) {
   1245			ret = PTR_ERR(prog);
   1246			goto out;
   1247		}
   1248		if (prev)
   1249			/* Drop prev prog refcnt when swapping with new prog */
   1250			bpf_prog_put(prev);
   1251		t->prog = prog;
   1252	}
   1253	rcu_assign_pointer(t->callback_fn, callback_fn);
   1254out:
   1255	__bpf_spin_unlock_irqrestore(&timer->lock);
   1256	return ret;
   1257}
   1258
   1259static const struct bpf_func_proto bpf_timer_set_callback_proto = {
   1260	.func		= bpf_timer_set_callback,
   1261	.gpl_only	= true,
   1262	.ret_type	= RET_INTEGER,
   1263	.arg1_type	= ARG_PTR_TO_TIMER,
   1264	.arg2_type	= ARG_PTR_TO_FUNC,
   1265};
   1266
   1267BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
   1268{
   1269	struct bpf_hrtimer *t;
   1270	int ret = 0;
   1271
   1272	if (in_nmi())
   1273		return -EOPNOTSUPP;
   1274	if (flags)
   1275		return -EINVAL;
   1276	__bpf_spin_lock_irqsave(&timer->lock);
   1277	t = timer->timer;
   1278	if (!t || !t->prog) {
   1279		ret = -EINVAL;
   1280		goto out;
   1281	}
   1282	hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
   1283out:
   1284	__bpf_spin_unlock_irqrestore(&timer->lock);
   1285	return ret;
   1286}
   1287
   1288static const struct bpf_func_proto bpf_timer_start_proto = {
   1289	.func		= bpf_timer_start,
   1290	.gpl_only	= true,
   1291	.ret_type	= RET_INTEGER,
   1292	.arg1_type	= ARG_PTR_TO_TIMER,
   1293	.arg2_type	= ARG_ANYTHING,
   1294	.arg3_type	= ARG_ANYTHING,
   1295};
   1296
   1297static void drop_prog_refcnt(struct bpf_hrtimer *t)
   1298{
   1299	struct bpf_prog *prog = t->prog;
   1300
   1301	if (prog) {
   1302		bpf_prog_put(prog);
   1303		t->prog = NULL;
   1304		rcu_assign_pointer(t->callback_fn, NULL);
   1305	}
   1306}
   1307
   1308BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
   1309{
   1310	struct bpf_hrtimer *t;
   1311	int ret = 0;
   1312
   1313	if (in_nmi())
   1314		return -EOPNOTSUPP;
   1315	__bpf_spin_lock_irqsave(&timer->lock);
   1316	t = timer->timer;
   1317	if (!t) {
   1318		ret = -EINVAL;
   1319		goto out;
   1320	}
   1321	if (this_cpu_read(hrtimer_running) == t) {
   1322		/* If bpf callback_fn is trying to bpf_timer_cancel()
   1323		 * its own timer the hrtimer_cancel() will deadlock
   1324		 * since it waits for callback_fn to finish
   1325		 */
   1326		ret = -EDEADLK;
   1327		goto out;
   1328	}
   1329	drop_prog_refcnt(t);
   1330out:
   1331	__bpf_spin_unlock_irqrestore(&timer->lock);
   1332	/* Cancel the timer and wait for associated callback to finish
   1333	 * if it was running.
   1334	 */
   1335	ret = ret ?: hrtimer_cancel(&t->timer);
   1336	return ret;
   1337}
   1338
   1339static const struct bpf_func_proto bpf_timer_cancel_proto = {
   1340	.func		= bpf_timer_cancel,
   1341	.gpl_only	= true,
   1342	.ret_type	= RET_INTEGER,
   1343	.arg1_type	= ARG_PTR_TO_TIMER,
   1344};
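/*
 * Example (editor's sketch, not part of helpers.c): the timer lifecycle
 * described above (init -> set_callback -> start, with cancel-and-free on
 * map release) as seen from a GPL BPF program. Map, section and callback
 * names are illustrative; CLOCK_MONOTONIC comes from <time.h>.
 *
 *	struct elem {
 *		struct bpf_timer t;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, struct elem);
 *	} timers SEC(".maps");
 *
 *	static int timer_cb(void *map, int *key, struct bpf_timer *timer)
 *	{
 *		bpf_printk("timer fired");
 *		return 0;	// verifier requires 0, see bpf_timer_cb() above
 *	}
 *
 *	SEC("fentry/bpf_fentry_test1")
 *	int arm_timer(void *ctx)
 *	{
 *		int key = 0;
 *		struct elem *e;
 *
 *		e = bpf_map_lookup_elem(&timers, &key);
 *		if (!e)
 *			return 0;
 *
 *		bpf_timer_init(&e->t, &timers, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&e->t, timer_cb);
 *		bpf_timer_start(&e->t, 1000000000, 0);	// one second
 *		return 0;
 *	}
 */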
   1345
   1346/* This function is called by map_delete/update_elem for individual element and
   1347 * by ops->map_release_uref when the user space reference to a map reaches zero.
   1348 */
   1349void bpf_timer_cancel_and_free(void *val)
   1350{
   1351	struct bpf_timer_kern *timer = val;
   1352	struct bpf_hrtimer *t;
   1353
   1354	/* Performance optimization: read timer->timer without lock first. */
   1355	if (!READ_ONCE(timer->timer))
   1356		return;
   1357
   1358	__bpf_spin_lock_irqsave(&timer->lock);
   1359	/* re-read it under lock */
   1360	t = timer->timer;
   1361	if (!t)
   1362		goto out;
   1363	drop_prog_refcnt(t);
   1364	/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
   1365	 * this timer, since it won't be initialized.
   1366	 */
   1367	timer->timer = NULL;
   1368out:
   1369	__bpf_spin_unlock_irqrestore(&timer->lock);
   1370	if (!t)
   1371		return;
   1372	/* Cancel the timer and wait for callback to complete if it was running.
   1373	 * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
   1374	 * right after for both preallocated and non-preallocated maps.
   1375	 * The timer->timer = NULL was already done and no code path can
   1376	 * see address 't' anymore.
   1377	 *
   1378	 * Check that bpf_map_delete/update_elem() wasn't called from timer
   1379	 * callback_fn. In such case don't call hrtimer_cancel() (since it will
   1380	 * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
   1381	 * return -1). Though callback_fn is still running on this cpu it's
   1382	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
   1383	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
   1384	 * since timer->timer = NULL was already done. The timer will be
   1385	 * effectively cancelled because bpf_timer_cb() will return
   1386	 * HRTIMER_NORESTART.
   1387	 */
   1388	if (this_cpu_read(hrtimer_running) != t)
   1389		hrtimer_cancel(&t->timer);
   1390	kfree(t);
   1391}
   1392
   1393BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
   1394{
   1395	unsigned long *kptr = map_value;
   1396
   1397	return xchg(kptr, (unsigned long)ptr);
   1398}
   1399
   1400/* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg()
   1401 * helper is determined dynamically by the verifier.
   1402 */
   1403#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
   1404
   1405const struct bpf_func_proto bpf_kptr_xchg_proto = {
   1406	.func         = bpf_kptr_xchg,
   1407	.gpl_only     = false,
   1408	.ret_type     = RET_PTR_TO_BTF_ID_OR_NULL,
   1409	.ret_btf_id   = BPF_PTR_POISON,
   1410	.arg1_type    = ARG_PTR_TO_KPTR,
   1411	.arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
   1412	.arg2_btf_id  = BPF_PTR_POISON,
   1413};
   1414
    1415/* Since the upper 8 bits of dynptr->size are reserved, the
   1416 * maximum supported size is 2^24 - 1.
   1417 */
   1418#define DYNPTR_MAX_SIZE	((1UL << 24) - 1)
   1419#define DYNPTR_TYPE_SHIFT	28
   1420#define DYNPTR_SIZE_MASK	0xFFFFFF
   1421#define DYNPTR_RDONLY_BIT	BIT(31)
   1422
   1423static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
   1424{
   1425	return ptr->size & DYNPTR_RDONLY_BIT;
   1426}
   1427
   1428static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
   1429{
   1430	ptr->size |= type << DYNPTR_TYPE_SHIFT;
   1431}
   1432
   1433static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
   1434{
   1435	return ptr->size & DYNPTR_SIZE_MASK;
   1436}
   1437
   1438int bpf_dynptr_check_size(u32 size)
   1439{
   1440	return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
   1441}
   1442
   1443void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
   1444		     enum bpf_dynptr_type type, u32 offset, u32 size)
   1445{
   1446	ptr->data = data;
   1447	ptr->offset = offset;
   1448	ptr->size = size;
   1449	bpf_dynptr_set_type(ptr, type);
   1450}
   1451
   1452void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
   1453{
   1454	memset(ptr, 0, sizeof(*ptr));
   1455}
   1456
   1457static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
   1458{
   1459	u32 size = bpf_dynptr_get_size(ptr);
   1460
   1461	if (len > size || offset > size - len)
   1462		return -E2BIG;
   1463
   1464	return 0;
   1465}
   1466
   1467BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
   1468{
   1469	int err;
   1470
   1471	err = bpf_dynptr_check_size(size);
   1472	if (err)
   1473		goto error;
   1474
   1475	/* flags is currently unsupported */
   1476	if (flags) {
   1477		err = -EINVAL;
   1478		goto error;
   1479	}
   1480
   1481	bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);
   1482
   1483	return 0;
   1484
   1485error:
   1486	bpf_dynptr_set_null(ptr);
   1487	return err;
   1488}
   1489
   1490const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
   1491	.func		= bpf_dynptr_from_mem,
   1492	.gpl_only	= false,
   1493	.ret_type	= RET_INTEGER,
   1494	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
   1495	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
   1496	.arg3_type	= ARG_ANYTHING,
   1497	.arg4_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
   1498};
   1499
   1500BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset)
   1501{
   1502	int err;
   1503
   1504	if (!src->data)
   1505		return -EINVAL;
   1506
   1507	err = bpf_dynptr_check_off_len(src, offset, len);
   1508	if (err)
   1509		return err;
   1510
   1511	memcpy(dst, src->data + src->offset + offset, len);
   1512
   1513	return 0;
   1514}
   1515
   1516const struct bpf_func_proto bpf_dynptr_read_proto = {
   1517	.func		= bpf_dynptr_read,
   1518	.gpl_only	= false,
   1519	.ret_type	= RET_INTEGER,
   1520	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
   1521	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
   1522	.arg3_type	= ARG_PTR_TO_DYNPTR,
   1523	.arg4_type	= ARG_ANYTHING,
   1524};
   1525
   1526BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len)
   1527{
   1528	int err;
   1529
   1530	if (!dst->data || bpf_dynptr_is_rdonly(dst))
   1531		return -EINVAL;
   1532
   1533	err = bpf_dynptr_check_off_len(dst, offset, len);
   1534	if (err)
   1535		return err;
   1536
   1537	memcpy(dst->data + dst->offset + offset, src, len);
   1538
   1539	return 0;
   1540}
   1541
   1542const struct bpf_func_proto bpf_dynptr_write_proto = {
   1543	.func		= bpf_dynptr_write,
   1544	.gpl_only	= false,
   1545	.ret_type	= RET_INTEGER,
   1546	.arg1_type	= ARG_PTR_TO_DYNPTR,
   1547	.arg2_type	= ARG_ANYTHING,
   1548	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
   1549	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
   1550};
   1551
   1552BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
   1553{
   1554	int err;
   1555
   1556	if (!ptr->data)
   1557		return 0;
   1558
   1559	err = bpf_dynptr_check_off_len(ptr, offset, len);
   1560	if (err)
   1561		return 0;
   1562
   1563	if (bpf_dynptr_is_rdonly(ptr))
   1564		return 0;
   1565
   1566	return (unsigned long)(ptr->data + ptr->offset + offset);
   1567}
   1568
   1569const struct bpf_func_proto bpf_dynptr_data_proto = {
   1570	.func		= bpf_dynptr_data,
   1571	.gpl_only	= false,
   1572	.ret_type	= RET_PTR_TO_DYNPTR_MEM_OR_NULL,
   1573	.arg1_type	= ARG_PTR_TO_DYNPTR,
   1574	.arg2_type	= ARG_ANYTHING,
   1575	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
   1576};
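/*
 * Example (editor's sketch): a local dynptr built over a stack buffer with
 * bpf_dynptr_from_mem(), written through bpf_dynptr_write() and then turned
 * into a direct pointer with bpf_dynptr_data(). Offsets and lengths are
 * validated against the 24-bit size field described above.
 *
 *	SEC("tracepoint/syscalls/sys_enter_getpid")
 *	int dynptr_example(void *ctx)
 *	{
 *		char buf[16] = {};
 *		struct bpf_dynptr dptr;
 *		__u32 val = 42;
 *		void *p;
 *
 *		if (bpf_dynptr_from_mem(buf, sizeof(buf), 0, &dptr))
 *			return 0;
 *
 *		bpf_dynptr_write(&dptr, 0, &val, sizeof(val));
 *
 *		p = bpf_dynptr_data(&dptr, 0, sizeof(val));
 *		if (!p)
 *			return 0;
 *		// p points at buf[0..3] and can be accessed directly
 *		return 0;
 *	}
 */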
   1577
   1578const struct bpf_func_proto bpf_get_current_task_proto __weak;
   1579const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
   1580const struct bpf_func_proto bpf_probe_read_user_proto __weak;
   1581const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
   1582const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
   1583const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
   1584const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
   1585
   1586const struct bpf_func_proto *
   1587bpf_base_func_proto(enum bpf_func_id func_id)
   1588{
   1589	switch (func_id) {
   1590	case BPF_FUNC_map_lookup_elem:
   1591		return &bpf_map_lookup_elem_proto;
   1592	case BPF_FUNC_map_update_elem:
   1593		return &bpf_map_update_elem_proto;
   1594	case BPF_FUNC_map_delete_elem:
   1595		return &bpf_map_delete_elem_proto;
   1596	case BPF_FUNC_map_push_elem:
   1597		return &bpf_map_push_elem_proto;
   1598	case BPF_FUNC_map_pop_elem:
   1599		return &bpf_map_pop_elem_proto;
   1600	case BPF_FUNC_map_peek_elem:
   1601		return &bpf_map_peek_elem_proto;
   1602	case BPF_FUNC_map_lookup_percpu_elem:
   1603		return &bpf_map_lookup_percpu_elem_proto;
   1604	case BPF_FUNC_get_prandom_u32:
   1605		return &bpf_get_prandom_u32_proto;
   1606	case BPF_FUNC_get_smp_processor_id:
   1607		return &bpf_get_raw_smp_processor_id_proto;
   1608	case BPF_FUNC_get_numa_node_id:
   1609		return &bpf_get_numa_node_id_proto;
   1610	case BPF_FUNC_tail_call:
   1611		return &bpf_tail_call_proto;
   1612	case BPF_FUNC_ktime_get_ns:
   1613		return &bpf_ktime_get_ns_proto;
   1614	case BPF_FUNC_ktime_get_boot_ns:
   1615		return &bpf_ktime_get_boot_ns_proto;
   1616	case BPF_FUNC_ringbuf_output:
   1617		return &bpf_ringbuf_output_proto;
   1618	case BPF_FUNC_ringbuf_reserve:
   1619		return &bpf_ringbuf_reserve_proto;
   1620	case BPF_FUNC_ringbuf_submit:
   1621		return &bpf_ringbuf_submit_proto;
   1622	case BPF_FUNC_ringbuf_discard:
   1623		return &bpf_ringbuf_discard_proto;
   1624	case BPF_FUNC_ringbuf_query:
   1625		return &bpf_ringbuf_query_proto;
   1626	case BPF_FUNC_ringbuf_reserve_dynptr:
   1627		return &bpf_ringbuf_reserve_dynptr_proto;
   1628	case BPF_FUNC_ringbuf_submit_dynptr:
   1629		return &bpf_ringbuf_submit_dynptr_proto;
   1630	case BPF_FUNC_ringbuf_discard_dynptr:
   1631		return &bpf_ringbuf_discard_dynptr_proto;
   1632	case BPF_FUNC_for_each_map_elem:
   1633		return &bpf_for_each_map_elem_proto;
   1634	case BPF_FUNC_loop:
   1635		return &bpf_loop_proto;
   1636	case BPF_FUNC_strncmp:
   1637		return &bpf_strncmp_proto;
   1638	case BPF_FUNC_dynptr_from_mem:
   1639		return &bpf_dynptr_from_mem_proto;
   1640	case BPF_FUNC_dynptr_read:
   1641		return &bpf_dynptr_read_proto;
   1642	case BPF_FUNC_dynptr_write:
   1643		return &bpf_dynptr_write_proto;
   1644	case BPF_FUNC_dynptr_data:
   1645		return &bpf_dynptr_data_proto;
   1646	default:
   1647		break;
   1648	}
   1649
   1650	if (!bpf_capable())
   1651		return NULL;
   1652
   1653	switch (func_id) {
   1654	case BPF_FUNC_spin_lock:
   1655		return &bpf_spin_lock_proto;
   1656	case BPF_FUNC_spin_unlock:
   1657		return &bpf_spin_unlock_proto;
   1658	case BPF_FUNC_jiffies64:
   1659		return &bpf_jiffies64_proto;
   1660	case BPF_FUNC_per_cpu_ptr:
   1661		return &bpf_per_cpu_ptr_proto;
   1662	case BPF_FUNC_this_cpu_ptr:
   1663		return &bpf_this_cpu_ptr_proto;
   1664	case BPF_FUNC_timer_init:
   1665		return &bpf_timer_init_proto;
   1666	case BPF_FUNC_timer_set_callback:
   1667		return &bpf_timer_set_callback_proto;
   1668	case BPF_FUNC_timer_start:
   1669		return &bpf_timer_start_proto;
   1670	case BPF_FUNC_timer_cancel:
   1671		return &bpf_timer_cancel_proto;
   1672	case BPF_FUNC_kptr_xchg:
   1673		return &bpf_kptr_xchg_proto;
   1674	default:
   1675		break;
   1676	}
   1677
   1678	if (!perfmon_capable())
   1679		return NULL;
   1680
   1681	switch (func_id) {
   1682	case BPF_FUNC_trace_printk:
   1683		return bpf_get_trace_printk_proto();
   1684	case BPF_FUNC_get_current_task:
   1685		return &bpf_get_current_task_proto;
   1686	case BPF_FUNC_get_current_task_btf:
   1687		return &bpf_get_current_task_btf_proto;
   1688	case BPF_FUNC_probe_read_user:
   1689		return &bpf_probe_read_user_proto;
   1690	case BPF_FUNC_probe_read_kernel:
   1691		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
   1692		       NULL : &bpf_probe_read_kernel_proto;
   1693	case BPF_FUNC_probe_read_user_str:
   1694		return &bpf_probe_read_user_str_proto;
   1695	case BPF_FUNC_probe_read_kernel_str:
   1696		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
   1697		       NULL : &bpf_probe_read_kernel_str_proto;
   1698	case BPF_FUNC_snprintf_btf:
   1699		return &bpf_snprintf_btf_proto;
   1700	case BPF_FUNC_snprintf:
   1701		return &bpf_snprintf_proto;
   1702	case BPF_FUNC_task_pt_regs:
   1703		return &bpf_task_pt_regs_proto;
   1704	case BPF_FUNC_trace_vprintk:
   1705		return bpf_get_trace_vprintk_proto();
   1706	default:
   1707		return NULL;
   1708	}
   1709}
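/*
 * Example (editor's sketch, not part of helpers.c): how a subsystem's
 * verifier_ops->get_func_proto() typically layers its own helpers on top of
 * bpf_base_func_proto(), as referenced by the comment at the top of this
 * file. The extra helper chosen here is illustrative.
 *
 *	static const struct bpf_func_proto *
 *	example_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_get_current_comm:
 *			return &bpf_get_current_comm_proto;
 *		default:
 *			// fall back to the generic helpers defined above
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 */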