summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf-cgroup.h8
-rw-r--r--include/linux/bpf.h231
-rw-r--r--include/linux/bpf_local_storage.h4
-rw-r--r--include/linux/bpf_verifier.h3
-rw-r--r--include/linux/btf.h23
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/uapi/linux/bpf.h12
7 files changed, 134 insertions, 149 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 88a51b242adc..669d96d074ad 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -225,24 +225,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \
({ \
- u32 __unused_flags; \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- NULL, \
- &__unused_flags); \
+ NULL, NULL); \
__ret; \
})
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \
({ \
- u32 __unused_flags; \
int __ret = 0; \
if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
- t_ctx, \
- &__unused_flags); \
+ t_ctx, NULL); \
release_sock(sk); \
} \
__ret; \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bdb5298735ce..be94833d390a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/bpfptr.h>
+#include <linux/btf.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -147,14 +148,48 @@ struct bpf_map_ops {
bpf_callback_t callback_fn,
void *callback_ctx, u64 flags);
- /* BTF name and id of struct allocated by map_alloc */
- const char * const map_btf_name;
+ /* BTF id of struct allocated by map_alloc */
int *map_btf_id;
/* bpf_iter info used to open a seq_file */
const struct bpf_iter_seq_info *iter_seq_info;
};
+enum {
+ /* Support at most 8 pointers in a BPF map value */
+ BPF_MAP_VALUE_OFF_MAX = 8,
+ BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX +
+ 1 + /* for bpf_spin_lock */
+ 1, /* for bpf_timer */
+};
+
+enum bpf_kptr_type {
+ BPF_KPTR_UNREF,
+ BPF_KPTR_REF,
+};
+
+struct bpf_map_value_off_desc {
+ u32 offset;
+ enum bpf_kptr_type type;
+ struct {
+ struct btf *btf;
+ struct module *module;
+ btf_dtor_kfunc_t dtor;
+ u32 btf_id;
+ } kptr;
+};
+
+struct bpf_map_value_off {
+ u32 nr_off;
+ struct bpf_map_value_off_desc off[];
+};
+
+struct bpf_map_off_arr {
+ u32 cnt;
+ u32 field_off[BPF_MAP_OFF_ARR_MAX];
+ u8 field_sz[BPF_MAP_OFF_ARR_MAX];
+};
+
struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
@@ -171,6 +206,7 @@ struct bpf_map {
u64 map_extra; /* any per-map-type extra fields */
u32 map_flags;
int spin_lock_off; /* >=0 valid offset, <0 error */
+ struct bpf_map_value_off *kptr_off_tab;
int timer_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
@@ -182,10 +218,7 @@ struct bpf_map {
struct mem_cgroup *memcg;
#endif
char name[BPF_OBJ_NAME_LEN];
- bool bypass_spec_v1;
- bool frozen; /* write-once; write-protected by freeze_mutex */
- /* 14 bytes hole */
-
+ struct bpf_map_off_arr *off_arr;
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
@@ -205,6 +238,8 @@ struct bpf_map {
bool jited;
bool xdp_has_frags;
} owner;
+ bool bypass_spec_v1;
+ bool frozen; /* write-once; write-protected by freeze_mutex */
};
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -217,43 +252,44 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
return map->timer_off >= 0;
}
+static inline bool map_value_has_kptrs(const struct bpf_map *map)
+{
+ return !IS_ERR_OR_NULL(map->kptr_off_tab);
+}
+
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
if (unlikely(map_value_has_spin_lock(map)))
memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
if (unlikely(map_value_has_timer(map)))
memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
+ if (unlikely(map_value_has_kptrs(map))) {
+ struct bpf_map_value_off *tab = map->kptr_off_tab;
+ int i;
+
+ for (i = 0; i < tab->nr_off; i++)
+ *(u64 *)(dst + tab->off[i].offset) = 0;
+ }
}
/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
- u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
+ u32 curr_off = 0;
+ int i;
- if (unlikely(map_value_has_spin_lock(map))) {
- s_off = map->spin_lock_off;
- s_sz = sizeof(struct bpf_spin_lock);
- }
- if (unlikely(map_value_has_timer(map))) {
- t_off = map->timer_off;
- t_sz = sizeof(struct bpf_timer);
+ if (likely(!map->off_arr)) {
+ memcpy(dst, src, map->value_size);
+ return;
}
- if (unlikely(s_sz || t_sz)) {
- if (s_off < t_off || !s_sz) {
- swap(s_off, t_off);
- swap(s_sz, t_sz);
- }
- memcpy(dst, src, t_off);
- memcpy(dst + t_off + t_sz,
- src + t_off + t_sz,
- s_off - t_off - t_sz);
- memcpy(dst + s_off + s_sz,
- src + s_off + s_sz,
- map->value_size - s_off - s_sz);
- } else {
- memcpy(dst, src, map->value_size);
+ for (i = 0; i < map->off_arr->cnt; i++) {
+ u32 next_off = map->off_arr->field_off[i];
+
+ memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
+ curr_off += map->off_arr->field_sz[i];
}
+ memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off);
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
@@ -342,7 +378,18 @@ enum bpf_type_flag {
*/
MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS),
- __BPF_TYPE_LAST_FLAG = MEM_PERCPU,
+ /* Indicates that the argument will be released. */
+ OBJ_RELEASE = BIT(5 + BPF_BASE_TYPE_BITS),
+
+ /* PTR is not trusted. This is only used with PTR_TO_BTF_ID, to mark
+ * unreferenced and referenced kptr loaded from map value using a load
+ * instruction, so that they can only be dereferenced but not escape the
+ * BPF program into the kernel (i.e. cannot be passed as arguments to
+ * kfunc or bpf helpers).
+ */
+ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS),
+
+ __BPF_TYPE_LAST_FLAG = PTR_UNTRUSTED,
};
/* Max number of base types. */
@@ -391,6 +438,7 @@ enum bpf_arg_type {
ARG_PTR_TO_STACK, /* pointer to stack */
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
+ ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
__BPF_ARG_TYPE_MAX,
/* Extended arg_types. */
@@ -400,6 +448,7 @@ enum bpf_arg_type {
ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
+ ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
@@ -1221,7 +1270,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
+ * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run);
*
* the structure returned by bpf_prog_array_alloc() should be populated
* with program pointers and the last pointer must be NULL.
@@ -1315,83 +1364,22 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog,
- int retval, u32 *ret_flags)
-{
- const struct bpf_prog_array_item *item;
- const struct bpf_prog *prog;
- const struct bpf_prog_array *array;
- struct bpf_run_ctx *old_run_ctx;
- struct bpf_cg_run_ctx run_ctx;
- u32 func_ret;
-
- run_ctx.retval = retval;
- migrate_disable();
- rcu_read_lock();
- array = rcu_dereference(array_rcu);
- item = &array->items[0];
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
- while ((prog = READ_ONCE(item->prog))) {
- run_ctx.prog_item = item;
- func_ret = run_prog(prog, ctx);
- if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
- run_ctx.retval = -EPERM;
- *(ret_flags) |= (func_ret >> 1);
- item++;
- }
- bpf_reset_run_ctx(old_run_ctx);
- rcu_read_unlock();
- migrate_enable();
- return run_ctx.retval;
-}
-
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog,
- int retval)
-{
- const struct bpf_prog_array_item *item;
- const struct bpf_prog *prog;
- const struct bpf_prog_array *array;
- struct bpf_run_ctx *old_run_ctx;
- struct bpf_cg_run_ctx run_ctx;
-
- run_ctx.retval = retval;
- migrate_disable();
- rcu_read_lock();
- array = rcu_dereference(array_rcu);
- item = &array->items[0];
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
- while ((prog = READ_ONCE(item->prog))) {
- run_ctx.prog_item = item;
- if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
- run_ctx.retval = -EPERM;
- item++;
- }
- bpf_reset_run_ctx(old_run_ctx);
- rcu_read_unlock();
- migrate_enable();
- return run_ctx.retval;
-}
-
static __always_inline u32
-BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+bpf_prog_run_array(const struct bpf_prog_array *array,
const void *ctx, bpf_prog_run_fn run_prog)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
- const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_trace_run_ctx run_ctx;
u32 ret = 1;
- migrate_disable();
- rcu_read_lock();
- array = rcu_dereference(array_rcu);
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+
if (unlikely(!array))
- goto out;
+ return ret;
+
+ migrate_disable();
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
item = &array->items[0];
while ((prog = READ_ONCE(item->prog))) {
@@ -1400,50 +1388,10 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
item++;
}
bpf_reset_run_ctx(old_run_ctx);
-out:
- rcu_read_unlock();
migrate_enable();
return ret;
}
-/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
- * so BPF programs can request cwr for TCP packets.
- *
- * Current cgroup skb programs can only return 0 or 1 (0 to drop the
- * packet. This macro changes the behavior so the low order bit
- * indicates whether the packet should be dropped (0) or not (1)
- * and the next bit is a congestion notification bit. This could be
- * used by TCP to call tcp_enter_cwr()
- *
- * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
- * 0: drop packet
- * 1: keep packet
- * 2: drop packet and cn
- * 3: keep packet and cn
- *
- * This macro then converts it to one of the NET_XMIT or an error
- * code that is then interpreted as drop packet (and no cn):
- * 0: NET_XMIT_SUCCESS skb should be transmitted
- * 1: NET_XMIT_DROP skb should be dropped and cn
- * 2: NET_XMIT_CN skb should be transmitted and cn
- * 3: -err skb should be dropped
- */
-#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
- ({ \
- u32 _flags = 0; \
- bool _cn; \
- u32 _ret; \
- _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
- _cn = _flags & BPF_RET_SET_CN; \
- if (_ret && !IS_ERR_VALUE((long)_ret)) \
- _ret = -EFAULT; \
- if (!_ret) \
- _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
- else \
- _ret = (_cn ? NET_XMIT_DROP : _ret); \
- _ret; \
- })
-
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
@@ -1497,6 +1445,12 @@ void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
+struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset);
+void bpf_map_free_kptr_off_tab(struct bpf_map *map);
+struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map);
+bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
+void bpf_map_free_kptrs(struct bpf_map *map, void *map_value);
+
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
@@ -1793,7 +1747,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
u32 *next_btf_id, enum bpf_type_flag *flag);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
- const struct btf *need_btf, u32 need_type_id);
+ const struct btf *need_btf, u32 need_type_id,
+ bool strict);
int btf_distill_func_proto(struct bpf_verifier_log *log,
struct btf *btf,
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 493e63258497..7ea18d4da84b 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -143,9 +143,9 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem,
- bool uncharge_omem);
+ bool uncharge_omem, bool use_trace_rcu);
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu);
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *selem);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3a9d2d7cc6b7..1f1e7f2ea967 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -523,8 +523,7 @@ int check_ptr_off_reg(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno);
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
- enum bpf_arg_type arg_type,
- bool is_release_func);
+ enum bpf_arg_type arg_type);
int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 36bc09b8e890..2611cea2c2b6 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -17,6 +17,7 @@ enum btf_kfunc_type {
BTF_KFUNC_TYPE_ACQUIRE,
BTF_KFUNC_TYPE_RELEASE,
BTF_KFUNC_TYPE_RET_NULL,
+ BTF_KFUNC_TYPE_KPTR_ACQUIRE,
BTF_KFUNC_TYPE_MAX,
};
@@ -35,11 +36,19 @@ struct btf_kfunc_id_set {
struct btf_id_set *acquire_set;
struct btf_id_set *release_set;
struct btf_id_set *ret_null_set;
+ struct btf_id_set *kptr_acquire_set;
};
struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX];
};
};
+struct btf_id_dtor_kfunc {
+ u32 btf_id;
+ u32 kfunc_btf_id;
+};
+
+typedef void (*btf_dtor_kfunc_t)(void *);
+
extern const struct file_operations btf_fops;
void btf_get(struct btf *btf);
@@ -123,6 +132,8 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
+struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
+ const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
@@ -344,6 +355,9 @@ bool btf_kfunc_id_set_contains(const struct btf *btf,
enum btf_kfunc_type type, u32 kfunc_btf_id);
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
const struct btf_kfunc_id_set *s);
+s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
+int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
+ struct module *owner);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
@@ -367,6 +381,15 @@ static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
{
return 0;
}
+static inline s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
+{
+ return -ENOENT;
+}
+static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors,
+ u32 add_cnt, struct module *owner)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5cbc184ca685..dc4b3e1cf21b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3925,7 +3925,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features,
unsigned int offset);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
-int skb_ensure_writable(struct sk_buff *skb, int write_len);
+int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len);
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
int skb_vlan_pop(struct sk_buff *skb);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..444fe6f1cf35 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5143,6 +5143,17 @@ union bpf_attr {
* The **hash_algo** is returned on success,
* **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
+ *
+ * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ * Description
+ * Exchange kptr at pointer *map_value* with *ptr*, and return the
+ * old value. *ptr* can be NULL, otherwise it must be a referenced
+ * pointer which will be released when this helper is called.
+ * Return
+ * The old value of kptr (which can be NULL). The returned pointer
+ * if not NULL, is a reference which must be released using its
+ * corresponding release function, or moved into a BPF map before
+ * program exit.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5339,6 +5350,7 @@ union bpf_attr {
FN(copy_from_user_task), \
FN(skb_set_tstamp), \
FN(ima_file_hash), \
+ FN(kptr_xchg), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper