summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2020-07-22 12:34:55 -0700
committer: David S. Miller <davem@davemloft.net> 2020-07-22 12:35:33 -0700
commit: dee72f8a0c2d4ddb566151b2982d553461339315 (patch)
tree: b52b7479eb55397044a4e93d8cc92e8fdccd8d83 /kernel
parent: fa56a987449bcf4c1cb68369a187af3515b85c78 (diff)
parent: 9165e1d70fb34ce438e78aad90408cfa86e4c2d0 (diff)
download: cachepc-linux-dee72f8a0c2d4ddb566151b2982d553461339315.tar.gz
download: cachepc-linux-dee72f8a0c2d4ddb566151b2982d553461339315.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-07-21

The following pull-request contains BPF updates for your *net-next* tree.

We've added 46 non-merge commits during the last 6 day(s) which contain
a total of 68 files changed, 4929 insertions(+), 526 deletions(-).

The main changes are:

1) Run BPF program on socket lookup, from Jakub.

2) Introduce cpumap, from Lorenzo.

3) s390 JIT fixes, from Ilya.

4) teach riscv JIT to emit compressed insns, from Luke.

5) use build time computed BTF ids in bpf iter, from Yonghong.
====================

Purely independent overlapping changes in both filter.h and xdp.h

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/btf.c           |   6
-rw-r--r-- kernel/bpf/core.c          |  55
-rw-r--r-- kernel/bpf/cpumap.c        | 167
-rw-r--r-- kernel/bpf/map_iter.c      |   7
-rw-r--r-- kernel/bpf/net_namespace.c | 131
-rw-r--r-- kernel/bpf/syscall.c       |   9
-rw-r--r-- kernel/bpf/task_iter.c     |  12
-rw-r--r-- kernel/bpf/verifier.c      |  13
8 files changed, 354 insertions, 46 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 03d6d43bb1d6..ee36b7f60936 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3672,7 +3672,6 @@ struct btf *btf_parse_vmlinux(void)
goto errout;
bpf_struct_ops_init(btf, log);
- init_btf_sock_ids(btf);
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
@@ -3818,16 +3817,17 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return true;
/* this is a pointer to another type */
- info->reg_type = PTR_TO_BTF_ID;
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
if (ctx_arg_info->offset == off) {
info->reg_type = ctx_arg_info->reg_type;
- break;
+ info->btf_id = ctx_arg_info->btf_id;
+ return true;
}
}
+ info->reg_type = PTR_TO_BTF_ID;
if (tgt_prog) {
ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
if (ret > 0) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9df4cc9a2907..7be02e555ab9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1958,6 +1958,61 @@ void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
}
}
+/**
+ * bpf_prog_array_delete_safe_at() - Replaces the program at the given
+ * index into the program array with
+ * a dummy no-op program.
+ * @array: a bpf_prog_array
+ * @index: the index of the program to replace
+ *
+ * Skips over dummy programs, by not counting them, when calculating
+ * the position of the program to replace.
+ *
+ * Return:
+ * * 0 - Success
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
+ * * -ENOENT - Index out of range
+ */
+int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
+{
+ return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
+}
+
+/**
+ * bpf_prog_array_update_at() - Updates the program at the given index
+ * into the program array.
+ * @array: a bpf_prog_array
+ * @index: the index of the program to update
+ * @prog: the program to insert into the array
+ *
+ * Skips over dummy programs, by not counting them, when calculating
+ * the position of the program to update.
+ *
+ * Return:
+ * * 0 - Success
+ * * -EINVAL - Invalid index value. Must be a non-negative integer.
+ * * -ENOENT - Index out of range
+ */
+int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
+ struct bpf_prog *prog)
+{
+ struct bpf_prog_array_item *item;
+
+ if (unlikely(index < 0))
+ return -EINVAL;
+
+ for (item = array->items; item->prog; item++) {
+ if (item->prog == &dummy_bpf_prog.prog)
+ continue;
+ if (!index) {
+ WRITE_ONCE(item->prog, prog);
+ return 0;
+ }
+ index--;
+ }
+ return -ENOENT;
+}
+
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index bd8658055c16..f1c46529929b 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -52,7 +52,6 @@ struct xdp_bulk_queue {
struct bpf_cpu_map_entry {
u32 cpu; /* kthread CPU and map index */
int map_id; /* Back reference to map */
- u32 qsize; /* Queue size placeholder for map lookup */
/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
struct xdp_bulk_queue __percpu *bulkq;
@@ -62,10 +61,14 @@ struct bpf_cpu_map_entry {
/* Queue with potential multi-producers, and single-consumer kthread */
struct ptr_ring *queue;
struct task_struct *kthread;
- struct work_struct kthread_stop_wq;
+
+ struct bpf_cpumap_val value;
+ struct bpf_prog *prog;
atomic_t refcnt; /* Control when this struct can be free'ed */
struct rcu_head rcu;
+
+ struct work_struct kthread_stop_wq;
};
struct bpf_cpu_map {
@@ -80,6 +83,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
+ u32 value_size = attr->value_size;
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
u64 cost;
@@ -90,7 +94,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+ (value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
+ value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) ||
+ attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
cmap = kzalloc(sizeof(*cmap), GFP_USER);
@@ -212,6 +218,8 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
{
if (atomic_dec_and_test(&rcpu->refcnt)) {
+ if (rcpu->prog)
+ bpf_prog_put(rcpu->prog);
/* The queue should be empty at this point */
__cpu_map_ring_cleanup(rcpu->queue);
ptr_ring_cleanup(rcpu->queue, NULL);
@@ -220,6 +228,75 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
}
}
+static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
+ void **frames, int n,
+ struct xdp_cpumap_stats *stats)
+{
+ struct xdp_rxq_info rxq;
+ struct xdp_buff xdp;
+ int i, nframes = 0;
+
+ if (!rcpu->prog)
+ return n;
+
+ rcu_read_lock_bh();
+
+ xdp_set_return_frame_no_direct();
+ xdp.rxq = &rxq;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ u32 act;
+ int err;
+
+ rxq.dev = xdpf->dev_rx;
+ rxq.mem = xdpf->mem;
+ /* TODO: report queue_index to xdp_rxq_info */
+
+ xdp_convert_frame_to_buff(xdpf, &xdp);
+
+ act = bpf_prog_run_xdp(rcpu->prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ err = xdp_update_frame_from_buff(&xdp, xdpf);
+ if (err < 0) {
+ xdp_return_frame(xdpf);
+ stats->drop++;
+ } else {
+ frames[nframes++] = xdpf;
+ stats->pass++;
+ }
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(xdpf->dev_rx, &xdp,
+ rcpu->prog);
+ if (unlikely(err)) {
+ xdp_return_frame(xdpf);
+ stats->drop++;
+ } else {
+ stats->redirect++;
+ }
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fallthrough */
+ case XDP_DROP:
+ xdp_return_frame(xdpf);
+ stats->drop++;
+ break;
+ }
+ }
+
+ if (stats->redirect)
+ xdp_do_flush_map();
+
+ xdp_clear_return_frame_no_direct();
+
+ rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
+
+ return nframes;
+}
+
#define CPUMAP_BATCH 8
static int cpu_map_kthread_run(void *data)
@@ -234,11 +311,12 @@ static int cpu_map_kthread_run(void *data)
* kthread_stop signal until queue is empty.
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
+ struct xdp_cpumap_stats stats = {}; /* zero stats */
+ gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
unsigned int drops = 0, sched = 0;
void *frames[CPUMAP_BATCH];
void *skbs[CPUMAP_BATCH];
- gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
- int i, n, m;
+ int i, n, m, nframes;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
@@ -259,8 +337,8 @@ static int cpu_map_kthread_run(void *data)
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
- n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
-
+ n = __ptr_ring_consume_batched(rcpu->queue, frames,
+ CPUMAP_BATCH);
for (i = 0; i < n; i++) {
void *f = frames[i];
struct page *page = virt_to_page(f);
@@ -272,15 +350,19 @@ static int cpu_map_kthread_run(void *data)
prefetchw(page);
}
- m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
- if (unlikely(m == 0)) {
- for (i = 0; i < n; i++)
- skbs[i] = NULL; /* effect: xdp_return_frame */
- drops = n;
+ /* Support running another XDP prog on this CPU */
+ nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+ if (nframes) {
+ m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
+ if (unlikely(m == 0)) {
+ for (i = 0; i < nframes; i++)
+ skbs[i] = NULL; /* effect: xdp_return_frame */
+ drops += nframes;
+ }
}
local_bh_disable();
- for (i = 0; i < n; i++) {
+ for (i = 0; i < nframes; i++) {
struct xdp_frame *xdpf = frames[i];
struct sk_buff *skb = skbs[i];
int ret;
@@ -297,7 +379,7 @@ static int cpu_map_kthread_run(void *data)
drops++;
}
/* Feedback loop via tracepoint */
- trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
+ trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
local_bh_enable(); /* resched point, may call do_softirq() */
}
@@ -307,13 +389,38 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
-static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
- int map_id)
+bool cpu_map_prog_allowed(struct bpf_map *map)
{
+ return map->map_type == BPF_MAP_TYPE_CPUMAP &&
+ map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
+}
+
+static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
+{
+ struct bpf_prog *prog;
+
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (prog->expected_attach_type != BPF_XDP_CPUMAP) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
+ rcpu->value.bpf_prog.id = prog->aux->id;
+ rcpu->prog = prog;
+
+ return 0;
+}
+
+static struct bpf_cpu_map_entry *
+__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
+{
+ int numa, err, i, fd = value->bpf_prog.fd;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
struct xdp_bulk_queue *bq;
- int numa, err, i;
/* Have map->numa_node, but choose node of redirect target CPU */
numa = cpu_to_node(cpu);
@@ -338,19 +445,22 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
if (!rcpu->queue)
goto free_bulkq;
- err = ptr_ring_init(rcpu->queue, qsize, gfp);
+ err = ptr_ring_init(rcpu->queue, value->qsize, gfp);
if (err)
goto free_queue;
rcpu->cpu = cpu;
rcpu->map_id = map_id;
- rcpu->qsize = qsize;
+ rcpu->value.qsize = value->qsize;
+
+ if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
+ goto free_ptr_ring;
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
"cpumap/%d/map:%d", cpu, map_id);
if (IS_ERR(rcpu->kthread))
- goto free_ptr_ring;
+ goto free_prog;
get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
@@ -361,6 +471,9 @@ static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
return rcpu;
+free_prog:
+ if (rcpu->prog)
+ bpf_prog_put(rcpu->prog);
free_ptr_ring:
ptr_ring_cleanup(rcpu->queue, NULL);
free_queue:
@@ -437,12 +550,12 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ struct bpf_cpumap_val cpumap_value = {};
struct bpf_cpu_map_entry *rcpu;
-
/* Array index key correspond to CPU number */
u32 key_cpu = *(u32 *)key;
- /* Value is the queue size */
- u32 qsize = *(u32 *)value;
+
+ memcpy(&cpumap_value, value, map->value_size);
if (unlikely(map_flags > BPF_EXIST))
return -EINVAL;
@@ -450,18 +563,18 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
return -E2BIG;
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;
- if (unlikely(qsize > 16384)) /* sanity limit on qsize */
+ if (unlikely(cpumap_value.qsize > 16384)) /* sanity limit on qsize */
return -EOVERFLOW;
/* Make sure CPU is a valid possible cpu */
if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
return -ENODEV;
- if (qsize == 0) {
+ if (cpumap_value.qsize == 0) {
rcpu = NULL; /* Same as deleting */
} else {
/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
- rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
+ rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
if (!rcpu)
return -ENOMEM;
rcpu->cmap = cmap;
@@ -523,7 +636,7 @@ static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
struct bpf_cpu_map_entry *rcpu =
__cpu_map_lookup_elem(map, *(u32 *)key);
- return rcpu ? &rcpu->qsize : NULL;
+ return rcpu ? &rcpu->value : NULL;
}
static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index c69071e334bf..8a7af11b411f 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -4,6 +4,7 @@
#include <linux/fs.h>
#include <linux/filter.h>
#include <linux/kernel.h>
+#include <linux/btf_ids.h>
struct bpf_iter_seq_map_info {
u32 mid;
@@ -81,7 +82,10 @@ static const struct seq_operations bpf_map_seq_ops = {
.show = bpf_map_seq_show,
};
-static const struct bpf_iter_reg bpf_map_reg_info = {
+BTF_ID_LIST(btf_bpf_map_id)
+BTF_ID(struct, bpf_map)
+
+static struct bpf_iter_reg bpf_map_reg_info = {
.target = "bpf_map",
.seq_ops = &bpf_map_seq_ops,
.init_seq_private = NULL,
@@ -96,6 +100,7 @@ static const struct bpf_iter_reg bpf_map_reg_info = {
static int __init bpf_map_iter_init(void)
{
+ bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id;
return bpf_iter_reg_target(&bpf_map_reg_info);
}
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
index 310241ca7991..71405edd667c 100644
--- a/kernel/bpf/net_namespace.c
+++ b/kernel/bpf/net_namespace.c
@@ -25,6 +25,32 @@ struct bpf_netns_link {
/* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);
+static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
+{
+ switch (type) {
+#ifdef CONFIG_INET
+ case NETNS_BPF_SK_LOOKUP:
+ static_branch_dec(&bpf_sk_lookup_enabled);
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
+static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
+{
+ switch (type) {
+#ifdef CONFIG_INET
+ case NETNS_BPF_SK_LOOKUP:
+ static_branch_inc(&bpf_sk_lookup_enabled);
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
/* Must be called with netns_bpf_mutex held. */
static void netns_bpf_run_array_detach(struct net *net,
enum netns_bpf_attach_type type)
@@ -36,12 +62,50 @@ static void netns_bpf_run_array_detach(struct net *net,
bpf_prog_array_free(run_array);
}
+static int link_index(struct net *net, enum netns_bpf_attach_type type,
+ struct bpf_netns_link *link)
+{
+ struct bpf_netns_link *pos;
+ int i = 0;
+
+ list_for_each_entry(pos, &net->bpf.links[type], node) {
+ if (pos == link)
+ return i;
+ i++;
+ }
+ return -ENOENT;
+}
+
+static int link_count(struct net *net, enum netns_bpf_attach_type type)
+{
+ struct list_head *pos;
+ int i = 0;
+
+ list_for_each(pos, &net->bpf.links[type])
+ i++;
+ return i;
+}
+
+static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
+ struct bpf_prog_array *prog_array)
+{
+ struct bpf_netns_link *pos;
+ unsigned int i = 0;
+
+ list_for_each_entry(pos, &net->bpf.links[type], node) {
+ prog_array->items[i].prog = pos->link.prog;
+ i++;
+ }
+}
+
static void bpf_netns_link_release(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
enum netns_bpf_attach_type type = net_link->netns_type;
+ struct bpf_prog_array *old_array, *new_array;
struct net *net;
+ int cnt, idx;
mutex_lock(&netns_bpf_mutex);
@@ -53,9 +117,30 @@ static void bpf_netns_link_release(struct bpf_link *link)
if (!net)
goto out_unlock;
- netns_bpf_run_array_detach(net, type);
+ /* Mark attach point as unused */
+ netns_bpf_attach_type_unneed(type);
+
+ /* Remember link position in case of safe delete */
+ idx = link_index(net, type, net_link);
list_del(&net_link->node);
+ cnt = link_count(net, type);
+ if (!cnt) {
+ netns_bpf_run_array_detach(net, type);
+ goto out_unlock;
+ }
+
+ old_array = rcu_dereference_protected(net->bpf.run_array[type],
+ lockdep_is_held(&netns_bpf_mutex));
+ new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
+ if (!new_array) {
+ WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
+ goto out_unlock;
+ }
+ fill_prog_array(net, type, new_array);
+ rcu_assign_pointer(net->bpf.run_array[type], new_array);
+ bpf_prog_array_free(old_array);
+
out_unlock:
mutex_unlock(&netns_bpf_mutex);
}
@@ -77,7 +162,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
enum netns_bpf_attach_type type = net_link->netns_type;
struct bpf_prog_array *run_array;
struct net *net;
- int ret = 0;
+ int idx, ret;
if (old_prog && old_prog != link->prog)
return -EPERM;
@@ -95,7 +180,10 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
run_array = rcu_dereference_protected(net->bpf.run_array[type],
lockdep_is_held(&netns_bpf_mutex));
- WRITE_ONCE(run_array->items[0].prog, new_prog);
+ idx = link_index(net, type, net_link);
+ ret = bpf_prog_array_update_at(run_array, idx, new_prog);
+ if (ret)
+ goto out_unlock;
old_prog = xchg(&link->prog, new_prog);
bpf_prog_put(old_prog);
@@ -309,18 +397,30 @@ int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
return ret;
}
+static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
+{
+ switch (type) {
+ case NETNS_BPF_FLOW_DISSECTOR:
+ return 1;
+ case NETNS_BPF_SK_LOOKUP:
+ return 64;
+ default:
+ return 0;
+ }
+}
+
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
enum netns_bpf_attach_type type)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
struct bpf_prog_array *run_array;
- int err;
+ int cnt, err;
mutex_lock(&netns_bpf_mutex);
- /* Allow attaching only one prog or link for now */
- if (!list_empty(&net->bpf.links[type])) {
+ cnt = link_count(net, type);
+ if (cnt >= netns_bpf_max_progs(type)) {
err = -E2BIG;
goto out_unlock;
}
@@ -334,6 +434,9 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
case NETNS_BPF_FLOW_DISSECTOR:
err = flow_dissector_bpf_prog_attach_check(net, link->prog);
break;
+ case NETNS_BPF_SK_LOOKUP:
+ err = 0; /* nothing to check */
+ break;
default:
err = -EINVAL;
break;
@@ -341,16 +444,22 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
if (err)
goto out_unlock;
- run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
+ run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
if (!run_array) {
err = -ENOMEM;
goto out_unlock;
}
- run_array->items[0].prog = link->prog;
- rcu_assign_pointer(net->bpf.run_array[type], run_array);
list_add_tail(&net_link->node, &net->bpf.links[type]);
+ fill_prog_array(net, type, run_array);
+ run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
+ lockdep_is_held(&netns_bpf_mutex));
+ bpf_prog_array_free(run_array);
+
+ /* Mark attach point as used */
+ netns_bpf_attach_type_need(type);
+
out_unlock:
mutex_unlock(&netns_bpf_mutex);
return err;
@@ -426,8 +535,10 @@ static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
mutex_lock(&netns_bpf_mutex);
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
netns_bpf_run_array_detach(net, type);
- list_for_each_entry(net_link, &net->bpf.links[type], node)
+ list_for_each_entry(net_link, &net->bpf.links[type], node) {
net_link->net = NULL; /* auto-detach link */
+ netns_bpf_attach_type_unneed(type);
+ }
if (net->bpf.progs[type])
bpf_prog_put(net->bpf.progs[type]);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7ea9dfbebd8c..d07417d17712 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2022,6 +2022,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ if (expected_attach_type == BPF_SK_LOOKUP)
+ return 0;
+ return -EINVAL;
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
return -EINVAL;
@@ -2756,6 +2760,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_SK_LOOKUP:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
if (!capable(CAP_NET_ADMIN))
@@ -2817,6 +2822,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
case BPF_TRACE_ITER:
return BPF_PROG_TYPE_TRACING;
+ case BPF_SK_LOOKUP:
+ return BPF_PROG_TYPE_SK_LOOKUP;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -2953,6 +2960,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
case BPF_FLOW_DISSECTOR:
+ case BPF_SK_LOOKUP:
return netns_bpf_prog_query(attr, uattr);
default:
return -EINVAL;
@@ -3891,6 +3899,7 @@ static int link_create(union bpf_attr *attr)
ret = tracing_bpf_link_attach(attr, prog);
break;
case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_SK_LOOKUP:
ret = netns_bpf_link_create(attr, prog);
break;
default:
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 4dbf2b6035f8..2feecf095609 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
+#include <linux/btf_ids.h>
struct bpf_iter_seq_task_common {
struct pid_namespace *ns;
@@ -312,7 +313,11 @@ static const struct seq_operations task_file_seq_ops = {
.show = task_file_seq_show,
};
-static const struct bpf_iter_reg task_reg_info = {
+BTF_ID_LIST(btf_task_file_ids)
+BTF_ID(struct, task_struct)
+BTF_ID(struct, file)
+
+static struct bpf_iter_reg task_reg_info = {
.target = "task",
.seq_ops = &task_seq_ops,
.init_seq_private = init_seq_pidns,
@@ -325,7 +330,7 @@ static const struct bpf_iter_reg task_reg_info = {
},
};
-static const struct bpf_iter_reg task_file_reg_info = {
+static struct bpf_iter_reg task_file_reg_info = {
.target = "task_file",
.seq_ops = &task_file_seq_ops,
.init_seq_private = init_seq_pidns,
@@ -344,10 +349,13 @@ static int __init task_iter_init(void)
{
int ret;
+ task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
ret = bpf_iter_reg_target(&task_reg_info);
if (ret)
return ret;
+ task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
+ task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
return bpf_iter_reg_target(&task_file_reg_info);
}
late_initcall(task_iter_init);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3c1efc9d08fd..9a6703bc3f36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3878,10 +3878,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
meta->ref_obj_id = reg->ref_obj_id;
}
- } else if (arg_type == ARG_PTR_TO_SOCKET) {
+ } else if (arg_type == ARG_PTR_TO_SOCKET ||
+ arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
expected_type = PTR_TO_SOCKET;
- if (type != expected_type)
- goto err_type;
+ if (!(register_is_null(reg) &&
+ arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
+ if (type != expected_type)
+ goto err_type;
+ }
} else if (arg_type == ARG_PTR_TO_BTF_ID) {
expected_type = PTR_TO_BTF_ID;
if (type != expected_type)
@@ -7354,6 +7358,9 @@ static int check_return_code(struct bpf_verifier_env *env)
return -ENOTSUPP;
}
break;
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ range = tnum_range(SK_DROP, SK_PASS);
+ break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.