From 2c78ee898d8f10ae6fb2fa23a3fbaec96b1b7366 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 13 May 2020 16:03:54 -0700 Subject: bpf: Implement CAP_BPF Implement permissions as stated in uapi/linux/capability.h In order to do that the verifier allow_ptr_leaks flag is split into four flags and they are set as: env->allow_ptr_leaks = bpf_allow_ptr_leaks(); env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); The first three currently equivalent to perfmon_capable(), since leaking kernel pointers and reading kernel memory via side channel attacks is roughly equivalent to reading kernel memory with cap_perfmon. 'bpf_capable' enables bounded loops, precision tracking, bpf to bpf calls and other verifier features. 'allow_ptr_leaks' enable ptr leaks, ptr conversions, subtraction of pointers. 'bypass_spec_v1' disables speculative analysis in the verifier, run time mitigations in bpf array, and enables indirect variable access in bpf programs. 'bypass_spec_v4' disables emission of sanitation code by the verifier. That means that the networking BPF program loaded with CAP_BPF + CAP_NET_ADMIN will have speculative checks done by the verifier and other spectre mitigation applied. Such networking BPF program will not be able to leak kernel pointers and will not be able to access arbitrary kernel memory. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200513230355.7858-3-alexei.starovoitov@gmail.com --- kernel/bpf/arraymap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 95d77770353c..1d5bb0d983b2 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -77,7 +77,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int ret, numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; - bool unpriv = !capable(CAP_SYS_ADMIN); + bool bypass_spec_v1 = bpf_bypass_spec_v1(); u64 cost, array_size, mask64; struct bpf_map_memory mem; struct bpf_array *array; @@ -95,7 +95,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) mask64 -= 1; index_mask = mask64; - if (unpriv) { + if (!bypass_spec_v1) { /* round up array size to nearest power of 2, * since cpu will speculate within index_mask limits */ @@ -149,7 +149,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); } array->index_mask = index_mask; - array->map.unpriv_array = unpriv; + array->map.bypass_spec_v1 = bypass_spec_v1; /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); @@ -219,7 +219,7 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); - if (map->unpriv_array) { + if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { @@ -1053,7 +1053,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map, *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); - if (map->unpriv_array) { + if (!map->bypass_spec_v1) { *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); } else { -- cgit v1.2.3-71-gd317 From 41c48f3a98231738c5ce79f6f2aa6e40ba924d18 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:43 -0700 Subject: bpf: Support access to bpf map fields There are multiple use-cases when it's convenient to have access to bpf map fields, both `struct bpf_map` and map type specific struct-s such as `struct bpf_array`, `struct bpf_htab`, etc. For example while working with sock arrays it can be necessary to calculate the key based on map->max_entries (some_hash % max_entries). Currently this is solved by communicating max_entries via "out-of-band" channel, e.g. via additional map with known key to get info about target map. That works, but is not very convenient and error-prone while working with many maps. In other cases necessary data is dynamic (i.e. unknown at loading time) and it's impossible to get it at all. For example while working with a hash table it can be convenient to know how much capacity is already used (bpf_htab.count.counter for BPF_F_NO_PREALLOC case). At the same time kernel knows this info and can provide it to bpf program. Fill this gap by adding support to access bpf map fields from bpf program for both `struct bpf_map` and map type specific fields. Support is implemented via btf_struct_access() so that a user can define their own `struct bpf_map` or map type specific struct in their program with only necessary fields and preserve_access_index attribute, cast a map to this struct and use a field. For example: struct bpf_map { __u32 max_entries; } __attribute__((preserve_access_index)); struct bpf_array { struct bpf_map map; __u32 elem_size; } __attribute__((preserve_access_index)); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); __type(key, __u32); __type(value, __u32); } m_array SEC(".maps"); SEC("cgroup_skb/egress") int cg_skb(void *ctx) { struct bpf_array *array = (struct bpf_array *)&m_array; struct bpf_map *map = (struct bpf_map *)&m_array; /* .. use map->max_entries or array->map.max_entries .. */ } Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock in net/ipv4/bpf_tcp_ca.c) the patch allows access to any fields of corresponding struct. Only reading from map fields is supported. For btf_struct_access() to work there should be a way to know btf id of a struct that corresponds to a map type. To get btf id there should be a way to get a stringified name of map-specific struct, such as "bpf_array", "bpf_htab", etc for a map type. Two new fields are added to `struct bpf_map_ops` to handle it: * .map_btf_name keeps a btf name of a struct returned by map_alloc(); * .map_btf_id is used to cache btf id of that struct. To make btf ids calculation cheaper they're calculated once while preparing btf_vmlinux and cached same way as it's done for btf_id field of `struct bpf_func_proto` While calculating btf ids, struct names are NOT checked for collision. Collisions will be checked as a part of the work to prepare btf ids used in verifier in compile time that should land soon. The only known collision for `struct bpf_htab` (kernel/bpf/hashtab.c vs net/core/sock_map.c) was fixed earlier. Both new fields .map_btf_name and .map_btf_id must be set for a map type for the feature to work. If neither is set for a map type, verifier will return ENOTSUPP on a try to access map_ptr of corresponding type. If just one of them set, it's verifier misconfiguration. Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be supported separately. The feature is available only for CONFIG_DEBUG_INFO_BTF=y and gated by perfmon_capable() so that unpriv programs won't have access to bpf map fields. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/6479686a0cd1e9067993df57b4c3eef0e276fec9.1592600985.git.rdna@fb.com --- include/linux/bpf.h | 9 +++ include/linux/bpf_verifier.h | 1 + kernel/bpf/arraymap.c | 3 + kernel/bpf/btf.c | 40 +++++++++++ kernel/bpf/hashtab.c | 3 + kernel/bpf/verifier.c | 82 +++++++++++++++++++--- .../selftests/bpf/verifier/map_ptr_mixing.c | 2 +- 7 files changed, 131 insertions(+), 9 deletions(-) (limited to 'kernel/bpf/arraymap.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..1e1501ee53ce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -92,6 +92,10 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + + /* BTF name and id of struct allocated by map_alloc */ + const char * const map_btf_name; + int *map_btf_id; }; struct bpf_map_memory { @@ -1109,6 +1113,11 @@ static inline bool bpf_allow_ptr_leaks(void) return perfmon_capable(); } +static inline bool bpf_allow_ptr_to_map_access(void) +{ + return perfmon_capable(); +} + static inline bool bpf_bypass_spec_v1(void) { return perfmon_capable(); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ca08db4ffb5f..53c7bd568c5d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -379,6 +379,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 11584618e861..e7caa48812fb 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -494,6 +494,7 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) vma->vm_pgoff + pgoff); } +static int array_map_btf_id; const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -510,6 +511,8 @@ const struct bpf_map_ops array_map_ops = { .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, + .map_btf_name = "bpf_array", + .map_btf_id = &array_map_btf_id, }; const struct bpf_map_ops percpu_array_map_ops = { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 3eb804618a53..e377d1981730 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3571,6 +3571,41 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, return ctx_type; } +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) +#define BPF_LINK_TYPE(_id, _name) +#define BPF_MAP_TYPE(_id, _ops) \ + [_id] = &_ops, +#include +#undef BPF_PROG_TYPE +#undef BPF_LINK_TYPE +#undef BPF_MAP_TYPE +}; + +static int btf_vmlinux_map_ids_init(const struct btf *btf, + struct bpf_verifier_log *log) +{ + const struct bpf_map_ops *ops; + int i, btf_id; + + for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) { + ops = btf_vmlinux_map_ops[i]; + if (!ops || (!ops->map_btf_name && !ops->map_btf_id)) + continue; + if (!ops->map_btf_name || !ops->map_btf_id) { + bpf_log(log, "map type %d is misconfigured\n", i); + return -EINVAL; + } + btf_id = btf_find_by_name_kind(btf, ops->map_btf_name, + BTF_KIND_STRUCT); + if (btf_id < 0) + return btf_id; + *ops->map_btf_id = btf_id; + } + + return 0; +} + static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *t, @@ -3633,6 +3668,11 @@ struct btf *btf_parse_vmlinux(void) /* btf_parse_vmlinux() runs under bpf_verifier_lock */ bpf_ctx_convert.t = btf_type_by_id(btf, btf_id); + /* find bpf map structs for map_ptr access checking */ + err = btf_vmlinux_map_ids_init(btf, log); + if (err < 0) + goto errout; + bpf_struct_ops_init(btf, log); btf_verifier_env_free(env); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b4b288a3c3c9..2c5999e02060 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1614,6 +1614,7 @@ htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, true, false); } +static int htab_map_btf_id; const struct bpf_map_ops htab_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1625,6 +1626,8 @@ const struct bpf_map_ops htab_map_ops = { .map_gen_lookup = htab_map_gen_lookup, .map_seq_show_elem = htab_map_seq_show_elem, BATCH_OPS(htab), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_map_btf_id, }; const struct bpf_map_ops htab_lru_map_ops = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a1857c4ffaaf..7460f967cb75 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1351,6 +1351,19 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, __mark_reg_not_init(env, regs + regno); } +static void mark_btf_ld_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, u32 regno, + enum bpf_reg_type reg_type, u32 btf_id) +{ + if (reg_type == SCALAR_VALUE) { + mark_reg_unknown(env, regs, regno); + return; + } + mark_reg_known_zero(env, regs, regno); + regs[regno].type = PTR_TO_BTF_ID; + regs[regno].btf_id = btf_id; +} + #define DEF_NOT_SUBREG (0) static void init_reg_state(struct bpf_verifier_env *env, struct bpf_func_state *state) @@ -3182,19 +3195,68 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; - if (atype == BPF_READ && value_regno >= 0) { - if (ret == SCALAR_VALUE) { - mark_reg_unknown(env, regs, value_regno); - return 0; - } - mark_reg_known_zero(env, regs, value_regno); - regs[value_regno].type = PTR_TO_BTF_ID; - regs[value_regno].btf_id = btf_id; + if (atype == BPF_READ && value_regno >= 0) + mark_btf_ld_reg(env, regs, value_regno, ret, btf_id); + + return 0; +} + +static int check_ptr_to_map_access(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, + int regno, int off, int size, + enum bpf_access_type atype, + int value_regno) +{ + struct bpf_reg_state *reg = regs + regno; + struct bpf_map *map = reg->map_ptr; + const struct btf_type *t; + const char *tname; + u32 btf_id; + int ret; + + if (!btf_vmlinux) { + verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n"); + return -ENOTSUPP; + } + + if (!map->ops->map_btf_id || !*map->ops->map_btf_id) { + verbose(env, "map_ptr access not supported for map type %d\n", + map->map_type); + return -ENOTSUPP; + } + + t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id); + tname = btf_name_by_offset(btf_vmlinux, t->name_off); + + if (!env->allow_ptr_to_map_access) { + verbose(env, + "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", + tname); + return -EPERM; } + if (off < 0) { + verbose(env, "R%d is %s invalid negative access: off=%d\n", + regno, tname, off); + return -EACCES; + } + + if (atype != BPF_READ) { + verbose(env, "only read from %s is supported\n", tname); + return -EACCES; + } + + ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id); + if (ret < 0) + return ret; + + if (value_regno >= 0) + mark_btf_ld_reg(env, regs, value_regno, ret, btf_id); + return 0; } + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -3363,6 +3425,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == PTR_TO_BTF_ID) { err = check_ptr_to_btf_access(env, regs, regno, off, size, t, value_regno); + } else if (reg->type == CONST_PTR_TO_MAP) { + err = check_ptr_to_map_access(env, regs, regno, off, size, t, + value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -10951,6 +11016,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->strict_alignment = false; env->allow_ptr_leaks = bpf_allow_ptr_leaks(); + env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access(); env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c index cd26ee6b7b1d..1f2b8c4cb26d 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c @@ -56,7 +56,7 @@ .fixup_map_in_map = { 16 }, .fixup_map_array_48b = { 13 }, .result = REJECT, - .errstr = "R0 invalid mem access 'map_ptr'", + .errstr = "only read from bpf_array is supported", }, { "cond: two branches returning different map pointers for lookup (tail, tail)", -- cgit v1.2.3-71-gd317 From 2872e9ac33a4440173418147351ed4f93177e763 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:44 -0700 Subject: bpf: Set map_btf_{name, id} for all map types Set map_btf_name and map_btf_id for all map types so that map fields can be accessed by bpf programs. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/a825f808f22af52b018dbe82f1c7d29dab5fc978.1592600985.git.rdna@fb.com --- kernel/bpf/arraymap.c | 15 +++++++++++++++ kernel/bpf/bpf_struct_ops.c | 3 +++ kernel/bpf/cpumap.c | 3 +++ kernel/bpf/devmap.c | 6 ++++++ kernel/bpf/hashtab.c | 12 ++++++++++++ kernel/bpf/local_storage.c | 3 +++ kernel/bpf/lpm_trie.c | 3 +++ kernel/bpf/queue_stack_maps.c | 6 ++++++ kernel/bpf/reuseport_array.c | 3 +++ kernel/bpf/ringbuf.c | 3 +++ kernel/bpf/stackmap.c | 3 +++ net/core/bpf_sk_storage.c | 3 +++ net/core/sock_map.c | 6 ++++++ net/xdp/xskmap.c | 3 +++ 14 files changed, 72 insertions(+) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e7caa48812fb..ec5cd11032aa 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -515,6 +515,7 @@ const struct bpf_map_ops array_map_ops = { .map_btf_id = &array_map_btf_id, }; +static int percpu_array_map_btf_id; const struct bpf_map_ops percpu_array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -525,6 +526,8 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_delete_elem = array_map_delete_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &percpu_array_map_btf_id, }; static int fd_array_map_alloc_check(union bpf_attr *attr) @@ -871,6 +874,7 @@ static void prog_array_map_free(struct bpf_map *map) fd_array_map_free(map); } +static int prog_array_map_btf_id; const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = prog_array_map_alloc, @@ -886,6 +890,8 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, + .map_btf_name = "bpf_array", + .map_btf_id = &prog_array_map_btf_id, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, @@ -964,6 +970,7 @@ static void perf_event_fd_array_release(struct bpf_map *map, rcu_read_unlock(); } +static int perf_event_array_map_btf_id; const struct bpf_map_ops perf_event_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -975,6 +982,8 @@ const struct bpf_map_ops perf_event_array_map_ops = { .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &perf_event_array_map_btf_id, }; #ifdef CONFIG_CGROUPS @@ -997,6 +1006,7 @@ static void cgroup_fd_array_free(struct bpf_map *map) fd_array_map_free(map); } +static int cgroup_array_map_btf_id; const struct bpf_map_ops cgroup_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -1007,6 +1017,8 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &cgroup_array_map_btf_id, }; #endif @@ -1080,6 +1092,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } +static int array_of_maps_map_btf_id; const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, @@ -1092,4 +1105,6 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &array_of_maps_map_btf_id, }; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index c6b0decaa46a..969c5d47f81f 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -611,6 +611,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) return map; } +static int bpf_struct_ops_map_btf_id; const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_alloc_check = bpf_struct_ops_map_alloc_check, .map_alloc = bpf_struct_ops_map_alloc, @@ -620,6 +621,8 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_delete_elem = bpf_struct_ops_map_delete_elem, .map_update_elem = bpf_struct_ops_map_update_elem, .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, + .map_btf_name = "bpf_struct_ops_map", + .map_btf_id = &bpf_struct_ops_map_btf_id, }; /* "const void *" because some subsystem is diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 27595fc6da56..bd8658055c16 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -543,6 +543,7 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } +static int cpu_map_btf_id; const struct bpf_map_ops cpu_map_ops = { .map_alloc = cpu_map_alloc, .map_free = cpu_map_free, @@ -551,6 +552,8 @@ const struct bpf_map_ops cpu_map_ops = { .map_lookup_elem = cpu_map_lookup_elem, .map_get_next_key = cpu_map_get_next_key, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_cpu_map", + .map_btf_id = &cpu_map_btf_id, }; static int bq_flush_to_queue(struct xdp_bulk_queue *bq) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 0cbb72cdaf63..58acc46861ef 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -747,6 +747,7 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, map, key, value, map_flags); } +static int dev_map_btf_id; const struct bpf_map_ops dev_map_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, @@ -755,8 +756,11 @@ const struct bpf_map_ops dev_map_ops = { .map_update_elem = dev_map_update_elem, .map_delete_elem = dev_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_dtab", + .map_btf_id = &dev_map_btf_id, }; +static int dev_map_hash_map_btf_id; const struct bpf_map_ops dev_map_hash_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, @@ -765,6 +769,8 @@ const struct bpf_map_ops dev_map_hash_ops = { .map_update_elem = dev_map_hash_update_elem, .map_delete_elem = dev_map_hash_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_dtab", + .map_btf_id = &dev_map_hash_map_btf_id, }; static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 2c5999e02060..acd06081d81d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1630,6 +1630,7 @@ const struct bpf_map_ops htab_map_ops = { .map_btf_id = &htab_map_btf_id, }; +static int htab_lru_map_btf_id; const struct bpf_map_ops htab_lru_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1642,6 +1643,8 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_gen_lookup = htab_lru_map_gen_lookup, .map_seq_show_elem = htab_map_seq_show_elem, BATCH_OPS(htab_lru), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_lru_map_btf_id, }; /* Called from eBPF program */ @@ -1746,6 +1749,7 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } +static int htab_percpu_map_btf_id; const struct bpf_map_ops htab_percpu_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1756,8 +1760,11 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_delete_elem = htab_map_delete_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, BATCH_OPS(htab_percpu), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_percpu_map_btf_id, }; +static int htab_lru_percpu_map_btf_id; const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1768,6 +1775,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_delete_elem = htab_lru_map_delete_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, BATCH_OPS(htab_lru_percpu), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_lru_percpu_map_btf_id, }; static int fd_htab_map_alloc_check(union bpf_attr *attr) @@ -1890,6 +1899,7 @@ static void htab_of_map_free(struct bpf_map *map) fd_htab_map_free(map); } +static int htab_of_maps_map_btf_id; const struct bpf_map_ops htab_of_maps_map_ops = { .map_alloc_check = fd_htab_map_alloc_check, .map_alloc = htab_of_map_alloc, @@ -1902,4 +1912,6 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_of_maps_map_btf_id, }; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 33d01866bcc2..51bd5a8cb01b 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -409,6 +409,7 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key, rcu_read_unlock(); } +static int cgroup_storage_map_btf_id; const struct bpf_map_ops cgroup_storage_map_ops = { .map_alloc = cgroup_storage_map_alloc, .map_free = cgroup_storage_map_free, @@ -418,6 +419,8 @@ const struct bpf_map_ops cgroup_storage_map_ops = { .map_delete_elem = cgroup_storage_delete_elem, .map_check_btf = cgroup_storage_check_btf, .map_seq_show_elem = cgroup_storage_seq_show_elem, + .map_btf_name = "bpf_cgroup_storage_map", + .map_btf_id = &cgroup_storage_map_btf_id, }; int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index c8cc4e4cf98d..1abd4e3f906d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -735,6 +735,7 @@ static int trie_check_btf(const struct bpf_map *map, -EINVAL : 0; } +static int trie_map_btf_id; const struct bpf_map_ops trie_map_ops = { .map_alloc = trie_alloc, .map_free = trie_free, @@ -743,4 +744,6 @@ const struct bpf_map_ops trie_map_ops = { .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, .map_check_btf = trie_check_btf, + .map_btf_name = "lpm_trie", + .map_btf_id = &trie_map_btf_id, }; diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 05c8e043b9d2..80c66a6d7c54 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -262,6 +262,7 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, return -EINVAL; } +static int queue_map_btf_id; const struct bpf_map_ops queue_map_ops = { .map_alloc_check = queue_stack_map_alloc_check, .map_alloc = queue_stack_map_alloc, @@ -273,8 +274,11 @@ const struct bpf_map_ops queue_map_ops = { .map_pop_elem = queue_map_pop_elem, .map_peek_elem = queue_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, + .map_btf_name = "bpf_queue_stack", + .map_btf_id = &queue_map_btf_id, }; +static int stack_map_btf_id; const struct bpf_map_ops stack_map_ops = { .map_alloc_check = queue_stack_map_alloc_check, .map_alloc = queue_stack_map_alloc, @@ -286,4 +290,6 @@ const struct bpf_map_ops stack_map_ops = { .map_pop_elem = stack_map_pop_elem, .map_peek_elem = stack_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, + .map_btf_name = "bpf_queue_stack", + .map_btf_id = &stack_map_btf_id, }; diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 21cde24386db..a09922f656e4 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -345,6 +345,7 @@ static int reuseport_array_get_next_key(struct bpf_map *map, void *key, return 0; } +static int reuseport_array_map_btf_id; const struct bpf_map_ops reuseport_array_ops = { .map_alloc_check = reuseport_array_alloc_check, .map_alloc = reuseport_array_alloc, @@ -352,4 +353,6 @@ const struct bpf_map_ops reuseport_array_ops = { .map_lookup_elem = reuseport_array_lookup_elem, .map_get_next_key = reuseport_array_get_next_key, .map_delete_elem = reuseport_array_delete_elem, + .map_btf_name = "reuseport_array", + .map_btf_id = &reuseport_array_map_btf_id, }; diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 180414bb0d3e..dbf37aff4827 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -294,6 +294,7 @@ static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp, return 0; } +static int ringbuf_map_btf_id; const struct bpf_map_ops ringbuf_map_ops = { .map_alloc = ringbuf_map_alloc, .map_free = ringbuf_map_free, @@ -303,6 +304,8 @@ const struct bpf_map_ops ringbuf_map_ops = { .map_update_elem = ringbuf_map_update_elem, .map_delete_elem = ringbuf_map_delete_elem, .map_get_next_key = ringbuf_map_get_next_key, + .map_btf_name = "bpf_ringbuf_map", + .map_btf_id = &ringbuf_map_btf_id, }; /* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself, diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 599488f25e40..27dc9b1b08a5 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -613,6 +613,7 @@ static void stack_map_free(struct bpf_map *map) put_callchain_buffers(); } +static int stack_trace_map_btf_id; const struct bpf_map_ops stack_trace_map_ops = { .map_alloc = stack_map_alloc, .map_free = stack_map_free, @@ -621,6 +622,8 @@ const struct bpf_map_ops stack_trace_map_ops = { .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_stack_map", + .map_btf_id = &stack_trace_map_btf_id, }; static int __init stack_map_init(void) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 1dae4b543243..6f921c4ddc2c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -919,6 +919,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) return -ENOENT; } +static int sk_storage_map_btf_id; const struct bpf_map_ops sk_storage_map_ops = { .map_alloc_check = bpf_sk_storage_map_alloc_check, .map_alloc = bpf_sk_storage_map_alloc, @@ -928,6 +929,8 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, .map_check_btf = bpf_sk_storage_map_check_btf, + .map_btf_name = "bpf_sk_storage_map", + .map_btf_id = &sk_storage_map_btf_id, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 2b884f2d562a..4c1123c749bb 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -643,6 +643,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_map_btf_id; const struct bpf_map_ops sock_map_ops = { .map_alloc = sock_map_alloc, .map_free = sock_map_free, @@ -653,6 +654,8 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_stab", + .map_btf_id = &sock_map_btf_id, }; struct bpf_shtab_elem { @@ -1176,6 +1179,7 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_hash_map_btf_id; const struct bpf_map_ops sock_hash_ops = { .map_alloc = sock_hash_alloc, .map_free = sock_hash_free, @@ -1186,6 +1190,8 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_shtab", + .map_btf_id = &sock_hash_map_btf_id, }; static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 1dc7208c71ba..8367adbbe9df 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -254,6 +254,7 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, spin_unlock_bh(&map->lock); } +static int xsk_map_btf_id; const struct bpf_map_ops xsk_map_ops = { .map_alloc = xsk_map_alloc, .map_free = xsk_map_free, @@ -264,4 +265,6 @@ const struct bpf_map_ops xsk_map_ops = { .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "xsk_map", + .map_btf_id = &xsk_map_btf_id, }; -- cgit v1.2.3-71-gd317 From bba1dc0b55ac462d24ed1228ad49800c238cd6d7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jun 2020 21:33:39 -0700 Subject: bpf: Remove redundant synchronize_rcu. bpf_free_used_maps() or close(map_fd) will trigger map_free callback. bpf_free_used_maps() is called after bpf prog is no longer executing: bpf_prog_put->call_rcu->bpf_prog_free->bpf_free_used_maps. Hence there is no need to call synchronize_rcu() to protect map elements. Note that hash_of_maps and array_of_maps update/delete inner maps via sys_bpf() that calls maybe_wait_bpf_programs() and synchronize_rcu(). Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Paul E. McKenney Link: https://lore.kernel.org/bpf/20200630043343.53195-2-alexei.starovoitov@gmail.com --- kernel/bpf/arraymap.c | 9 --------- kernel/bpf/hashtab.c | 8 +++----- kernel/bpf/lpm_trie.c | 5 ----- kernel/bpf/queue_stack_maps.c | 7 ------- kernel/bpf/reuseport_array.c | 2 -- kernel/bpf/ringbuf.c | 7 ------- kernel/bpf/stackmap.c | 3 --- 7 files changed, 3 insertions(+), 38 deletions(-) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index ec5cd11032aa..c66e8273fccd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -386,13 +386,6 @@ static void array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); - /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, - * so the programs (can be more than one that used this map) were - * disconnected from events. Wait for outstanding programs to complete - * and free the array - */ - synchronize_rcu(); - if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); @@ -546,8 +539,6 @@ static void fd_array_map_free(struct bpf_map *map) struct bpf_array *array = container_of(map, struct bpf_array, map); int i; - synchronize_rcu(); - /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) BUG_ON(array->ptrs[i] != NULL); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index acd06081d81d..d4378d7d442b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1290,12 +1290,10 @@ static void htab_map_free(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, - * so the programs (can be more than one that used this map) were - * disconnected from events. Wait for outstanding critical sections in - * these programs to complete + /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback. + * bpf_free_used_maps() is called after bpf prog is no longer executing. + * There is no need to synchronize_rcu() here to protect map elements. */ - synchronize_rcu(); /* some of free_htab_elem() callbacks for elements of this map may * not have executed. Wait for them. diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 1abd4e3f906d..44474bf3ab7a 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -589,11 +589,6 @@ static void trie_free(struct bpf_map *map) struct lpm_trie_node __rcu **slot; struct lpm_trie_node *node; - /* Wait for outstanding programs to complete - * update/lookup/delete/get_next_key and free the trie. - */ - synchronize_rcu(); - /* Always start at the root and walk down to a node that has no * children. Then free that node, nullify its reference in the parent * and start over. diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 80c66a6d7c54..44184f82916a 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -101,13 +101,6 @@ static void queue_stack_map_free(struct bpf_map *map) { struct bpf_queue_stack *qs = bpf_queue_stack(map); - /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, - * so the programs (can be more than one that used this map) were - * disconnected from events. Wait for outstanding critical sections in - * these programs to complete - */ - synchronize_rcu(); - bpf_map_area_free(qs); } diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index a09922f656e4..3625c4fcc65c 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -96,8 +96,6 @@ static void reuseport_array_free(struct bpf_map *map) struct sock *sk; u32 i; - synchronize_rcu(); - /* * ops->map_*_elem() will not be able to access this * array now. Hence, this function only races with diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index dbf37aff4827..13a8d3967e07 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -215,13 +215,6 @@ static void ringbuf_map_free(struct bpf_map *map) { struct bpf_ringbuf_map *rb_map; - /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, - * so the programs (can be more than one that used this map) were - * disconnected from events. Wait for outstanding critical sections in - * these programs to complete - */ - synchronize_rcu(); - rb_map = container_of(map, struct bpf_ringbuf_map, map); bpf_ringbuf_free(rb_map->rb); kfree(rb_map); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 27dc9b1b08a5..071f98d0f7c6 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -604,9 +604,6 @@ static void stack_map_free(struct bpf_map *map) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); - /* wait for bpf programs to complete before freeing stack map */ - synchronize_rcu(); - bpf_map_area_free(smap->elems); pcpu_freelist_destroy(&smap->freelist); bpf_map_area_free(smap); -- cgit v1.2.3-71-gd317 From d3cc2ab546adc6e52b65f36f7c34820d2830d0c9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:15 -0700 Subject: bpf: Implement bpf iterator for array maps The bpf iterators for array and percpu array are implemented. Similar to hash maps, for percpu array map, bpf program will receive values from all cpus. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184115.590532-1-yhs@fb.com --- kernel/bpf/arraymap.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/map_iter.c | 6 ++- 2 files changed, 142 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c66e8273fccd..8ff419b632a6 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -487,6 +487,142 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) vma->vm_pgoff + pgoff); } +struct bpf_iter_seq_array_map_info { + struct bpf_map *map; + void *percpu_value_buf; + u32 index; +}; + +static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_map *map = info->map; + struct bpf_array *array; + u32 index; + + if (info->index >= map->max_entries) + return NULL; + + if (*pos == 0) + ++*pos; + array = container_of(map, struct bpf_array, map); + index = info->index & array->index_mask; + if (info->percpu_value_buf) + return array->pptrs[index]; + return array->value + array->elem_size * index; +} + +static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_map *map = info->map; + struct bpf_array *array; + u32 index; + + ++*pos; + ++info->index; + if (info->index >= map->max_entries) + return NULL; + + array = container_of(map, struct bpf_array, map); + index = info->index & array->index_mask; + if (info->percpu_value_buf) + return array->pptrs[index]; + return array->value + array->elem_size * index; +} + +static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_iter__bpf_map_elem ctx = {}; + struct bpf_map *map = info->map; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int off = 0, cpu = 0; + void __percpu **pptr; + u32 size; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, v == NULL); + if (!prog) + return 0; + + ctx.meta = &meta; + ctx.map = info->map; + if (v) { + ctx.key = &info->index; + + if (!info->percpu_value_buf) { + ctx.value = v; + } else { + pptr = v; + size = round_up(map->value_size, 8); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu), + size); + off += size; + } + ctx.value = info->percpu_value_buf; + } + } + + return bpf_iter_run_prog(prog, &ctx); +} + +static int bpf_array_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_array_map_seq_show(seq, v); +} + +static void bpf_array_map_seq_stop(struct seq_file *seq, void *v) +{ + if (!v) + (void)__bpf_array_map_seq_show(seq, NULL); +} + +static int bpf_iter_init_array_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_array_map_info *seq_info = priv_data; + struct bpf_map *map = aux->map; + void *value_buf; + u32 buf_size; + + if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + buf_size = round_up(map->value_size, 8) * num_possible_cpus(); + value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); + if (!value_buf) + return -ENOMEM; + + seq_info->percpu_value_buf = value_buf; + } + + seq_info->map = map; + return 0; +} + +static void bpf_iter_fini_array_map(void *priv_data) +{ + struct bpf_iter_seq_array_map_info *seq_info = priv_data; + + kfree(seq_info->percpu_value_buf); +} + +static const struct seq_operations bpf_array_map_seq_ops = { + .start = bpf_array_map_seq_start, + .next = bpf_array_map_seq_next, + .stop = bpf_array_map_seq_stop, + .show = bpf_array_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_array_map_seq_ops, + .init_seq_private = bpf_iter_init_array_map, + .fini_seq_private = bpf_iter_fini_array_map, + .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), +}; + static int array_map_btf_id; const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, @@ -506,6 +642,7 @@ const struct bpf_map_ops array_map_ops = { .map_update_batch = generic_map_update_batch, .map_btf_name = "bpf_array", .map_btf_id = &array_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int percpu_array_map_btf_id; @@ -521,6 +658,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_check_btf = array_map_check_btf, .map_btf_name = "bpf_array", .map_btf_id = &percpu_array_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int fd_array_map_alloc_check(union bpf_attr *attr) diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index bcb68b55bf65..fbe1f557cb88 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -106,10 +106,12 @@ static int bpf_iter_check_map(struct bpf_prog *prog, bool is_percpu = false; if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) is_percpu = true; else if (map->map_type != BPF_MAP_TYPE_HASH && - map->map_type != BPF_MAP_TYPE_LRU_HASH) + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) return -EINVAL; key_acc_size = prog->aux->max_rdonly_access; -- cgit v1.2.3-71-gd317