From 09c02d553c49fa6965fe39e28355b62f6ad02792 Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Thu, 14 Jan 2021 11:10:36 -0300 Subject: bpf, selftests: Fold test_current_pid_tgid_new_ns into test_progs. Currently tests for bpf_get_ns_current_pid_tgid() are outside test_progs. This change folds test cases into test_progs. Changes from v11: - Fixed test failure is not detected. - Removed EXIT(3) call as it will stop test_progs execution. Signed-off-by: Carlos Neira Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210114141033.GA17348@localhost Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/prog_tests/ns_current_pid_tgid.c | 118 +++++++-------- .../selftests/bpf/progs/test_ns_current_pid_tgid.c | 28 ++-- .../selftests/bpf/test_current_pid_tgid_new_ns.c | 160 --------------------- 5 files changed, 70 insertions(+), 240 deletions(-) delete mode 100644 tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index f5b7ef93618c..9abca0616ec0 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -26,7 +26,6 @@ test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user test_sysctl -test_current_pid_tgid_new_ns xdping test_cpp *.skel.h diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0552b07717b6..63d6288e419c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -36,8 +36,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sysctl \ - test_progs-no_alu32 \ - test_current_pid_tgid_new_ns + test_progs-no_alu32 # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index e74dc501b27f..31a3114906e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -1,85 +1,87 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ + +#define _GNU_SOURCE #include +#include "test_ns_current_pid_tgid.skel.h" #include #include #include #include +#include +#include +#include +#include -struct bss { - __u64 dev; - __u64 ino; - __u64 pid_tgid; - __u64 user_pid_tgid; -}; +#define STACK_SIZE (1024 * 1024) +static char child_stack[STACK_SIZE]; -void test_ns_current_pid_tgid(void) +static int test_current_pid_tgid(void *args) { - const char *probe_name = "raw_tracepoint/sys_enter"; - const char *file = "test_ns_current_pid_tgid.o"; - int err, key = 0, duration = 0; - struct bpf_link *link = NULL; - struct bpf_program *prog; - struct bpf_map *bss_map; - struct bpf_object *obj; - struct bss bss; + struct test_ns_current_pid_tgid__bss *bss; + struct test_ns_current_pid_tgid *skel; + int err = -1, duration = 0; + pid_t tgid, pid; struct stat st; - __u64 id; - - obj = bpf_object__open_file(file, NULL); - if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) - return; - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + skel = test_ns_current_pid_tgid__open_and_load(); + if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n")) goto cleanup; - bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); - if (CHECK(!bss_map, "find_bss_map", "failed\n")) + pid = syscall(SYS_gettid); + tgid = getpid(); + + err = stat("/proc/self/ns/pid", &st); + if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err)) goto cleanup; - prog = bpf_object__find_program_by_title(obj, probe_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", - probe_name)) + bss = skel->bss; + bss->dev = st.st_dev; + bss->ino = st.st_ino; + bss->user_pid = 0; + bss->user_tgid = 0; + + err = test_ns_current_pid_tgid__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) goto cleanup; - memset(&bss, 0, sizeof(bss)); - pid_t tid = syscall(SYS_gettid); - pid_t pid = getpid(); + /* trigger tracepoint */ + usleep(1); + ASSERT_EQ(bss->user_pid, pid, "pid"); + ASSERT_EQ(bss->user_tgid, tgid, "tgid"); + err = 0; - id = (__u64) tid << 32 | pid; - bss.user_pid_tgid = id; +cleanup: + test_ns_current_pid_tgid__destroy(skel); - if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) { - perror("Failed to stat /proc/self/ns/pid"); - goto cleanup; - } + return err; +} - bss.dev = st.st_dev; - bss.ino = st.st_ino; +static void test_ns_current_pid_tgid_new_ns(void) +{ + int wstatus, duration = 0; + pid_t cpid; - err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); - if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err)) - goto cleanup; + /* Create a process in a new namespace, this process + * will be the init process of this new namespace hence will be pid 1. + */ + cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE, + CLONE_NEWPID | SIGCHLD, NULL); - link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", - PTR_ERR(link))) { - link = NULL; - goto cleanup; - } + if (CHECK(cpid == -1, "clone", strerror(errno))) + return; - /* trigger some syscalls */ - usleep(1); + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno))) + return; - err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); - if (CHECK(err, "set_bss", "failed to get bss : %d\n", err)) - goto cleanup; + if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed")) + return; +} - if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", - "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) - goto cleanup; -cleanup: - bpf_link__destroy(link); - bpf_object__close(obj); +void test_ns_current_pid_tgid(void) +{ + if (test__start_subtest("ns_current_pid_tgid_root_ns")) + test_current_pid_tgid(NULL); + if (test__start_subtest("ns_current_pid_tgid_new_ns")) + test_ns_current_pid_tgid_new_ns(); } diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c index 1dca70a6de2f..0763d49f9c42 100644 --- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -5,31 +5,21 @@ #include #include -static volatile struct { - __u64 dev; - __u64 ino; - __u64 pid_tgid; - __u64 user_pid_tgid; -} res; +__u64 user_pid = 0; +__u64 user_tgid = 0; +__u64 dev = 0; +__u64 ino = 0; -SEC("raw_tracepoint/sys_enter") -int trace(void *ctx) +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int handler(const void *ctx) { - __u64 ns_pid_tgid, expected_pid; struct bpf_pidns_info nsdata; - __u32 key = 0; - if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata, - sizeof(struct bpf_pidns_info))) + if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info))) return 0; - ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid; - expected_pid = res.user_pid_tgid; - - if (expected_pid != ns_pid_tgid) - return 0; - - res.pid_tgid = ns_pid_tgid; + user_pid = nsdata.pid; + user_tgid = nsdata.tgid; return 0; } diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c deleted file mode 100644 index ec53b1ef90d2..000000000000 --- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c +++ /dev/null @@ -1,160 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include "test_progs.h" - -#define CHECK_NEWNS(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s:FAIL:%s ", __func__, tag); \ - printf(format); \ - } else { \ - printf("%s:PASS:%s\n", __func__, tag); \ - } \ - __ret; \ -}) - -struct bss { - __u64 dev; - __u64 ino; - __u64 pid_tgid; - __u64 user_pid_tgid; -}; - -int main(int argc, char **argv) -{ - pid_t pid; - int exit_code = 1; - struct stat st; - - printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n"); - - if (stat("/proc/self/ns/pid", &st)) { - perror("stat failed on /proc/self/ns/pid ns\n"); - printf("%s:FAILED\n", argv[0]); - return exit_code; - } - - if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS), - "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno)) - return exit_code; - - pid = fork(); - if (pid == -1) { - perror("Fork() failed\n"); - printf("%s:FAILED\n", argv[0]); - return exit_code; - } - - if (pid > 0) { - int status; - - usleep(5); - waitpid(pid, &status, 0); - return 0; - } else { - - pid = fork(); - if (pid == -1) { - perror("Fork() failed\n"); - printf("%s:FAILED\n", argv[0]); - return exit_code; - } - - if (pid > 0) { - int status; - waitpid(pid, &status, 0); - return 0; - } else { - if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL), - "Unmounting proc", "Cannot umount proc! errno=%d\n", errno)) - return exit_code; - - if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL), - "Mounting proc", "Cannot mount proc! errno=%d\n", errno)) - return exit_code; - - const char *probe_name = "raw_tracepoint/sys_enter"; - const char *file = "test_ns_current_pid_tgid.o"; - struct bpf_link *link = NULL; - struct bpf_program *prog; - struct bpf_map *bss_map; - struct bpf_object *obj; - int exit_code = 1; - int err, key = 0; - struct bss bss; - struct stat st; - __u64 id; - - obj = bpf_object__open_file(file, NULL); - if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) - return exit_code; - - err = bpf_object__load(obj); - if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno)) - goto cleanup; - - bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); - if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n")) - goto cleanup; - - prog = bpf_object__find_program_by_title(obj, probe_name); - if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n", - probe_name)) - goto cleanup; - - memset(&bss, 0, sizeof(bss)); - pid_t tid = syscall(SYS_gettid); - pid_t pid = getpid(); - - id = (__u64) tid << 32 | pid; - bss.user_pid_tgid = id; - - if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st), - "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno)) - goto cleanup; - - bss.dev = st.st_dev; - bss.ino = st.st_ino; - - err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); - if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err)) - goto cleanup; - - link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n", - PTR_ERR(link))) { - link = NULL; - goto cleanup; - } - - /* trigger some syscalls */ - usleep(1); - - err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); - if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err)) - goto cleanup; - - if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", - "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) - goto cleanup; - - exit_code = 0; - printf("%s:PASS\n", argv[0]); -cleanup: - if (!link) { - bpf_link__destroy(link); - link = NULL; - } - bpf_object__close(obj); - } - } - return 0; -} -- cgit v1.2.3-71-gd317 From 79d1b684e21533bd417df50704a9692830eb8358 Mon Sep 17 00:00:00 2001 From: Gary Lin Date: Tue, 19 Jan 2021 18:25:01 +0800 Subject: selftests/bpf: Add verifier tests for x64 jit jump padding There are 3 tests added into verifier's jit tests to trigger x64 jit jump padding. The first test can be represented as the following assembly code: 1: bpf_call bpf_get_prandom_u32 2: if r0 == 1 goto pc+128 3: if r0 == 2 goto pc+128 ... 129: if r0 == 128 goto pc+128 130: goto pc+128 131: goto pc+127 ... 256: goto pc+2 257: goto pc+1 258: r0 = 1 259: ret We first store a random number to r0 and add the corresponding conditional jumps (2~129) to make verifier believe that those jump instructions from 130 to 257 are reachable. When the program is sent to x64 jit, it starts to optimize out the NOP jumps backwards from 257. Since there are 128 such jumps, the program easily reaches 15 passes and triggers jump padding. Here is the x64 jit code of the first test: 0: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0] 5: 66 90 xchg ax,ax 7: 55 push rbp 8: 48 89 e5 mov rbp,rsp b: e8 4c 90 75 e3 call 0xffffffffe375905c 10: 48 83 f8 01 cmp rax,0x1 14: 0f 84 fe 04 00 00 je 0x518 1a: 48 83 f8 02 cmp rax,0x2 1e: 0f 84 f9 04 00 00 je 0x51d ... f6: 48 83 f8 18 cmp rax,0x18 fa: 0f 84 8b 04 00 00 je 0x58b 100: 48 83 f8 19 cmp rax,0x19 104: 0f 84 86 04 00 00 je 0x590 10a: 48 83 f8 1a cmp rax,0x1a 10e: 0f 84 81 04 00 00 je 0x595 ... 500: 0f 84 83 01 00 00 je 0x689 506: 48 81 f8 80 00 00 00 cmp rax,0x80 50d: 0f 84 76 01 00 00 je 0x689 513: e9 71 01 00 00 jmp 0x689 518: e9 6c 01 00 00 jmp 0x689 ... 5fe: e9 86 00 00 00 jmp 0x689 603: e9 81 00 00 00 jmp 0x689 608: 0f 1f 00 nop DWORD PTR [rax] 60b: eb 7c jmp 0x689 60d: eb 7a jmp 0x689 ... 683: eb 04 jmp 0x689 685: eb 02 jmp 0x689 687: 66 90 xchg ax,ax 689: b8 01 00 00 00 mov eax,0x1 68e: c9 leave 68f: c3 ret As expected, a 3 bytes NOPs is inserted at 608 due to the transition from imm32 jmp to imm8 jmp. A 2 bytes NOPs is also inserted at 687 to replace a NOP jump. The second test case is tricky. Here is the assembly code: 1: bpf_call bpf_get_prandom_u32 2: if r0 == 1 goto pc+2048 3: if r0 == 2 goto pc+2048 ... 2049: if r0 == 2048 goto pc+2048 2050: goto pc+2048 2051: goto pc+16 2052: goto pc+15 ... 2064: goto pc+3 2065: goto pc+2 2066: goto pc+1 ... [repeat "goto pc+16".."goto pc+1" 127 times] ... 4099: r0 = 2 4100: ret There are 4 major parts of the program. 1) 1~2049: Those are instructions to make 2050~4098 reachable. Some of them also could generate the padding for jmp_cond. 2) 2050: This is the target instruction for the imm32 nop jmp padding. 3) 2051~4098: The repeated "goto 1~16" instructions are designed to be consumed by the nop jmp optimization. In the end, those instrucitons become 128 continuous 0 offset jmp and are optimized out in 1 pass, and this make insn 2050 an imm32 nop jmp in the next pass, so that we can trigger the 5 bytes padding. 4) 4099~4100: Those are the instructions to end the program. The x64 jit code is like this: 0: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0] 5: 66 90 xchg ax,ax 7: 55 push rbp 8: 48 89 e5 mov rbp,rsp b: e8 bc 7b d5 d3 call 0xffffffffd3d57bcc 10: 48 83 f8 01 cmp rax,0x1 14: 0f 84 7e 66 00 00 je 0x6698 1a: 48 83 f8 02 cmp rax,0x2 1e: 0f 84 74 66 00 00 je 0x6698 24: 48 83 f8 03 cmp rax,0x3 28: 0f 84 6a 66 00 00 je 0x6698 2e: 48 83 f8 04 cmp rax,0x4 32: 0f 84 60 66 00 00 je 0x6698 38: 48 83 f8 05 cmp rax,0x5 3c: 0f 84 56 66 00 00 je 0x6698 42: 48 83 f8 06 cmp rax,0x6 46: 0f 84 4c 66 00 00 je 0x6698 ... 666c: 48 81 f8 fe 07 00 00 cmp rax,0x7fe 6673: 0f 1f 40 00 nop DWORD PTR [rax+0x0] 6677: 74 1f je 0x6698 6679: 48 81 f8 ff 07 00 00 cmp rax,0x7ff 6680: 0f 1f 40 00 nop DWORD PTR [rax+0x0] 6684: 74 12 je 0x6698 6686: 48 81 f8 00 08 00 00 cmp rax,0x800 668d: 0f 1f 40 00 nop DWORD PTR [rax+0x0] 6691: 74 05 je 0x6698 6693: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0] 6698: b8 02 00 00 00 mov eax,0x2 669d: c9 leave 669e: c3 ret Since insn 2051~4098 are optimized out right before the padding pass, there are several conditional jumps from the first part are replaced with imm8 jmp_cond, and this triggers the 4 bytes padding, for example at 6673, 6680, and 668d. On the other hand, Insn 2050 is replaced with the 5 bytes nops at 6693. The third test is to invoke the first and second tests as subprogs to test bpf2bpf. Per the system log, there was one more jit happened with only one pass and the same jit code was produced. v4: - Add the second test case which triggers jmp_cond padding and imm32 nop jmp padding. - Add the new test case as another subprog Signed-off-by: Gary Lin Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210119102501.511-4-glin@suse.com --- tools/testing/selftests/bpf/test_verifier.c | 72 +++++++++++++++++++++++++++++ tools/testing/selftests/bpf/verifier/jit.c | 24 ++++++++++ 2 files changed, 96 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f8569f04064b..59bfa6201d1d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -297,6 +297,78 @@ static void bpf_fill_scale(struct bpf_test *self) } } +static int bpf_fill_torturous_jumps_insn_1(struct bpf_insn *insn) +{ + unsigned int len = 259, hlen = 128; + int i; + + insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32); + for (i = 1; i <= hlen; i++) { + insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, hlen); + insn[i + hlen] = BPF_JMP_A(hlen - i); + } + insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 1); + insn[len - 1] = BPF_EXIT_INSN(); + + return len; +} + +static int bpf_fill_torturous_jumps_insn_2(struct bpf_insn *insn) +{ + unsigned int len = 4100, jmp_off = 2048; + int i, j; + + insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32); + for (i = 1; i <= jmp_off; i++) { + insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, jmp_off); + } + insn[i++] = BPF_JMP_A(jmp_off); + for (; i <= jmp_off * 2 + 1; i+=16) { + for (j = 0; j < 16; j++) { + insn[i + j] = BPF_JMP_A(16 - j - 1); + } + } + + insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 2); + insn[len - 1] = BPF_EXIT_INSN(); + + return len; +} + +static void bpf_fill_torturous_jumps(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + int i = 0; + + switch (self->retval) { + case 1: + self->prog_len = bpf_fill_torturous_jumps_insn_1(insn); + return; + case 2: + self->prog_len = bpf_fill_torturous_jumps_insn_2(insn); + return; + case 3: + /* main */ + insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4); + insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 262); + insn[i++] = BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0); + insn[i++] = BPF_MOV64_IMM(BPF_REG_0, 3); + insn[i++] = BPF_EXIT_INSN(); + + /* subprog 1 */ + i += bpf_fill_torturous_jumps_insn_1(insn + i); + + /* subprog 2 */ + i += bpf_fill_torturous_jumps_insn_2(insn + i); + + self->prog_len = i; + return; + default: + self->prog_len = 0; + break; + } +} + /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ #define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index c33adf344fae..df215e004566 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -105,3 +105,27 @@ .result = ACCEPT, .retval = 2, }, +{ + "jit: torturous jumps, imm8 nop jmp and pure jump padding", + .insns = { }, + .fill_helper = bpf_fill_torturous_jumps, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "jit: torturous jumps, imm32 nop jmp and jmp_cond padding", + .insns = { }, + .fill_helper = bpf_fill_torturous_jumps, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, +}, +{ + "jit: torturous jumps in subprog", + .insns = { }, + .fill_helper = bpf_fill_torturous_jumps, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 3, +}, -- cgit v1.2.3-71-gd317 From 407be92206d54517765e028c8b79032eb8f8ac86 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 19 Jan 2021 12:22:37 +0000 Subject: selftests: bpf: Add a new test for bare tracepoints Reuse module_attach infrastructure to add a new bare tracepoint to check we can attach to it as a raw tracepoint. Signed-off-by: Qais Yousef Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210119122237.2426878-3-qais.yousef@arm.com --- .../selftests/bpf/bpf_testmod/bpf_testmod-events.h | 6 +++++ .../selftests/bpf/bpf_testmod/bpf_testmod.c | 21 ++++++++++++++++- .../selftests/bpf/bpf_testmod/bpf_testmod.h | 6 +++++ .../selftests/bpf/prog_tests/module_attach.c | 27 ++++++++++++++++++++++ .../selftests/bpf/progs/test_module_attach.c | 10 ++++++++ 5 files changed, 69 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index b83ea448bc79..89c6d58e5dd6 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -28,6 +28,12 @@ TRACE_EVENT(bpf_testmod_test_read, __entry->pid, __entry->comm, __entry->off, __entry->len) ); +/* A bare tracepoint with no event associated with it */ +DECLARE_TRACE(bpf_testmod_test_write_bare, + TP_PROTO(struct task_struct *task, struct bpf_testmod_test_write_ctx *ctx), + TP_ARGS(task, ctx) +); + #endif /* _BPF_TESTMOD_EVENTS_H */ #undef TRACE_INCLUDE_PATH diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 0b991e115d1f..141d8da687d2 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -31,9 +31,28 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, EXPORT_SYMBOL(bpf_testmod_test_read); ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO); +noinline ssize_t +bpf_testmod_test_write(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + struct bpf_testmod_test_write_ctx ctx = { + .buf = buf, + .off = off, + .len = len, + }; + + trace_bpf_testmod_test_write_bare(current, &ctx); + + return -EIO; /* always fail */ +} +EXPORT_SYMBOL(bpf_testmod_test_write); +ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO); + static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { - .attr = { .name = "bpf_testmod", .mode = 0444, }, + .attr = { .name = "bpf_testmod", .mode = 0666, }, .read = bpf_testmod_test_read, + .write = bpf_testmod_test_write, }; static int bpf_testmod_init(void) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index b81adfedb4f6..b3892dc40111 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -11,4 +11,10 @@ struct bpf_testmod_test_read_ctx { size_t len; }; +struct bpf_testmod_test_write_ctx { + char *buf; + loff_t off; + size_t len; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 50796b651f72..5bc53d53d86e 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -21,9 +21,34 @@ static int trigger_module_test_read(int read_sz) return 0; } +static int trigger_module_test_write(int write_sz) +{ + int fd, err; + char *buf = malloc(write_sz); + + if (!buf) + return -ENOMEM; + + memset(buf, 'a', write_sz); + buf[write_sz-1] = '\0'; + + fd = open("/sys/kernel/bpf_testmod", O_WRONLY); + err = -errno; + if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) { + free(buf); + return err; + } + + write(fd, buf, write_sz); + close(fd); + free(buf); + return 0; +} + void test_module_attach(void) { const int READ_SZ = 456; + const int WRITE_SZ = 457; struct test_module_attach* skel; struct test_module_attach__bss *bss; int err; @@ -48,8 +73,10 @@ void test_module_attach(void) /* trigger tracepoint */ ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read"); + ASSERT_OK(trigger_module_test_write(WRITE_SZ), "trigger_write"); ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp"); + ASSERT_EQ(bss->raw_tp_bare_write_sz, WRITE_SZ, "raw_tp_bare"); ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf"); ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry"); ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual"); diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index efd1e287ac17..bd37ceec5587 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -17,6 +17,16 @@ int BPF_PROG(handle_raw_tp, return 0; } +__u32 raw_tp_bare_write_sz = 0; + +SEC("raw_tp/bpf_testmod_test_write_bare") +int BPF_PROG(handle_raw_tp_bare, + struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx) +{ + raw_tp_bare_write_sz = BPF_CORE_READ(write_ctx, len); + return 0; +} + __u32 tp_btf_read_sz = 0; SEC("tp_btf/bpf_testmod_test_read") -- cgit v1.2.3-71-gd317 From 13ca51d5eb358edcb673afccb48c3440b9fda21b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 19 Jan 2021 07:35:18 -0800 Subject: bpf: Permit size-0 datasec llvm patch https://reviews.llvm.org/D84002 permitted to emit empty rodata datasec if the elf .rodata section contains read-only data from local variables. These local variables will be not emitted as BTF_KIND_VARs since llvm converted these local variables as static variables with private linkage without debuginfo types. Such an empty rodata datasec will make skeleton code generation easy since for skeleton a rodata struct will be generated if there is a .rodata elf section. The existence of a rodata btf datasec is also consistent with the existence of a rodata map created by libbpf. The btf with such an empty rodata datasec will fail in the kernel though as kernel will reject a datasec with zero vlen and zero size. For example, for the below code, int sys_enter(void *ctx) { int fmt[6] = {1, 2, 3, 4, 5, 6}; int dst[6]; bpf_probe_read(dst, sizeof(dst), fmt); return 0; } We got the below btf (bpftool btf dump ./test.o): [1] PTR '(anon)' type_id=0 [2] FUNC_PROTO '(anon)' ret_type_id=3 vlen=1 'ctx' type_id=1 [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED [4] FUNC 'sys_enter' type_id=2 linkage=global [5] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=SIGNED [6] ARRAY '(anon)' type_id=5 index_type_id=7 nr_elems=4 [7] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none) [8] VAR '_license' type_id=6, linkage=global-alloc [9] DATASEC '.rodata' size=0 vlen=0 [10] DATASEC 'license' size=0 vlen=1 type_id=8 offset=0 size=4 When loading the ./test.o to the kernel with bpftool, we see the following error: libbpf: Error loading BTF: Invalid argument(22) libbpf: magic: 0xeb9f ... [6] ARRAY (anon) type_id=5 index_type_id=7 nr_elems=4 [7] INT __ARRAY_SIZE_TYPE__ size=4 bits_offset=0 nr_bits=32 encoding=(none) [8] VAR _license type_id=6 linkage=1 [9] DATASEC .rodata size=24 vlen=0 vlen == 0 libbpf: Error loading .BTF into kernel: -22. BTF is optional, ignoring. Basically, libbpf changed .rodata datasec size to 24 since elf .rodata section size is 24. The kernel then rejected the BTF since vlen = 0. Note that the above kernel verifier failure can be worked around with changing local variable "fmt" to a static or global, optionally const, variable. This patch permits a datasec with vlen = 0 in kernel. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210119153519.3901963-1-yhs@fb.com --- kernel/bpf/btf.c | 5 ----- tools/testing/selftests/bpf/prog_tests/btf.c | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8962f988514f..756a93f534b6 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3540,11 +3540,6 @@ static s32 btf_datasec_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (!btf_type_vlen(t)) { - btf_verifier_log_type(env, t, "vlen == 0"); - return -EINVAL; - } - if (!t->size) { btf_verifier_log_type(env, t, "size == 0"); return -EINVAL; diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8ae97e2a4b9d..055d2c0486ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3509,6 +3509,27 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 3 /* arr_t */, .max_entries = 4, }, +/* + * elf .rodata section size 4 and btf .rodata section vlen 0. + */ +{ + .descr = "datasec: vlen == 0", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* .rodata section */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 0), 4), + /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0.rodata"), + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, }; /* struct btf_raw_test raw_tests[] */ -- cgit v1.2.3-71-gd317 From 9cacf81f8161111db25f98e78a7a0e32ae142b3f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 15 Jan 2021 08:34:59 -0800 Subject: bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE Add custom implementation of getsockopt hook for TCP_ZEROCOPY_RECEIVE. We skip generic hooks for TCP_ZEROCOPY_RECEIVE and have a custom call in do_tcp_getsockopt using the on-stack data. This removes 3% overhead for locking/unlocking the socket. Without this patch: 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc With the patch applied: 0.52% 0.12% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt_kern Note, exporting uapi/tcp.h requires removing netinet/tcp.h from test_progs.h because those headers have confliciting definitions. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210115163501.805133-2-sdf@google.com --- include/linux/bpf-cgroup.h | 27 +- include/linux/indirect_call_wrapper.h | 6 + include/net/sock.h | 2 + include/net/tcp.h | 1 + kernel/bpf/cgroup.c | 46 +++ net/ipv4/tcp.c | 14 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + net/socket.c | 3 + tools/include/uapi/linux/tcp.h | 357 +++++++++++++++++++++ .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 1 + .../selftests/bpf/prog_tests/cls_redirect.c | 1 + .../selftests/bpf/prog_tests/sockmap_basic.c | 1 + .../testing/selftests/bpf/prog_tests/sockopt_sk.c | 28 ++ tools/testing/selftests/bpf/progs/sockopt_sk.c | 23 +- tools/testing/selftests/bpf/test_progs.h | 1 - 16 files changed, 506 insertions(+), 7 deletions(-) create mode 100644 tools/include/uapi/linux/tcp.h (limited to 'tools') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 72e69a0e1e8c..bcb2915e6124 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -147,6 +147,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, int __user *optlen, int max_optlen, int retval); +int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, + int optname, void *optval, + int *optlen, int retval); + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -364,10 +368,23 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, ({ \ int __ret = retval; \ if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \ - optname, optval, \ - optlen, max_optlen, \ - retval); \ + if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ + !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ + tcp_bpf_bypass_getsockopt, \ + level, optname)) \ + __ret = __cgroup_bpf_run_filter_getsockopt( \ + sock, level, optname, optval, optlen, \ + max_optlen, retval); \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ + optlen, retval) \ +({ \ + int __ret = retval; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_getsockopt_kern( \ + sock, level, optname, optval, optlen, retval); \ __ret; \ }) @@ -452,6 +469,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ + optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 54c02c84906a..cfcfef37b2f1 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -60,4 +60,10 @@ #define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__) #endif +#if IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET_1(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET_1(f, f1, ...) f(__VA_ARGS__) +#endif + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 129d200bccb4..7644ea64a376 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1174,6 +1174,8 @@ struct proto { int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); + bool (*bpf_bypass_getsockopt)(int level, + int optname); void (*release_cb)(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index 78d13c88720f..4bb42fb19711 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -403,6 +403,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +bool tcp_bpf_bypass_getsockopt(int level, int optname); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 96555a8a2c54..416e7738981b 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1486,6 +1486,52 @@ out: sockopt_free_buf(&ctx); return ret; } + +int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, + int optname, void *optval, + int *optlen, int retval) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_sockopt_kern ctx = { + .sk = sk, + .level = level, + .optname = optname, + .retval = retval, + .optlen = *optlen, + .optval = optval, + .optval_end = optval + *optlen, + }; + int ret; + + /* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy + * user data back into BPF buffer when reval != 0. This is + * done as an optimization to avoid extra copy, assuming + * kernel won't populate the data in case of an error. + * Here we always pass the data and memset() should + * be called if that data shouldn't be "exported". + */ + + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], + &ctx, BPF_PROG_RUN); + if (!ret) + return -EPERM; + + if (ctx.optlen > *optlen) + return -EFAULT; + + /* BPF programs only allowed to set retval to 0, not some + * arbitrary value. + */ + if (ctx.retval != 0 && ctx.retval != retval) + return -EFAULT; + + /* BPF programs can shrink the buffer, export the modifications. + */ + if (ctx.optlen != 0) + *optlen = ctx.optlen; + + return ctx.retval; +} #endif static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 856ae516ac18..26aa923cf522 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4099,6 +4099,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); + err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, + &zc, &len, err); release_sock(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, err)) goto zerocopy_rcv_sk_err; @@ -4133,6 +4135,18 @@ zerocopy_rcv_out: return 0; } +bool tcp_bpf_bypass_getsockopt(int level, int optname) +{ + /* TCP do_tcp_getsockopt has optimized getsockopt implementation + * to avoid extra socket lock for TCP_ZEROCOPY_RECEIVE. + */ + if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE) + return true; + + return false; +} +EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt); + int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 777306b5bc22..62b6fd385a47 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2793,6 +2793,7 @@ struct proto tcp_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0e1509b02cb3..8539715ff035 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2121,6 +2121,7 @@ struct proto tcpv6_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, diff --git a/net/socket.c b/net/socket.c index 33e8b6c4e1d3..7f0617ab5437 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2126,6 +2126,9 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, return __sys_setsockopt(fd, level, optname, optval, optlen); } +INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, + int optname)); + /* * Get a socket option. Because we don't know the option lengths we have * to pass a user mode parameter for the protocols to sort out. diff --git a/tools/include/uapi/linux/tcp.h b/tools/include/uapi/linux/tcp.h new file mode 100644 index 000000000000..13ceeb395eb8 --- /dev/null +++ b/tools/include/uapi/linux/tcp.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol. + * + * Version: @(#)tcp.h 1.0.2 04/28/93 + * + * Author: Fred N. van Kempen, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _UAPI_LINUX_TCP_H +#define _UAPI_LINUX_TCP_H + +#include +#include +#include + +struct tcphdr { + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; +#else +#error "Adjust your defines" +#endif + __be16 window; + __sum16 check; + __be16 urg_ptr; +}; + +/* + * The union cast uses a gcc extension to avoid aliasing problems + * (union is compatible to any of its members) + * This means this part of the code is -fstrict-aliasing safe now. + */ +union tcp_word_hdr { + struct tcphdr hdr; + __be32 words[5]; +}; + +#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) + +enum { + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) +}; + +/* + * TCP general constants + */ +#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ +#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ + +/* TCP socket options */ +#define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ +#define TCP_MAXSEG 2 /* Limit MSS */ +#define TCP_CORK 3 /* Never send partially complete segments */ +#define TCP_KEEPIDLE 4 /* Start keeplives after this period */ +#define TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define TCP_INFO 11 /* Information about this connection. */ +#define TCP_QUICKACK 12 /* Block/reenable quick acks */ +#define TCP_CONGESTION 13 /* Congestion control algorithm */ +#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ +#define TCP_REPAIR 19 /* TCP sock is under repair right now */ +#define TCP_REPAIR_QUEUE 20 +#define TCP_QUEUE_SEQ 21 +#define TCP_REPAIR_OPTIONS 22 +#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ +#define TCP_TIMESTAMP 24 +#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ +#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ +#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ +#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ +#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ +#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ +#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ +#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */ +#define TCP_ZEROCOPY_RECEIVE 35 +#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ + +#define TCP_CM_INQ TCP_INQ + +#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ + + +#define TCP_REPAIR_ON 1 +#define TCP_REPAIR_OFF 0 +#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ + +struct tcp_repair_opt { + __u32 opt_code; + __u32 opt_val; +}; + +struct tcp_repair_window { + __u32 snd_wl1; + __u32 snd_wnd; + __u32 max_window; + + __u32 rcv_wnd; + __u32 rcv_wup; +}; + +enum { + TCP_NO_QUEUE, + TCP_RECV_QUEUE, + TCP_SEND_QUEUE, + TCP_QUEUES_NR, +}; + +/* why fastopen failed from client perspective */ +enum tcp_fastopen_client_fail { + TFO_STATUS_UNSPEC, /* catch-all */ + TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */ + TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */ + TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */ +}; + +/* for TCP_INFO socket option */ +#define TCPI_OPT_TIMESTAMPS 1 +#define TCPI_OPT_SACK 2 +#define TCPI_OPT_WSCALE 4 +#define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ +#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ +#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ + +/* + * Sender's congestion state indicating normal or abnormal situations + * in the last round of packets sent. The state is driven by the ACK + * information and timer events. + */ +enum tcp_ca_state { + /* + * Nothing bad has been observed recently. + * No apparent reordering, packet loss, or ECN marks. + */ + TCP_CA_Open = 0, +#define TCPF_CA_Open (1< +#include #include #include "bpf_dctcp.skel.h" #include "bpf_cubic.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 9781d85cb223..e075d03ab630 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -7,6 +7,7 @@ #include #include +#include #include diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 85f73261fab0..b8b48cac2ac3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Cloudflare #include +#include #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index b25c9c45c148..d5b44b135c00 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -2,6 +2,12 @@ #include #include "cgroup_helpers.h" +#include + +#ifndef SOL_TCP +#define SOL_TCP IPPROTO_TCP +#endif + #define SOL_CUSTOM 0xdeadbeef static int getsetsockopt(void) @@ -11,6 +17,7 @@ static int getsetsockopt(void) char u8[4]; __u32 u32; char cc[16]; /* TCP_CA_NAME_MAX */ + struct tcp_zerocopy_receive zc; } buf = {}; socklen_t optlen; char *big_buf = NULL; @@ -154,6 +161,27 @@ static int getsetsockopt(void) goto err; } + /* TCP_ZEROCOPY_RECEIVE triggers */ + memset(&buf, 0, sizeof(buf)); + optlen = sizeof(buf.zc); + err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); + if (err) { + log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", + err, errno); + goto err; + } + + memset(&buf, 0, sizeof(buf)); + buf.zc.address = 12345; /* rejected by BPF */ + optlen = sizeof(buf.zc); + errno = 0; + err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); + if (errno != EPERM) { + log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", + err, errno); + goto err; + } + free(big_buf); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 712df7b49cb1..d3597f81e6e9 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include -#include +#include #include +#include #include char _license[] SEC("license") = "GPL"; @@ -12,6 +12,10 @@ __u32 _version SEC("version") = 1; #define PAGE_SIZE 4096 #endif +#ifndef SOL_TCP +#define SOL_TCP IPPROTO_TCP +#endif + #define SOL_CUSTOM 0xdeadbeef struct sockopt_sk { @@ -57,6 +61,21 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { + /* Verify that TCP_ZEROCOPY_RECEIVE triggers. + * It has a custom implementation for performance + * reasons. + */ + + if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) + return 0; /* EPERM, bounds check */ + + if (((struct tcp_zerocopy_receive *)optval)->address != 0) + return 0; /* EPERM, unexpected data */ + + return 1; + } + if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index e49e2fdde942..f7c2fd89d01a 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -16,7 +16,6 @@ typedef __u16 __sum16; #include #include #include -#include #include #include #include -- cgit v1.2.3-71-gd317 From 6095d5a271ad6dce30489526771a9040d78e0895 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Jan 2021 21:22:03 +0100 Subject: libbpf: Use string table index from index table if needed For very large ELF objects (with many sections), we could get special value SHN_XINDEX (65535) for elf object's string table index - e_shstrndx. Call elf_getshdrstrndx to get the proper string table index, instead of reading it directly from ELF header. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210121202203.9346-4-jolsa@kernel.org --- tools/lib/bpf/btf.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9970a288dda5..d9c10830d749 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -858,6 +858,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, Elf_Scn *scn = NULL; Elf *elf = NULL; GElf_Ehdr ehdr; + size_t shstrndx; if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", path); @@ -882,7 +883,14 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, pr_warn("failed to get EHDR from %s\n", path); goto done; } - if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { + + if (elf_getshdrstrndx(elf, &shstrndx)) { + pr_warn("failed to get section names section index for %s\n", + path); + goto done; + } + + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); goto done; } @@ -897,7 +905,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, idx, path); goto done; } - name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + name = elf_strptr(elf, shstrndx, sh.sh_name); if (!name) { pr_warn("failed to get section(%d) name from %s\n", idx, path); -- cgit v1.2.3-71-gd317 From 443edcefb8213155c0da22c4a999f4a49858fa39 Mon Sep 17 00:00:00 2001 From: Junlin Yang Date: Thu, 21 Jan 2021 20:23:09 +0800 Subject: selftest/bpf: Fix typo Change 'exeeds' to 'exceeds'. Signed-off-by: Junlin Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210121122309.1501-1-angkery@163.com --- tools/testing/selftests/bpf/prog_tests/btf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 055d2c0486ed..6a7ee7420701 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -914,7 +914,7 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Member exceeds struct_size", }, -/* Test member exeeds the size of struct +/* Test member exceeds the size of struct * * struct A { * int m; @@ -948,7 +948,7 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Member exceeds struct_size", }, -/* Test member exeeds the size of struct +/* Test member exceeds the size of struct * * struct A { * int m; -- cgit v1.2.3-71-gd317 From 78ed4045914c63054f2f377471b5a94f7006d61e Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 11:53:51 +0100 Subject: libbpf, xsk: Select AF_XDP BPF program based on kernel version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add detection for kernel version, and adapt the BPF program based on kernel support. This way, users will get the best possible performance from the BPF program. Signed-off-by: Björn Töpel Signed-off-by: Marek Majtyka Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20210122105351.11751-4-bjorn.topel@gmail.com --- tools/lib/bpf/xsk.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3e41ceeb1bc..20500fb1f17e 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -46,6 +46,11 @@ #define PF_XDP AF_XDP #endif +enum xsk_prog { + XSK_PROG_FALLBACK, + XSK_PROG_REDIRECT_FLAGS, +}; + struct xsk_umem { struct xsk_ring_prod *fill_save; struct xsk_ring_cons *comp_save; @@ -351,6 +356,54 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) +static enum xsk_prog get_xsk_prog(void) +{ + enum xsk_prog detected = XSK_PROG_FALLBACK; + struct bpf_load_program_attr prog_attr; + struct bpf_create_map_attr map_attr; + __u32 size_out, retval, duration; + char data_in = 0, data_out; + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd, ret; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_XSKMAP; + map_attr.key_size = sizeof(int); + map_attr.value_size = sizeof(int); + map_attr.max_entries = 1; + + map_fd = bpf_create_map_xattr(&map_attr); + if (map_fd < 0) + return detected; + + insns[0].imm = map_fd; + + memset(&prog_attr, 0, sizeof(prog_attr)); + prog_attr.prog_type = BPF_PROG_TYPE_XDP; + prog_attr.insns = insns; + prog_attr.insns_cnt = ARRAY_SIZE(insns); + prog_attr.license = "GPL"; + + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + if (prog_fd < 0) { + close(map_fd); + return detected; + } + + ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration); + if (!ret && retval == XDP_PASS) + detected = XSK_PROG_REDIRECT_FLAGS; + close(prog_fd); + close(map_fd); + return detected; +} + static int xsk_load_xdp_prog(struct xsk_socket *xsk) { static const int log_buf_size = 16 * 1024; @@ -358,7 +411,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) char log_buf[log_buf_size]; int err, prog_fd; - /* This is the C-program: + /* This is the fallback C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) * { * int ret, index = ctx->rx_queue_index; @@ -414,9 +467,31 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* The jumps are to this instruction */ BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, + /* This is the post-5.3 kernel C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); + * } + */ + struct bpf_insn prog_redirect_flags[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + size_t insns_cnt[] = {sizeof(prog) / sizeof(struct bpf_insn), + sizeof(prog_redirect_flags) / sizeof(struct bpf_insn), + }; + struct bpf_insn *progs[] = {prog, prog_redirect_flags}; + enum xsk_prog option = get_xsk_prog(); + + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], "LGPL-2.1 or BSD-2-Clause", 0, log_buf, log_buf_size); if (prog_fd < 0) { -- cgit v1.2.3-71-gd317 From 7140ef14007e472ea97853ae7046c483f9272397 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:14 +0100 Subject: selftests/bpf: Remove a lot of ifobject casting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of passing void * all over the place, let us pass the actual type (ifobject) and remove the void-ptr-to-type-ptr casting. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-2-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 88 ++++++++++++++++---------------- 1 file changed, 43 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 1e722ee76b1f..cd1dd2b7458f 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -224,14 +224,14 @@ static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt return csum_tcpudp_magic(saddr, daddr, len, proto, csum); } -static void gen_eth_hdr(void *data, struct ethhdr *eth_hdr) +static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) { - memcpy(eth_hdr->h_dest, ((struct ifobject *)data)->dst_mac, ETH_ALEN); - memcpy(eth_hdr->h_source, ((struct ifobject *)data)->src_mac, ETH_ALEN); + memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); eth_hdr->h_proto = htons(ETH_P_IP); } -static void gen_ip_hdr(void *data, struct iphdr *ip_hdr) +static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) { ip_hdr->version = IP_PKT_VER; ip_hdr->ihl = 0x5; @@ -241,15 +241,15 @@ static void gen_ip_hdr(void *data, struct iphdr *ip_hdr) ip_hdr->frag_off = 0; ip_hdr->ttl = IPDEFTTL; ip_hdr->protocol = IPPROTO_UDP; - ip_hdr->saddr = ((struct ifobject *)data)->src_ip; - ip_hdr->daddr = ((struct ifobject *)data)->dst_ip; + ip_hdr->saddr = ifobject->src_ip; + ip_hdr->daddr = ifobject->dst_ip; ip_hdr->check = 0; } -static void gen_udp_hdr(void *data, void *arg, struct udphdr *udp_hdr) +static void gen_udp_hdr(void *data, struct ifobject *ifobject, struct udphdr *udp_hdr) { - udp_hdr->source = htons(((struct ifobject *)arg)->src_port); - udp_hdr->dest = htons(((struct ifobject *)arg)->dst_port); + udp_hdr->source = htons(ifobject->src_port); + udp_hdr->dest = htons(ifobject->dst_port); udp_hdr->len = htons(UDP_PKT_SIZE); memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE); @@ -628,28 +628,27 @@ static inline int get_batch_size(int pkt_cnt) return opt_pkt_count - pkt_cnt; } -static void complete_tx_only_all(void *arg) +static void complete_tx_only_all(struct ifobject *ifobject) { bool pending; do { pending = false; - if (((struct ifobject *)arg)->xsk->outstanding_tx) { - complete_tx_only(((struct ifobject *) - arg)->xsk, BATCH_SIZE); - pending = !!((struct ifobject *)arg)->xsk->outstanding_tx; + if (ifobject->xsk->outstanding_tx) { + complete_tx_only(ifobject->xsk, BATCH_SIZE); + pending = !!ifobject->xsk->outstanding_tx; } } while (pending); } -static void tx_only_all(void *arg) +static void tx_only_all(struct ifobject *ifobject) { struct pollfd fds[MAX_SOCKS] = { }; u32 frame_nb = 0; int pkt_cnt = 0; int ret; - fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); fds[0].events = POLLOUT; while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { @@ -664,12 +663,12 @@ static void tx_only_all(void *arg) continue; } - tx_only(((struct ifobject *)arg)->xsk, &frame_nb, batch_size); + tx_only(ifobject->xsk, &frame_nb, batch_size); pkt_cnt += batch_size; } if (opt_pkt_count) - complete_tx_only_all(arg); + complete_tx_only_all(ifobject); } static void worker_pkt_dump(void) @@ -780,14 +779,14 @@ static void worker_pkt_validate(void) } } -static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr, +static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr, atomic_int *spinningptr) { int ctr = 0; int ret; - xsk_configure_umem((struct ifobject *)arg, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); - ret = xsk_configure_socket((struct ifobject *)arg); + xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket(ifobject); /* Retry Create Socket if it fails as xsk_socket__create() * is asynchronous @@ -798,9 +797,8 @@ static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr, pthread_mutex_lock(mutexptr); while (ret && ctr < SOCK_RECONF_CTR) { atomic_store(spinningptr, 1); - xsk_configure_umem((struct ifobject *)arg, - bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); - ret = xsk_configure_socket((struct ifobject *)arg); + xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); + ret = xsk_configure_socket(ifobject); usleep(USLEEP_MAX); ctr++; } @@ -818,6 +816,7 @@ static void *worker_testapp_validate(void *arg) struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data)); struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + struct ifobject *ifobject = (struct ifobject *)arg; void *bufs = NULL; pthread_attr_setstacksize(&attr, THREAD_STACK); @@ -828,49 +827,48 @@ static void *worker_testapp_validate(void *arg) if (bufs == MAP_FAILED) exit_with_error(errno); - if (strcmp(((struct ifobject *)arg)->nsname, "")) - switch_namespace(((struct ifobject *)arg)->ifdict_index); + if (strcmp(ifobject->nsname, "")) + switch_namespace(ifobject->ifdict_index); } - if (((struct ifobject *)arg)->fv.vector == tx) { + if (ifobject->fv.vector == tx) { int spinningrxctr = 0; if (!bidi_pass) - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx); + thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx); while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { spinningrxctr++; usleep(USLEEP_MAX); } - ksft_print_msg("Interface [%s] vector [Tx]\n", ((struct ifobject *)arg)->ifname); + ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname); for (int i = 0; i < num_frames; i++) { /*send EOT frame */ if (i == (num_frames - 1)) data->seqnum = -1; else data->seqnum = i; - gen_udp_hdr((void *)data, (void *)arg, udp_hdr); - gen_ip_hdr((void *)arg, ip_hdr); + gen_udp_hdr((void *)data, ifobject, udp_hdr); + gen_ip_hdr(ifobject, ip_hdr); gen_udp_csum(udp_hdr, ip_hdr); - gen_eth_hdr((void *)arg, eth_hdr); - gen_eth_frame(((struct ifobject *)arg)->umem, - i * XSK_UMEM__DEFAULT_FRAME_SIZE); + gen_eth_hdr(ifobject, eth_hdr); + gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); } free(data); ksft_print_msg("Sending %d packets on interface %s\n", - (opt_pkt_count - 1), ((struct ifobject *)arg)->ifname); - tx_only_all(arg); - } else if (((struct ifobject *)arg)->fv.vector == rx) { + (opt_pkt_count - 1), ifobject->ifname); + tx_only_all(ifobject); + } else if (ifobject->fv.vector == rx) { struct pollfd fds[MAX_SOCKS] = { }; int ret; if (!bidi_pass) - thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx); + thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx); - ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname); - xsk_populate_fill_ring(((struct ifobject *)arg)->umem); + ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname); + xsk_populate_fill_ring(ifobject->umem); TAILQ_INIT(&head); if (debug_pkt_dump) { @@ -879,7 +877,7 @@ static void *worker_testapp_validate(void *arg) exit_with_error(errno); } - fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk); + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); fds[0].events = POLLIN; pthread_mutex_lock(&sync_mutex); @@ -892,7 +890,7 @@ static void *worker_testapp_validate(void *arg) if (ret <= 0) continue; } - rx_pkt(((struct ifobject *)arg)->xsk, fds); + rx_pkt(ifobject->xsk, fds); worker_pkt_validate(); if (sigvar) @@ -900,15 +898,15 @@ static void *worker_testapp_validate(void *arg) } ksft_print_msg("Received %d packets on interface %s\n", - pkt_counter, ((struct ifobject *)arg)->ifname); + pkt_counter, ifobject->ifname); if (opt_teardown) ksft_print_msg("Destroying socket\n"); } if (!opt_bidi || (opt_bidi && bidi_pass)) { - xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk); - (void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem); + xsk_socket__delete(ifobject->xsk->xsk); + (void)xsk_umem__delete(ifobject->umem->umem); } pthread_exit(NULL); } -- cgit v1.2.3-71-gd317 From 449f0874fd4ee36c1eb0664432796ddb912936fa Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:15 +0100 Subject: selftests/bpf: Remove unused enums MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The enums undef and bidi are not used. Remove them. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-3-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 61f595b6f200..0e9f9b7e61c2 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -92,8 +92,6 @@ struct flow_vector { enum fvector { tx, rx, - bidi, - undef, } vector; }; -- cgit v1.2.3-71-gd317 From a86072838b67a3cdbb2ee2abc6c0ab3fb0d60be5 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:16 +0100 Subject: selftests/bpf: Fix style warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Silence three checkpatch style warnings. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-4-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index cd1dd2b7458f..77d1bda37afa 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -734,10 +734,10 @@ static void worker_pkt_validate(void) break; /*do not increment pktcounter if !(tos=0x9 and ipv4) */ if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->version == IP_PKT_VER) - && (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos == + sizeof(struct ethhdr)))->version == IP_PKT_VER) && + (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos == IP_PKT_TOS)) { - payloadseqnum = *((uint32_t *) (pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); + payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); if (debug_pkt_dump && payloadseqnum != EOT) { pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame)); pkt_obj->payload = (char *)malloc(PKT_SIZE); @@ -767,10 +767,10 @@ static void worker_pkt_validate(void) } else { ksft_print_msg("Invalid frame received: "); ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", - ((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->version, - ((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->tos); + ((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->version, + ((struct iphdr *)(pkt_node_rx_q->pkt_frame + + sizeof(struct ethhdr)))->tos); TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); free(pkt_node_rx_q->pkt_frame); free(pkt_node_rx_q); -- cgit v1.2.3-71-gd317 From 4896d7e37ea5217d42e210bfcf4d56964044704f Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:17 +0100 Subject: selftests/bpf: Remove memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The allocated entry is immediately overwritten by an assignment. Fix that. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-5-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 77d1bda37afa..9f40d310805a 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -728,7 +728,6 @@ static void worker_pkt_validate(void) u32 payloadseqnum = -2; while (1) { - pkt_node_rx_q = malloc(sizeof(struct pkt)); pkt_node_rx_q = TAILQ_LAST(&head, head_s); if (!pkt_node_rx_q) break; -- cgit v1.2.3-71-gd317 From 8a9cba7ea858da134d18aa9ea09e1e6606d8ade6 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:18 +0100 Subject: selftests/bpf: Improve readability of xdpxceiver/worker_pkt_validate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a local variable to get rid of lot of casting. Move common code outside the if/else-clause. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-6-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 9f40d310805a..ab2ed7b85f9e 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -726,16 +726,17 @@ static void worker_pkt_dump(void) static void worker_pkt_validate(void) { u32 payloadseqnum = -2; + struct iphdr *iphdr; while (1) { pkt_node_rx_q = TAILQ_LAST(&head, head_s); if (!pkt_node_rx_q) break; + + iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)); + /*do not increment pktcounter if !(tos=0x9 and ipv4) */ - if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->version == IP_PKT_VER) && - (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos == - IP_PKT_TOS)) { + if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); if (debug_pkt_dump && payloadseqnum != EOT) { pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame)); @@ -757,24 +758,18 @@ static void worker_pkt_validate(void) ksft_exit_xfail(); } - TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); - free(pkt_node_rx_q->pkt_frame); - free(pkt_node_rx_q); - pkt_node_rx_q = NULL; prev_pkt = payloadseqnum; pkt_counter++; } else { ksft_print_msg("Invalid frame received: "); - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", - ((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->version, - ((struct iphdr *)(pkt_node_rx_q->pkt_frame + - sizeof(struct ethhdr)))->tos); - TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); - free(pkt_node_rx_q->pkt_frame); - free(pkt_node_rx_q); - pkt_node_rx_q = NULL; + ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, + iphdr->tos); } + + TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes); + free(pkt_node_rx_q->pkt_frame); + free(pkt_node_rx_q); + pkt_node_rx_q = NULL; } } -- cgit v1.2.3-71-gd317 From 0b50bd48cfe744def605cafe991ca3db60d326d8 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:19 +0100 Subject: selftests/bpf: Remove casting by introduce local variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let us use a local variable in nsswitchthread(), so we can remove a lot of casting for better readability. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-7-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index ab2ed7b85f9e..bea006ad8e17 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -382,21 +382,19 @@ static bool switch_namespace(int idx) static void *nsswitchthread(void *args) { - if (switch_namespace(((struct targs *)args)->idx)) { - ifdict[((struct targs *)args)->idx]->ifindex = - if_nametoindex(ifdict[((struct targs *)args)->idx]->ifname); - if (!ifdict[((struct targs *)args)->idx]->ifindex) { - ksft_test_result_fail - ("ERROR: [%s] interface \"%s\" does not exist\n", - __func__, ifdict[((struct targs *)args)->idx]->ifname); - ((struct targs *)args)->retptr = false; + struct targs *targs = args; + + targs->retptr = false; + + if (switch_namespace(targs->idx)) { + ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname); + if (!ifdict[targs->idx]->ifindex) { + ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n", + __func__, ifdict[targs->idx]->ifname); } else { - ksft_print_msg("Interface found: %s\n", - ifdict[((struct targs *)args)->idx]->ifname); - ((struct targs *)args)->retptr = true; + ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname); + targs->retptr = true; } - } else { - ((struct targs *)args)->retptr = false; } pthread_exit(NULL); } -- cgit v1.2.3-71-gd317 From 124000e48b7eec032435b2a33e2038a9c7514b71 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:20 +0100 Subject: selftests/bpf: Change type from void * to struct ifaceconfigobj * MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of casting from void *, let us use the actual type in init_iface_config(). Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-8-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index bea006ad8e17..c2cfc0b6d19e 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -981,25 +981,25 @@ static void testapp_sockets(void) print_ksft_result(); } -static void init_iface_config(void *ifaceconfig) +static void init_iface_config(struct ifaceconfigobj *ifaceconfig) { /*Init interface0 */ ifdict[0]->fv.vector = tx; - memcpy(ifdict[0]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); - memcpy(ifdict[0]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); - ifdict[0]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; - ifdict[0]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; - ifdict[0]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; - ifdict[0]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; + memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN); + memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN); + ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr; + ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr; + ifdict[0]->dst_port = ifaceconfig->dst_port; + ifdict[0]->src_port = ifaceconfig->src_port; /*Init interface1 */ ifdict[1]->fv.vector = rx; - memcpy(ifdict[1]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN); - memcpy(ifdict[1]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN); - ifdict[1]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr; - ifdict[1]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr; - ifdict[1]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port; - ifdict[1]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port; + memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN); + memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN); + ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr; + ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr; + ifdict[1]->dst_port = ifaceconfig->src_port; + ifdict[1]->src_port = ifaceconfig->dst_port; } int main(int argc, char **argv) @@ -1038,7 +1038,7 @@ int main(int argc, char **argv) num_frames = ++opt_pkt_count; - init_iface_config((void *)ifaceconfig); + init_iface_config(ifaceconfig); pthread_init_mutex(); -- cgit v1.2.3-71-gd317 From 59a4a87e4b265f476558617d5671c33ff7176012 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:21 +0100 Subject: selftests/bpf: Change type from void * to struct generic_data * MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of casting from void *, let us use the actual type in gen_udp_hdr(). Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-9-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index c2cfc0b6d19e..993ce9b7aa76 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -246,13 +246,13 @@ static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) ip_hdr->check = 0; } -static void gen_udp_hdr(void *data, struct ifobject *ifobject, struct udphdr *udp_hdr) +static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject, + struct udphdr *udp_hdr) { udp_hdr->source = htons(ifobject->src_port); udp_hdr->dest = htons(ifobject->dst_port); udp_hdr->len = htons(UDP_PKT_SIZE); - memset32_htonl(pkt_data + PKT_HDR_SIZE, - htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE); + memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE); } static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) @@ -841,7 +841,7 @@ static void *worker_testapp_validate(void *arg) data->seqnum = -1; else data->seqnum = i; - gen_udp_hdr((void *)data, ifobject, udp_hdr); + gen_udp_hdr(data, ifobject, udp_hdr); gen_ip_hdr(ifobject, ip_hdr); gen_udp_csum(udp_hdr, ip_hdr); gen_eth_hdr(ifobject, eth_hdr); -- cgit v1.2.3-71-gd317 From 829725ec7bf538d36f44117eaeb36bdf57be8e54 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:22 +0100 Subject: selftests/bpf: Define local variables at the beginning of a block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use C89 rules for variable definition. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-10-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 993ce9b7aa76..34bdcae9b908 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -567,9 +567,11 @@ static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds) } for (i = 0; i < rcvd; i++) { - u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; - (void)xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; - u64 orig = xsk_umem__extract_addr(addr); + u64 addr, orig; + + addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; + xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE); @@ -905,6 +907,8 @@ static void *worker_testapp_validate(void *arg) static void testapp_validate(void) { + struct timespec max_wait = { 0, 0 }; + pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, THREAD_STACK); @@ -929,8 +933,6 @@ static void testapp_validate(void) exit_with_error(errno); } - struct timespec max_wait = { 0, 0 }; - if (clock_gettime(CLOCK_REALTIME, &max_wait)) exit_with_error(errno); max_wait.tv_sec += TMOUT_SEC; -- cgit v1.2.3-71-gd317 From 93dd4a06c0e300a2a6538a39f8a30e7b83ff2c66 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:23 +0100 Subject: selftests/bpf: Avoid heap allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data variable is only used locally. Instead of using the heap, stick to using the stack. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-11-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 34bdcae9b908..2da59b142c03 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -807,10 +807,10 @@ static void *worker_testapp_validate(void *arg) { struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); - struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data)); struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; struct ifobject *ifobject = (struct ifobject *)arg; + struct generic_data data; void *bufs = NULL; pthread_attr_setstacksize(&attr, THREAD_STACK); @@ -840,17 +840,16 @@ static void *worker_testapp_validate(void *arg) for (int i = 0; i < num_frames; i++) { /*send EOT frame */ if (i == (num_frames - 1)) - data->seqnum = -1; + data.seqnum = -1; else - data->seqnum = i; - gen_udp_hdr(data, ifobject, udp_hdr); + data.seqnum = i; + gen_udp_hdr(&data, ifobject, udp_hdr); gen_ip_hdr(ifobject, ip_hdr); gen_udp_csum(udp_hdr, ip_hdr); gen_eth_hdr(ifobject, eth_hdr); gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); } - free(data); ksft_print_msg("Sending %d packets on interface %s\n", (opt_pkt_count - 1), ifobject->ifname); tx_only_all(ifobject); -- cgit v1.2.3-71-gd317 From d08a17d6de203cca245db11715c95af0b87ec5a3 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:24 +0100 Subject: selftests/bpf: Consistent malloc/calloc usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use calloc instead of malloc where it makes sense, and avoid C++-style void *-cast. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-12-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 2da59b142c03..a64e2a929e70 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -411,7 +411,7 @@ static int validate_interfaces(void) if (strcmp(ifdict[i]->nsname, "")) { struct targs *targs; - targs = (struct targs *)malloc(sizeof(struct targs)); + targs = malloc(sizeof(*targs)); if (!targs) exit_with_error(errno); @@ -578,7 +578,7 @@ static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds) if (!pkt_node_rx) exit_with_error(errno); - pkt_node_rx->pkt_frame = (char *)malloc(PKT_SIZE); + pkt_node_rx->pkt_frame = malloc(PKT_SIZE); if (!pkt_node_rx->pkt_frame) exit_with_error(errno); @@ -739,8 +739,8 @@ static void worker_pkt_validate(void) if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE)); if (debug_pkt_dump && payloadseqnum != EOT) { - pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame)); - pkt_obj->payload = (char *)malloc(PKT_SIZE); + pkt_obj = malloc(sizeof(*pkt_obj)); + pkt_obj->payload = malloc(PKT_SIZE); memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE); pkt_buf[payloadseqnum] = pkt_obj; } @@ -865,7 +865,7 @@ static void *worker_testapp_validate(void *arg) TAILQ_INIT(&head); if (debug_pkt_dump) { - pkt_buf = malloc(sizeof(struct pkt_frame **) * num_frames); + pkt_buf = calloc(num_frames, sizeof(*pkt_buf)); if (!pkt_buf) exit_with_error(errno); } @@ -1017,7 +1017,7 @@ int main(int argc, char **argv) u16 UDP_DST_PORT = 2020; u16 UDP_SRC_PORT = 2121; - ifaceconfig = (struct ifaceconfigobj *)malloc(sizeof(struct ifaceconfigobj)); + ifaceconfig = malloc(sizeof(struct ifaceconfigobj)); memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN); memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN); inet_aton(IP1, &ifaceconfig->dst_ip); @@ -1026,7 +1026,7 @@ int main(int argc, char **argv) ifaceconfig->src_port = UDP_SRC_PORT; for (int i = 0; i < MAX_INTERFACES; i++) { - ifdict[i] = (struct ifobject *)malloc(sizeof(struct ifobject)); + ifdict[i] = malloc(sizeof(struct ifobject)); if (!ifdict[i]) exit_with_error(errno); -- cgit v1.2.3-71-gd317 From 095af986525a509c9378edf777aa9e0773645f13 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 22 Jan 2021 16:47:25 +0100 Subject: selftests/bpf: Avoid useless void *-casts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to cast to void * when the argument is void *. Avoid cluttering of code. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210122154725.22140-13-bjorn.topel@gmail.com --- tools/testing/selftests/bpf/xdpxceiver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index a64e2a929e70..99ea6cf069e6 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -416,7 +416,7 @@ static int validate_interfaces(void) exit_with_error(errno); targs->idx = i; - if (pthread_create(&ns_thread, NULL, nsswitchthread, (void *)targs)) + if (pthread_create(&ns_thread, NULL, nsswitchthread, targs)) exit_with_error(errno); pthread_join(ns_thread, NULL); @@ -923,12 +923,12 @@ static void testapp_validate(void) /*Spawn RX thread */ if (!opt_bidi || (opt_bidi && !bidi_pass)) { - if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1])) + if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1])) exit_with_error(errno); } else if (opt_bidi && bidi_pass) { /*switch Tx/Rx vectors */ ifdict[0]->fv.vector = rx; - if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[0])) + if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0])) exit_with_error(errno); } @@ -943,12 +943,12 @@ static void testapp_validate(void) /*Spawn TX thread */ if (!opt_bidi || (opt_bidi && !bidi_pass)) { - if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0])) + if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0])) exit_with_error(errno); } else if (opt_bidi && bidi_pass) { /*switch Tx/Rx vectors */ ifdict[1]->fv.vector = tx; - if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[1])) + if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1])) exit_with_error(errno); } -- cgit v1.2.3-71-gd317 From 726bf76fcd093bb16fc5f9215bf1c606ab699c6b Mon Sep 17 00:00:00 2001 From: Florian Lehner Date: Sat, 23 Jan 2021 19:52:21 +0100 Subject: tools, headers: Sync struct bpf_perf_event_data Update struct bpf_perf_event_data with the addr field to match the tools headers with the kernel headers. Signed-off-by: Florian Lehner Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210123185221.23946-1-dev@der-flo.net --- tools/include/uapi/linux/bpf_perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h index 8f95303f9d80..eb1b9d21250c 100644 --- a/tools/include/uapi/linux/bpf_perf_event.h +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -13,6 +13,7 @@ struct bpf_perf_event_data { bpf_user_pt_regs_t regs; __u64 sample_period; + __u64 addr; }; #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ -- cgit v1.2.3-71-gd317 From 190d1c921ad0862da14807e1670f54020f48e889 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 25 Jan 2021 13:05:46 +0800 Subject: samples/bpf: Set flag __SANE_USERSPACE_TYPES__ for MIPS to fix build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There exists many build warnings when make M=samples/bpf on the Loongson platform, this issue is MIPS related, x86 compiles just fine. Here are some warnings: CC samples/bpf/ibumad_user.o samples/bpf/ibumad_user.c: In function ‘dump_counts’: samples/bpf/ibumad_user.c:46:24: warning: format ‘%llu’ expects argument of type ‘long long unsigned int’, but argument 3 has type ‘__u64’ {aka ‘long unsigned int’} [-Wformat=] printf("0x%02x : %llu\n", key, value); ~~~^ ~~~~~ %lu CC samples/bpf/offwaketime_user.o samples/bpf/offwaketime_user.c: In function ‘print_ksym’: samples/bpf/offwaketime_user.c:34:17: warning: format ‘%llx’ expects argument of type ‘long long unsigned int’, but argument 3 has type ‘__u64’ {aka ‘long unsigned int’} [-Wformat=] printf("%s/%llx;", sym->name, addr); ~~~^ ~~~~ %lx samples/bpf/offwaketime_user.c: In function ‘print_stack’: samples/bpf/offwaketime_user.c:68:17: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 3 has type ‘__u64’ {aka ‘long unsigned int’} [-Wformat=] printf(";%s %lld\n", key->waker, count); ~~~^ ~~~~~ %ld MIPS needs __SANE_USERSPACE_TYPES__ before to select 'int-ll64.h' in arch/mips/include/uapi/asm/types.h, then it can avoid build warnings when printing __u64 with %llu, %llx or %lld. The header tools/include/linux/types.h defines __SANE_USERSPACE_TYPES__, it seems that we can include in the source files which have build warnings, but it has no effect due to actually it includes usr/include/linux/types.h instead of tools/include/linux/types.h, the problem is that "usr/include" is preferred first than "tools/include" in samples/bpf/Makefile, that sounds like a ugly hack to -Itools/include before -Iusr/include. So define __SANE_USERSPACE_TYPES__ for MIPS in samples/bpf/Makefile is proper, if add "TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__" in samples/bpf/Makefile, it appears the following error: Auto-detecting system features: ... libelf: [ on ] ... zlib: [ on ] ... bpf: [ OFF ] BPF API too old make[3]: *** [Makefile:293: bpfdep] Error 1 make[2]: *** [Makefile:156: all] Error 2 With #ifndef __SANE_USERSPACE_TYPES__ in tools/include/linux/types.h, the above error has gone and this ifndef change does not hurt other compilations. Signed-off-by: Tiezhu Yang Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/1611551146-14052-1-git-send-email-yangtiezhu@loongson.cn --- samples/bpf/Makefile | 4 ++++ tools/include/linux/types.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'tools') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index d06144613ca2..362f314566b3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -183,6 +183,10 @@ BPF_EXTRA_CFLAGS := $(ARM_ARCH_SELECTOR) TPROGS_CFLAGS += $(ARM_ARCH_SELECTOR) endif +ifeq ($(ARCH), mips) +TPROGS_CFLAGS += -D__SANE_USERSPACE_TYPES__ +endif + TPROGS_CFLAGS += -Wall -O2 TPROGS_CFLAGS += -Wmissing-prototypes TPROGS_CFLAGS += -Wstrict-prototypes diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 154eb4e3ca7c..e9c5a215837d 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -6,7 +6,10 @@ #include #include +#ifndef __SANE_USERSPACE_TYPES__ #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ +#endif + #include #include -- cgit v1.2.3-71-gd317 From 86ce322d21eb032ed8fdd294d0fb095d2debb430 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Jan 2021 22:50:18 -0800 Subject: selftests/bpf: Don't exit on failed bpf_testmod unload Fix bug in handling bpf_testmod unloading that will cause test_progs exiting prematurely if bpf_testmod unloading failed. This is especially problematic when running a subset of test_progs that doesn't require root permissions and doesn't rely on bpf_testmod, yet will fail immediately due to exit(1) in unload_bpf_testmod(). Fixes: 9f7fa225894c ("selftests/bpf: Add bpf_testmod kernel module for testing") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210126065019.1268027-1-andrii@kernel.org --- tools/testing/selftests/bpf/test_progs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 213628ee721c..6396932b97e2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -390,7 +390,7 @@ static void unload_bpf_testmod(void) return; } fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); - exit(1); + return; } if (env.verbosity > VERBOSE_NONE) fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); -- cgit v1.2.3-71-gd317 From 8259fdeb3032621c7cc7aa3f2676ffd470303305 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 27 Jan 2021 11:31:40 -0800 Subject: selftests/bpf: Verify that rebinding to port < 1024 from BPF works Return 3 to indicate that permission check for port 111 should be skipped. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210127193140.3170382-2-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/bind_perm.c | 109 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/bind_perm.c | 45 +++++++++ 2 files changed, 154 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c new file mode 100644 index 000000000000..d0f06e40c16d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "bind_perm.skel.h" + +#include +#include +#include + +static int duration; + +void try_bind(int family, int port, int expected_errno) +{ + struct sockaddr_storage addr = {}; + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int fd = -1; + + fd = socket(family, SOCK_STREAM, 0); + if (CHECK(fd < 0, "fd", "errno %d", errno)) + goto close_socket; + + if (family == AF_INET) { + sin = (struct sockaddr_in *)&addr; + sin->sin_family = family; + sin->sin_port = htons(port); + } else { + sin6 = (struct sockaddr_in6 *)&addr; + sin6->sin6_family = family; + sin6->sin6_port = htons(port); + } + + errno = 0; + bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(errno, expected_errno, "bind"); + +close_socket: + if (fd >= 0) + close(fd); +} + +bool cap_net_bind_service(cap_flag_value_t flag) +{ + const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE; + cap_flag_value_t original_value; + bool was_effective = false; + cap_t caps; + + caps = cap_get_proc(); + if (CHECK(!caps, "cap_get_proc", "errno %d", errno)) + goto free_caps; + + if (CHECK(cap_get_flag(caps, CAP_NET_BIND_SERVICE, CAP_EFFECTIVE, + &original_value), + "cap_get_flag", "errno %d", errno)) + goto free_caps; + + was_effective = (original_value == CAP_SET); + + if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service, + flag), + "cap_set_flag", "errno %d", errno)) + goto free_caps; + + if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno)) + goto free_caps; + +free_caps: + CHECK(cap_free(caps), "cap_free", "errno %d", errno); + return was_effective; +} + +void test_bind_perm(void) +{ + bool cap_was_effective; + struct bind_perm *skel; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/bind_perm"); + if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + return; + + skel = bind_perm__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + goto close_cgroup_fd; + + skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel, "bind_v4_prog")) + goto close_skeleton; + + skel->links.bind_v6_prog = bpf_program__attach_cgroup(skel->progs.bind_v6_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel, "bind_v6_prog")) + goto close_skeleton; + + cap_was_effective = cap_net_bind_service(CAP_CLEAR); + + try_bind(AF_INET, 110, EACCES); + try_bind(AF_INET6, 110, EACCES); + + try_bind(AF_INET, 111, 0); + try_bind(AF_INET6, 111, 0); + + if (cap_was_effective) + cap_net_bind_service(CAP_SET); + +close_skeleton: + bind_perm__destroy(skel); +close_cgroup_fd: + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c b/tools/testing/selftests/bpf/progs/bind_perm.c new file mode 100644 index 000000000000..7bd2a027025d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bind_perm.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +static __always_inline int bind_prog(struct bpf_sock_addr *ctx, int family) +{ + struct bpf_sock *sk; + + sk = ctx->sk; + if (!sk) + return 0; + + if (sk->family != family) + return 0; + + if (ctx->type != SOCK_STREAM) + return 0; + + /* Return 1 OR'ed with the first bit set to indicate + * that CAP_NET_BIND_SERVICE should be bypassed. + */ + if (ctx->user_port == bpf_htons(111)) + return (1 | 2); + + return 1; +} + +SEC("cgroup/bind4") +int bind_v4_prog(struct bpf_sock_addr *ctx) +{ + return bind_prog(ctx, AF_INET); +} + +SEC("cgroup/bind6") +int bind_v6_prog(struct bpf_sock_addr *ctx) +{ + return bind_prog(ctx, AF_INET6); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-71-gd317 From 211a741cd3e124bffdc13ee82e7e65f204e53f60 Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Thu, 28 Jan 2021 02:50:58 +0100 Subject: tools: Factor Clang, LLC and LLVM utils definitions When dealing with BPF/BTF/pahole and DWARF v5 I wanted to build bpftool. While looking into the source code I found duplicate assignments in misc tools for the LLVM eco system, e.g. clang and llvm-objcopy. Move the Clang, LLC and/or LLVM utils definitions to tools/scripts/Makefile.include file and add missing includes where needed. Honestly, I was inspired by the commit c8a950d0d3b9 ("tools: Factor HOSTCC, HOSTLD, HOSTAR definitions"). I tested with bpftool and perf on Debian/testing AMD64 and LLVM/Clang v11.1.0-rc1. Build instructions: [ make and make-options ] MAKE="make V=1" MAKE_OPTS="HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang LD=ld.lld LLVM=1 LLVM_IAS=1" MAKE_OPTS="$MAKE_OPTS PAHOLE=/opt/pahole/bin/pahole" [ clean-up ] $MAKE $MAKE_OPTS -C tools/ clean [ bpftool ] $MAKE $MAKE_OPTS -C tools/bpf/bpftool/ [ perf ] PYTHON=python3 $MAKE $MAKE_OPTS -C tools/perf/ I was careful with respecting the user's wish to override custom compiler, linker, GNU/binutils and/or LLVM utils settings. Signed-off-by: Sedat Dilek Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Jiri Olsa # tools/build and tools/perf Link: https://lore.kernel.org/bpf/20210128015117.20515-1-sedat.dilek@gmail.com --- tools/bpf/bpftool/Makefile | 2 -- tools/bpf/runqslower/Makefile | 3 --- tools/build/feature/Makefile | 4 ++-- tools/perf/Makefile.perf | 1 - tools/scripts/Makefile.include | 7 +++++++ tools/testing/selftests/bpf/Makefile | 2 -- tools/testing/selftests/tc-testing/Makefile | 3 +-- 7 files changed, 10 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 45ac2f9e0aa9..8ced1655fea6 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -75,8 +75,6 @@ endif INSTALL ?= install RM ?= rm -f -CLANG ?= clang -LLVM_STRIP ?= llvm-strip FEATURE_USER = .bpftool FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \ diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 4d5ca54fcd4c..9d9fb6209be1 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -3,9 +3,6 @@ include ../../scripts/Makefile.include OUTPUT ?= $(abspath .output)/ -CLANG ?= clang -LLC ?= llc -LLVM_STRIP ?= llvm-strip BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 89ba522e377d..3e55edb3ea54 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include + FILES= \ test-all.bin \ test-backtrace.bin \ @@ -76,8 +78,6 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config -LLVM_CONFIG ?= llvm-config -CLANG ?= clang all: $(FILES) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 62f3deb1d3a8..f4df7534026d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -176,7 +176,6 @@ endef LD += $(EXTRA_LDFLAGS) PKG_CONFIG = $(CROSS_COMPILE)pkg-config -LLVM_CONFIG ?= llvm-config RM = rm -f LN = ln -f diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 1358e89cdf7d..4255e71f72b7 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -69,6 +69,13 @@ HOSTCC ?= gcc HOSTLD ?= ld endif +# Some tools require Clang, LLC and/or LLVM utils +CLANG ?= clang +LLC ?= llc +LLVM_CONFIG ?= llvm-config +LLVM_OBJCOPY ?= llvm-objcopy +LLVM_STRIP ?= llvm-strip + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 63d6288e419c..f0674d406f40 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -19,8 +19,6 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CLANG ?= clang -LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \ diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index 91fee5c43274..4d639279f41e 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../../scripts/Makefile.include top_srcdir = $(abspath ../../../..) APIDIR := $(top_scrdir)/include/uapi @@ -7,8 +8,6 @@ TEST_GEN_FILES = action.o KSFT_KHDR_INSTALL := 1 include ../lib.mk -CLANG ?= clang -LLC ?= llc PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) ifeq ($(PROBE),) -- cgit v1.2.3-71-gd317 From 62476cc1bf24b34d9442c7ba76e5eb6bf242f911 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 27 Jan 2021 15:28:50 -0800 Subject: bpf: Enable bpf_{g,s}etsockopt in BPF_CGROUP_UDP{4,6}_SENDMSG Can be used to query/modify socket state for unconnected UDP sendmsg. Those hooks run as BPF_CGROUP_RUN_SA_PROG_LOCK and operate on a locked socket. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210127232853.3753823-2-sdf@google.com --- net/core/filter.c | 4 ++++ tools/testing/selftests/bpf/bpf_sockopt_helpers.h | 21 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/sendmsg4_prog.c | 7 +++++++ tools/testing/selftests/bpf/progs/sendmsg6_prog.c | 5 +++++ 4 files changed, 37 insertions(+) create mode 100644 tools/testing/selftests/bpf/bpf_sockopt_helpers.h (limited to 'tools') diff --git a/net/core/filter.c b/net/core/filter.c index 9ab94e90d660..3d7f78a19565 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7023,6 +7023,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7033,6 +7035,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: return &bpf_sock_addr_getsockopt_proto; default: return NULL; diff --git a/tools/testing/selftests/bpf/bpf_sockopt_helpers.h b/tools/testing/selftests/bpf/bpf_sockopt_helpers.h new file mode 100644 index 000000000000..11f3a0976174 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_sockopt_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include + +int get_set_sk_priority(void *ctx) +{ + int prio; + + /* Verify that context allows calling bpf_getsockopt and + * bpf_setsockopt by reading and writing back socket + * priority. + */ + + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index 092d9da536f3..ac5abc34cde8 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -8,6 +8,8 @@ #include #include +#include + #define SRC1_IP4 0xAC100001U /* 172.16.0.1 */ #define SRC2_IP4 0x00000000U #define SRC_REWRITE_IP4 0x7f000004U @@ -21,9 +23,14 @@ int _version SEC("version") = 1; SEC("cgroup/sendmsg4") int sendmsg_v4_prog(struct bpf_sock_addr *ctx) { + int prio; + if (ctx->type != SOCK_DGRAM) return 0; + if (!get_set_sk_priority(ctx)) + return 0; + /* Rewrite source. */ if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) || ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) { diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index 255a432bc163..24694b1a8d82 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -8,6 +8,8 @@ #include #include +#include + #define SRC_REWRITE_IP6_0 0 #define SRC_REWRITE_IP6_1 0 #define SRC_REWRITE_IP6_2 0 @@ -28,6 +30,9 @@ int sendmsg_v6_prog(struct bpf_sock_addr *ctx) if (ctx->type != SOCK_DGRAM) return 0; + if (!get_set_sk_priority(ctx)) + return 0; + /* Rewrite source. */ if (ctx->msg_src_ip6[3] == bpf_htonl(1) || ctx->msg_src_ip6[3] == bpf_htonl(0)) { -- cgit v1.2.3-71-gd317 From 073f4ec124bb2c431d9e4136e7f583abfea7f290 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 27 Jan 2021 15:28:51 -0800 Subject: bpf: Enable bpf_{g,s}etsockopt in BPF_CGROUP_INET{4,6}_GET{PEER,SOCK}NAME Those hooks run as BPF_CGROUP_RUN_SA_PROG_LOCK and operate on a locked socket. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210127232853.3753823-3-sdf@google.com --- net/core/filter.c | 8 ++++++++ tools/testing/selftests/bpf/progs/connect_force_port4.c | 8 ++++++++ tools/testing/selftests/bpf/progs/connect_force_port6.c | 8 ++++++++ 3 files changed, 24 insertions(+) (limited to 'tools') diff --git a/net/core/filter.c b/net/core/filter.c index 3d7f78a19565..ba436b1d70c2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7025,6 +7025,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_INET4_GETPEERNAME: + case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_INET4_GETSOCKNAME: + case BPF_CGROUP_INET6_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7037,6 +7041,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_INET4_GETPEERNAME: + case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_INET4_GETSOCKNAME: + case BPF_CGROUP_INET6_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c index 7396308677a3..a979aaef2a76 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port4.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c @@ -10,6 +10,8 @@ #include #include +#include + char _license[] SEC("license") = "GPL"; int _version SEC("version") = 1; @@ -58,6 +60,9 @@ int connect4(struct bpf_sock_addr *ctx) SEC("cgroup/getsockname4") int getsockname4(struct bpf_sock_addr *ctx) { + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose local server as 1.2.3.4:60000 to client. */ if (ctx->user_port == bpf_htons(60123)) { ctx->user_ip4 = bpf_htonl(0x01020304); @@ -71,6 +76,9 @@ int getpeername4(struct bpf_sock_addr *ctx) { struct svc_addr *orig; + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose service 1.2.3.4:60000 as peer instead of backend. */ if (ctx->user_port == bpf_htons(60123)) { orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c index c1a2b555e9ad..afc8f1c5a9d6 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port6.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c @@ -9,6 +9,8 @@ #include #include +#include + char _license[] SEC("license") = "GPL"; int _version SEC("version") = 1; @@ -63,6 +65,9 @@ int connect6(struct bpf_sock_addr *ctx) SEC("cgroup/getsockname6") int getsockname6(struct bpf_sock_addr *ctx) { + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose local server as [fc00::1]:60000 to client. */ if (ctx->user_port == bpf_htons(60124)) { ctx->user_ip6[0] = bpf_htonl(0xfc000000); @@ -79,6 +84,9 @@ int getpeername6(struct bpf_sock_addr *ctx) { struct svc_addr *orig; + if (!get_set_sk_priority(ctx)) + return 1; + /* Expose service [fc00::1]:60000 as peer instead of backend. */ if (ctx->user_port == bpf_htons(60124)) { orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0); -- cgit v1.2.3-71-gd317 From 357490601621d077c2c90473fec66d7a8badedcc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 27 Jan 2021 15:28:52 -0800 Subject: selftests/bpf: Rewrite recvmsg{4,6} asm progs to c in test_sock_addr I'll extend them in the next patch. It's easier to work with C than with asm. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210127232853.3753823-4-sdf@google.com --- tools/testing/selftests/bpf/progs/recvmsg4_prog.c | 37 ++++++++++ tools/testing/selftests/bpf/progs/recvmsg6_prog.c | 43 ++++++++++++ tools/testing/selftests/bpf/test_sock_addr.c | 86 ++++------------------- 3 files changed, 92 insertions(+), 74 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/recvmsg4_prog.c create mode 100644 tools/testing/selftests/bpf/progs/recvmsg6_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c new file mode 100644 index 000000000000..fc2fe8a952fa --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include +#include + +#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */ +#define SERV4_PORT 4040 + +SEC("cgroup/recvmsg4") +int recvmsg4_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock *sk; + __u32 user_ip4; + __u16 user_port; + + sk = ctx->sk; + if (!sk) + return 1; + + if (sk->family != AF_INET) + return 1; + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 1; + + ctx->user_ip4 = bpf_htonl(SERV4_IP); + ctx->user_port = bpf_htons(SERV4_PORT); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c new file mode 100644 index 000000000000..6060fd63324b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include +#include + +#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */ +#define SERV6_IP_1 0x12345678 +#define SERV6_IP_2 0x00000000 +#define SERV6_IP_3 0x0000abcd +#define SERV6_PORT 6060 + +SEC("cgroup/recvmsg6") +int recvmsg6_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock *sk; + __u32 user_ip4; + __u16 user_port; + + sk = ctx->sk; + if (!sk) + return 1; + + if (sk->family != AF_INET6) + return 1; + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 1; + + ctx->user_ip6[0] = bpf_htonl(SERV6_IP_0); + ctx->user_ip6[1] = bpf_htonl(SERV6_IP_1); + ctx->user_ip6[2] = bpf_htonl(SERV6_IP_2); + ctx->user_ip6[3] = bpf_htonl(SERV6_IP_3); + ctx->user_port = bpf_htons(SERV6_PORT); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index dcb83ab02919..aa3f185fcb89 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -31,6 +31,8 @@ #define CONNECT6_PROG_PATH "./connect6_prog.o" #define SENDMSG4_PROG_PATH "./sendmsg4_prog.o" #define SENDMSG6_PROG_PATH "./sendmsg6_prog.o" +#define RECVMSG4_PROG_PATH "./recvmsg4_prog.o" +#define RECVMSG6_PROG_PATH "./recvmsg6_prog.o" #define BIND4_PROG_PATH "./bind4_prog.o" #define BIND6_PROG_PATH "./bind6_prog.o" @@ -94,10 +96,10 @@ static int sendmsg_deny_prog_load(const struct sock_addr_test *test); static int recvmsg_allow_prog_load(const struct sock_addr_test *test); static int recvmsg_deny_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test); -static int recvmsg4_rw_asm_prog_load(const struct sock_addr_test *test); +static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test); -static int recvmsg6_rw_asm_prog_load(const struct sock_addr_test *test); +static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test); @@ -573,8 +575,8 @@ static struct sock_addr_test tests[] = { LOAD_REJECT, }, { - "recvmsg4: rewrite IP & port (asm)", - recvmsg4_rw_asm_prog_load, + "recvmsg4: rewrite IP & port (C)", + recvmsg4_rw_c_prog_load, BPF_CGROUP_UDP4_RECVMSG, BPF_CGROUP_UDP4_RECVMSG, AF_INET, @@ -587,8 +589,8 @@ static struct sock_addr_test tests[] = { SUCCESS, }, { - "recvmsg6: rewrite IP & port (asm)", - recvmsg6_rw_asm_prog_load, + "recvmsg6: rewrite IP & port (C)", + recvmsg6_rw_c_prog_load, BPF_CGROUP_UDP6_RECVMSG, BPF_CGROUP_UDP6_RECVMSG, AF_INET6, @@ -786,45 +788,9 @@ static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test) return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); } -static int recvmsg4_rw_asm_prog_load(const struct sock_addr_test *test) +static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test) { - struct sockaddr_in src4_rw_addr; - - if (mk_sockaddr(AF_INET, SERV4_IP, SERV4_PORT, - (struct sockaddr *)&src4_rw_addr, - sizeof(src4_rw_addr)) == -1) - return -1; - - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET && */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 6), - - /* sk.type == SOCK_DGRAM) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, type)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 4), - - /* user_ip4 = src4_rw_addr.sin_addr */ - BPF_MOV32_IMM(BPF_REG_7, src4_rw_addr.sin_addr.s_addr), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_ip4)), - - /* user_port = src4_rw_addr.sin_port */ - BPF_MOV32_IMM(BPF_REG_7, src4_rw_addr.sin_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); + return load_path(test, RECVMSG4_PROG_PATH); } static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test) @@ -890,37 +856,9 @@ static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test) return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP); } -static int recvmsg6_rw_asm_prog_load(const struct sock_addr_test *test) +static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test) { - struct sockaddr_in6 src6_rw_addr; - - if (mk_sockaddr(AF_INET6, SERV6_IP, SERV6_PORT, - (struct sockaddr *)&src6_rw_addr, - sizeof(src6_rw_addr)) == -1) - return -1; - - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET6) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 10), - - STORE_IPV6(user_ip6, src6_rw_addr.sin6_addr.s6_addr32), - - /* user_port = dst6_rw_addr.sin6_port */ - BPF_MOV32_IMM(BPF_REG_7, src6_rw_addr.sin6_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); + return load_path(test, RECVMSG6_PROG_PATH); } static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test) -- cgit v1.2.3-71-gd317 From 4c3384d7abe58a68d78fd1b8b3bffbf62e1e29a1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 27 Jan 2021 15:28:53 -0800 Subject: bpf: Enable bpf_{g,s}etsockopt in BPF_CGROUP_UDP{4,6}_RECVMSG Those hooks run as BPF_CGROUP_RUN_SA_PROG_LOCK and operate on a locked socket. Note that we could remove the switch for prog->expected_attach_type altogether since all current sock_addr attach types are covered. However, it makes sense to keep it as a safe-guard in case new sock_addr attach types are added that might not operate on a locked socket. Therefore, avoid to let this slip through. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210127232853.3753823-5-sdf@google.com --- net/core/filter.c | 4 ++++ tools/testing/selftests/bpf/progs/recvmsg4_prog.c | 5 +++++ tools/testing/selftests/bpf/progs/recvmsg6_prog.c | 5 +++++ 3 files changed, 14 insertions(+) (limited to 'tools') diff --git a/net/core/filter.c b/net/core/filter.c index ba436b1d70c2..e15d4741719a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7023,6 +7023,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: @@ -7039,6 +7041,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c index fc2fe8a952fa..3d1ae8b3402f 100644 --- a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c @@ -8,6 +8,8 @@ #include #include +#include + #define SERV4_IP 0xc0a801feU /* 192.168.1.254 */ #define SERV4_PORT 4040 @@ -28,6 +30,9 @@ int recvmsg4_prog(struct bpf_sock_addr *ctx) if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 1; + if (!get_set_sk_priority(ctx)) + return 1; + ctx->user_ip4 = bpf_htonl(SERV4_IP); ctx->user_port = bpf_htons(SERV4_PORT); diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c index 6060fd63324b..27dfb21b21b4 100644 --- a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c @@ -8,6 +8,8 @@ #include #include +#include + #define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */ #define SERV6_IP_1 0x12345678 #define SERV6_IP_2 0x00000000 @@ -31,6 +33,9 @@ int recvmsg6_prog(struct bpf_sock_addr *ctx) if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 1; + if (!get_set_sk_priority(ctx)) + return 1; + ctx->user_ip6[0] = bpf_htonl(SERV6_IP_0); ctx->user_ip6[1] = bpf_htonl(SERV6_IP_1); ctx->user_ip6[2] = bpf_htonl(SERV6_IP_2); -- cgit v1.2.3-71-gd317 From 37086bfdc737ea6f66bf68dcf16757004d68e1e1 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 2 Feb 2021 13:50:02 +0000 Subject: bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH When BPF_FETCH is set, atomic instructions load a value from memory into a register. The current verifier code first checks via check_mem_access whether we can access the memory, and then checks via check_reg_arg whether we can write into the register. For loads, check_reg_arg has the side-effect of marking the register's value as unkonwn, and check_mem_access has the side effect of propagating bounds from memory to the register. This currently only takes effect for stack memory. Therefore with the current order, bounds information is thrown away, but by simply reversing the order of check_reg_arg vs. check_mem_access, we can instead propagate bounds smartly. A simple test is added with an infinite loop that can only be proved unreachable if this propagation is present. This is implemented both with C and directly in test_verifier using assembly. Suggested-by: John Fastabend Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210202135002.4024825-1-jackmanb@google.com --- kernel/bpf/verifier.c | 32 ++++++++++++---------- .../selftests/bpf/prog_tests/atomic_bounds.c | 15 ++++++++++ tools/testing/selftests/bpf/progs/atomic_bounds.c | 24 ++++++++++++++++ .../testing/selftests/bpf/verifier/atomic_bounds.c | 27 ++++++++++++++++++ 4 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/atomic_bounds.c create mode 100644 tools/testing/selftests/bpf/progs/atomic_bounds.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_bounds.c (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 972fc38eb62d..5e09632efddb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3665,9 +3665,26 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i return -EACCES; } + if (insn->imm & BPF_FETCH) { + if (insn->imm == BPF_CMPXCHG) + load_reg = BPF_REG_0; + else + load_reg = insn->src_reg; + + /* check and record load of old value */ + err = check_reg_arg(env, load_reg, DST_OP); + if (err) + return err; + } else { + /* This instruction accesses a memory location but doesn't + * actually load it into a register. + */ + load_reg = -1; + } + /* check whether we can read the memory */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, -1, true); + BPF_SIZE(insn->code), BPF_READ, load_reg, true); if (err) return err; @@ -3677,19 +3694,6 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (err) return err; - if (!(insn->imm & BPF_FETCH)) - return 0; - - if (insn->imm == BPF_CMPXCHG) - load_reg = BPF_REG_0; - else - load_reg = insn->src_reg; - - /* check and record load of old value */ - err = check_reg_arg(env, load_reg, DST_OP); - if (err) - return err; - return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c new file mode 100644 index 000000000000..addf127068e4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include "atomic_bounds.skel.h" + +void test_atomic_bounds(void) +{ + struct atomic_bounds *skel; + __u32 duration = 0; + + skel = atomic_bounds__open_and_load(); + if (CHECK(!skel, "skel_load", "couldn't load program\n")) + return; +} diff --git a/tools/testing/selftests/bpf/progs/atomic_bounds.c b/tools/testing/selftests/bpf/progs/atomic_bounds.c new file mode 100644 index 000000000000..e5fff7fc7f8f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/atomic_bounds.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#ifdef ENABLE_ATOMICS_TESTS +bool skip_tests __attribute((__section__(".data"))) = false; +#else +bool skip_tests = true; +#endif + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(sub, int x) +{ +#ifdef ENABLE_ATOMICS_TESTS + int a = 0; + int b = __sync_fetch_and_add(&a, 1); + /* b is certainly 0 here. Can the verifier tell? */ + while (b) + continue; +#endif + return 0; +} diff --git a/tools/testing/selftests/bpf/verifier/atomic_bounds.c b/tools/testing/selftests/bpf/verifier/atomic_bounds.c new file mode 100644 index 000000000000..e82183e4914f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_bounds.c @@ -0,0 +1,27 @@ +{ + "BPF_ATOMIC bounds propagation, mem->reg", + .insns = { + /* a = 0; */ + /* + * Note this is implemented with two separate instructions, + * where you might think one would suffice: + * + * BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + * + * This is because BPF_ST_MEM doesn't seem to set the stack slot + * type to 0 when storing an immediate. + */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* b = atomic_fetch_add(&a, 1); */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* Verifier should be able to tell that this infinite loop isn't reachable. */ + /* if (b) while (true) continue; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "back-edge", +}, -- cgit v1.2.3-71-gd317 From 15075bb7228ae6422e9e79c27ea69cbd63a9d9dc Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 2 Feb 2021 21:37:30 +0000 Subject: selftests/bpf: Fix a compiler warning in local_storage test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some compilers trigger a warning when tmp_dir_path is allocated with a fixed size of 64-bytes and used in the following snprintf: snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm", tmp_dir_path); warning: ‘/copy_of_rm’ directive output may be truncated writing 11 bytes into a region of size between 1 and 64 [-Wformat-truncation=] This is because it assumes that tmp_dir_path can be a maximum of 64 bytes long and, therefore, the end-result can get truncated. Fix it by not using a fixed size in the initialization of tmp_dir_path which allows the compiler to track actual size of the array better. Fixes: 2f94ac191846 ("bpf: Update local storage test to check handling of null ptrs") Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210202213730.1906931-1-kpsingh@kernel.org --- tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 3bfcf00c0a67..d2c16eaae367 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -113,7 +113,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd) void test_test_local_storage(void) { - char tmp_dir_path[64] = "/tmp/local_storageXXXXXX"; + char tmp_dir_path[] = "/tmp/local_storageXXXXXX"; int err, serv_sk = -1, task_fd = -1, rm_fd = -1; struct local_storage *skel = NULL; char tmp_exec_path[64]; -- cgit v1.2.3-71-gd317 From 060fd1035880dd466607d1c279ca913dd1f96916 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 30 Jan 2021 17:01:50 -0500 Subject: selftest/bpf: Testing for multiple logs on REJECT This patch adds support to verifier tests to check for a succession of verifier log messages on program load failure. This makes the errstr field work uniformly across REJECT and VERBOSE_ACCEPT checks. This patch also increases the maximum size of a message in the series of messages to test from 80 chars to 200 chars. This is in order to keep existing tests working, which sometimes test for messages larger than 80 chars (which was accepted in the REJECT case, when testing for a single message, but not in the VERBOSE_ACCEPT case, when testing for possibly multiple messages). And example of such a long, checked message is in bounds.c: "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root" Signed-off-by: Andrei Matei Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210130220150.59305-1-andreimatei1@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 59bfa6201d1d..58b5a349d3ba 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -88,6 +88,10 @@ struct bpf_test { int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; + /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. + * Can be a tab-separated sequence of expected strings. An empty string + * means no log verification. + */ const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -995,13 +999,19 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, return 0; } +/* Returns true if every part of exp (tab-separated) appears in log, in order. + * + * If exp is an empty string, returns true. + */ static bool cmp_str_seq(const char *log, const char *exp) { - char needle[80]; + char needle[200]; const char *p, *q; int len; do { + if (!strlen(exp)) + break; p = strchr(exp, '\t'); if (!p) p = exp + strlen(exp); @@ -1015,7 +1025,7 @@ static bool cmp_str_seq(const char *log, const char *exp) needle[len] = 0; q = strstr(log, needle); if (!q) { - printf("FAIL\nUnexpected verifier log in successful load!\n" + printf("FAIL\nUnexpected verifier log!\n" "EXP: %s\nRES:\n", needle); return false; } @@ -1130,7 +1140,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!expected_err || !strstr(bpf_vlog, expected_err)) { + if (!expected_err || !cmp_str_seq(bpf_vlog, expected_err)) { printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", expected_err, bpf_vlog); goto fail_log; -- cgit v1.2.3-71-gd317 From 5f10c1aac8b29d225d19a74656865d1ee3db6eaa Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Feb 2021 12:34:45 -0800 Subject: libbpf: Stop using feature-detection Makefiles Libbpf's Makefile relies on Linux tools infrastructure's feature detection framework, but libbpf's needs are very modest: it detects the presence of libelf and libz, both of which are mandatory. So it doesn't benefit much from the framework, but pays significant costs in terms of maintainability and debugging experience, when something goes wrong. The other feature detector, testing for the presernce of minimal BPF API in system headers is long obsolete as well, providing no value. So stop using feature detection and just assume the presence of libelf and libz during build time. Worst case, user will get a clear and actionable linker error, e.g.: /usr/bin/ld: cannot find -lelf On the other hand, we completely bypass recurring issues various users reported over time with false negatives of feature detection (libelf or libz not being detected, while they are actually present in the system). Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Randy Dunlap Link: https://lore.kernel.org/bpf/20210203203445.3356114-1-andrii@kernel.org --- tools/lib/bpf/.gitignore | 1 - tools/lib/bpf/Makefile | 47 ++++------------------------------------------- 2 files changed, 4 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 8a81b3679d2b..5d4cfac671d5 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only libbpf_version.h libbpf.pc -FEATURE-DUMP.libbpf libbpf.so.* TAGS tags diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 55bd78b3496f..887a494ad5fc 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -58,28 +58,7 @@ ifndef VERBOSE VERBOSE = 0 endif -FEATURE_USER = .libbpf -FEATURE_TESTS = libelf zlib bpf -FEATURE_DISPLAY = libelf zlib bpf - INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi -FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) - -check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help -ifdef MAKECMDGOALS -ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) - check_feat := 0 -endif -endif - -ifeq ($(check_feat),1) -ifeq ($(FEATURES_DUMP),) -include $(srctree)/tools/build/Makefile.feature -else -include $(FEATURES_DUMP) -endif -endif export prefix libdir src obj @@ -157,7 +136,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_SHARED): force $(BPF_HELPER_DEFS) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -175,7 +154,7 @@ $(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_STATIC): force $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -264,34 +243,16 @@ install_pkgconfig: $(PC_FILE) install: install_lib install_pkgconfig install_headers -### Cleaning rules - -config-clean: - $(call QUIET_CLEAN, feature-detect) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null - -clean: config-clean +clean: $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ $(addprefix $(OUTPUT), \ *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) - $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf - - -PHONY += force elfdep zdep bpfdep cscope tags +PHONY += force cscope tags force: -elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi - -zdep: - @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi - -bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi - cscope: ls *.c *.h > cscope.files cscope -b -q -I $(srctree)/include -f cscope.out -- cgit v1.2.3-71-gd317 From c9709f52386d9cc944417f45b979d821d1d08c46 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 4 Feb 2021 19:45:43 +0000 Subject: bpf: Helper script for running BPF presubmit tests The script runs the BPF selftests locally on the same kernel image as they would run post submit in the BPF continuous integration framework. The goal of the script is to allow contributors to run selftests locally in the same environment to check if their changes would end up breaking the BPF CI and reduce the back-and-forth between the maintainers and the developers. Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Tested-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20210204194544.3383814-2-kpsingh@kernel.org --- tools/testing/selftests/bpf/vmtest.sh | 368 ++++++++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100755 tools/testing/selftests/bpf/vmtest.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh new file mode 100755 index 000000000000..26ae8d0b6ce3 --- /dev/null +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -0,0 +1,368 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -u +set -e + +# This script currently only works for x86_64, as +# it is based on the VM image used by the BPF CI which is +# x86_64. +QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}" +X86_BZIMAGE="arch/x86/boot/bzImage" +DEFAULT_COMMAND="./test_progs" +MOUNT_DIR="mnt" +ROOTFS_IMAGE="root.img" +OUTPUT_DIR="$HOME/.bpf_selftests" +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config" +KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config" +INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX" +NUM_COMPILE_JOBS="$(nproc)" + +usage() +{ + cat <] -- [] + + is the command you would normally run when you are in +tools/testing/selftests/bpf. e.g: + + $0 -- ./test_progs -t test_lsm + +If no command is specified, "${DEFAULT_COMMAND}" will be run by +default. + +If you build your kernel using KBUILD_OUTPUT= or O= options, these +can be passed as environment variables to the script: + + O= $0 -- ./test_progs -t test_lsm + +or + + KBUILD_OUTPUT= $0 -- ./test_progs -t test_lsm + +Options: + + -i) Update the rootfs image with a newer version. + -d) Update the output directory (default: ${OUTPUT_DIR}) + -j) Number of jobs for compilation, similar to -j in make + (default: ${NUM_COMPILE_JOBS}) +EOF +} + +unset URLS +populate_url_map() +{ + if ! declare -p URLS &> /dev/null; then + # URLS contain the mapping from file names to URLs where + # those files can be downloaded from. + declare -gA URLS + while IFS=$'\t' read -r name url; do + URLS["$name"]="$url" + done < <(curl -Lsf ${INDEX_URL}) + fi +} + +download() +{ + local file="$1" + + if [[ ! -v URLS[$file] ]]; then + echo "$file not found" >&2 + return 1 + fi + + echo "Downloading $file..." >&2 + curl -Lsf "${URLS[$file]}" "${@:2}" +} + +newest_rootfs_version() +{ + { + for file in "${!URLS[@]}"; do + if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then + echo "${BASH_REMATCH[1]}" + fi + done + } | sort -rV | head -1 +} + +download_rootfs() +{ + local rootfsversion="$1" + local dir="$2" + + if ! which zstd &> /dev/null; then + echo 'Could not find "zstd" on the system, please install zstd' + exit 1 + fi + + download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | + zstd -d | sudo tar -C "$dir" -x +} + +recompile_kernel() +{ + local kernel_checkout="$1" + local make_command="$2" + + cd "${kernel_checkout}" + + ${make_command} olddefconfig + ${make_command} +} + +mount_image() +{ + local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}" + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + + sudo mount -o loop "${rootfs_img}" "${mount_dir}" +} + +unmount_image() +{ + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + + sudo umount "${mount_dir}" &> /dev/null +} + +update_selftests() +{ + local kernel_checkout="$1" + local selftests_dir="${kernel_checkout}/tools/testing/selftests/bpf" + + cd "${selftests_dir}" + ${make_command} + + # Mount the image and copy the selftests to the image. + mount_image + sudo rm -rf "${mount_dir}/root/bpf" + sudo cp -r "${selftests_dir}" "${mount_dir}/root" + unmount_image +} + +update_init_script() +{ + local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d" + local init_script="${init_script_dir}/S50-startup" + local command="$1" + local log_file="$2" + + mount_image + + if [[ ! -d "${init_script_dir}" ]]; then + cat <${init_script}" <&1 | tee /root/${log_file} +poweroff -f +EOF + + sudo chmod a+x "${init_script}" + unmount_image +} + +create_vm_image() +{ + local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}" + local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" + + rm -rf "${rootfs_img}" + touch "${rootfs_img}" + chattr +C "${rootfs_img}" >/dev/null 2>&1 || true + + truncate -s 2G "${rootfs_img}" + mkfs.ext4 -q "${rootfs_img}" + + mount_image + download_rootfs "$(newest_rootfs_version)" "${mount_dir}" + unmount_image +} + +run_vm() +{ + local kernel_bzimage="$1" + local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}" + + if ! which "${QEMU_BINARY}" &> /dev/null; then + cat < Date: Thu, 4 Feb 2021 19:45:44 +0000 Subject: bpf/selftests: Add a short note about vmtest.sh in README.rst Add a short note to make contributors aware of the existence of the script. The documentation does not intentionally document all the options of the script to avoid mentioning it in two places (it's available in the usage / help message of the script). Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210204194544.3383814-3-kpsingh@kernel.org --- tools/testing/selftests/bpf/README.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index ca064180d4d0..fd148b8410fa 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -6,6 +6,30 @@ General instructions on running selftests can be found in __ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests +========================= +Running Selftests in a VM +========================= + +It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``. +The script tries to ensure that the tests are run with the same environment as they +would be run post-submit in the CI used by the Maintainers. + +This script downloads a suitable Kconfig and VM userspace image from the system used by +the CI. It builds the kernel (without overwriting your existing Kconfig), recompiles the +bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and +saves the resulting output (by default in ``~/.bpf_selftests``). + +For more information on about using the script, run: + +.. code-block:: console + + $ tools/testing/selftests/bpf/vmtest.sh -h + +.. note:: The script uses pahole and clang based on host environment setting. + If you want to change pahole and llvm, you can change `PATH` environment + variable in the beginning of script. + +.. note:: The script currently only supports x86_64. Additional information about selftest failures are documented here. -- cgit v1.2.3-71-gd317 From f446b570ac7e1e71ffd6d2a31ffbcc5f32330a6d Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 4 Feb 2021 19:36:22 +0000 Subject: bpf/selftests: Update the IMA test to use BPF ring buffer Instead of using shared global variables between userspace and BPF, use the ring buffer to send the IMA hash on the BPF ring buffer. This helps in validating both IMA and the usage of the ringbuffer in sleepable programs. Signed-off-by: KP Singh Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210204193622.3367275-3-kpsingh@kernel.org --- tools/testing/selftests/bpf/prog_tests/test_ima.c | 23 ++++++++++++---- tools/testing/selftests/bpf/progs/ima.c | 33 +++++++++++++++++------ 2 files changed, 43 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index 61fca681d524..b54bc0c351b7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "ima.skel.h" @@ -31,9 +32,18 @@ static int run_measured_process(const char *measured_dir, u32 *monitored_pid) return -EINVAL; } +static u64 ima_hash_from_bpf; + +static int process_sample(void *ctx, void *data, size_t len) +{ + ima_hash_from_bpf = *((u64 *)data); + return 0; +} + void test_test_ima(void) { char measured_dir_template[] = "/tmp/ima_measuredXXXXXX"; + struct ring_buffer *ringbuf; const char *measured_dir; char cmd[256]; @@ -44,6 +54,11 @@ void test_test_ima(void) if (CHECK(!skel, "skel_load", "skeleton failed\n")) goto close_prog; + ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), + process_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ringbuf")) + goto close_prog; + err = ima__attach(skel); if (CHECK(err, "attach", "attach failed: %d\n", err)) goto close_prog; @@ -60,11 +75,9 @@ void test_test_ima(void) if (CHECK(err, "run_measured_process", "err = %d\n", err)) goto close_clean; - CHECK(skel->data->ima_hash_ret < 0, "ima_hash_ret", - "ima_hash_ret = %ld\n", skel->data->ima_hash_ret); - - CHECK(skel->bss->ima_hash == 0, "ima_hash", - "ima_hash = %lu\n", skel->bss->ima_hash); + err = ring_buffer__consume(ringbuf); + ASSERT_EQ(err, 1, "num_samples_or_err"); + ASSERT_NEQ(ima_hash_from_bpf, 0, "ima_hash"); close_clean: snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir); diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c index 86b21aff4bc5..96060ff4ffc6 100644 --- a/tools/testing/selftests/bpf/progs/ima.c +++ b/tools/testing/selftests/bpf/progs/ima.c @@ -9,20 +9,37 @@ #include #include -long ima_hash_ret = -1; -u64 ima_hash = 0; u32 monitored_pid = 0; +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + char _license[] SEC("license") = "GPL"; SEC("lsm.s/bprm_committed_creds") -int BPF_PROG(ima, struct linux_binprm *bprm) +void BPF_PROG(ima, struct linux_binprm *bprm) { - u32 pid = bpf_get_current_pid_tgid() >> 32; + u64 ima_hash = 0; + u64 *sample; + int ret; + u32 pid; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid == monitored_pid) { + ret = bpf_ima_inode_hash(bprm->file->f_inode, &ima_hash, + sizeof(ima_hash)); + if (ret < 0 || ima_hash == 0) + return; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(u64), 0); + if (!sample) + return; - if (pid == monitored_pid) - ima_hash_ret = bpf_ima_inode_hash(bprm->file->f_inode, - &ima_hash, sizeof(ima_hash)); + *sample = ima_hash; + bpf_ringbuf_submit(sample, 0); + } - return 0; + return; } -- cgit v1.2.3-71-gd317 From 215cb7d3823e798de327e3232e396434fab84f42 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Sun, 7 Feb 2021 15:52:40 +0800 Subject: bpf/benchs/bench_ringbufs: Remove unneeded semicolon Eliminate the following coccicheck warning: ./tools/testing/selftests/bpf/benchs/bench_ringbufs.c:322:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1612684360-115910-1-git-send-email-yang.lee@linux.alibaba.com --- tools/testing/selftests/bpf/benchs/bench_ringbufs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index da87c7f31891..bde6c9d4cbd4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -319,7 +319,7 @@ static void ringbuf_custom_process_ring(struct ringbuf_custom *r) smp_store_release(r->consumer_pos, cons_pos); else break; - }; + } } static void *ringbuf_custom_consumer(void *input) -- cgit v1.2.3-71-gd317 From 11da9f0c6d145e482991d29a771ce717d2f1b92b Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 8 Feb 2021 18:30:13 +0800 Subject: selftests/bpf: Remove unneeded semicolon Eliminate the following coccicheck warning: ./tools/testing/selftests/bpf/test_flow_dissector.c:506:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1612780213-84583-1-git-send-email-yang.lee@linux.alibaba.com --- tools/testing/selftests/bpf/test_flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c index 01f0c634d548..571cc076dd7d 100644 --- a/tools/testing/selftests/bpf/test_flow_dissector.c +++ b/tools/testing/selftests/bpf/test_flow_dissector.c @@ -503,7 +503,7 @@ static int do_rx(int fd) if (rbuf != cfg_payload_char) error(1, 0, "recv: payload mismatch"); num++; - }; + } return num; } -- cgit v1.2.3-71-gd317 From 1589a1fa4e3832bd43742f111e6a883a28fe7ae9 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Mon, 8 Feb 2021 12:37:37 +0000 Subject: selftests/bpf: Add missing cleanup in atomic_bounds test Add missing skeleton destroy call. Fixes: 37086bfdc737 ("bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH") Reported-by: Yonghong Song Signed-off-by: Brendan Jackman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210208123737.963172-1-jackmanb@google.com --- tools/testing/selftests/bpf/prog_tests/atomic_bounds.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c index addf127068e4..69bd7853e8f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c @@ -12,4 +12,6 @@ void test_atomic_bounds(void) skel = atomic_bounds__open_and_load(); if (CHECK(!skel, "skel_load", "couldn't load program\n")) return; + + atomic_bounds__destroy(skel); } -- cgit v1.2.3-71-gd317 From 0a1b0fd929a8bbdf7c47b418b8d0ee6a8de3a7a3 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Mon, 8 Feb 2021 17:43:36 +0800 Subject: bpf: Simplify bool comparison Fix the following coccicheck warning: ./tools/bpf/bpf_dbg.c:893:32-36: WARNING: Comparison to bool. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1612777416-34339-1-git-send-email-jiapeng.chong@linux.alibaba.com --- tools/bpf/bpf_dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c index a0ebcdf59c31..a07dfc479270 100644 --- a/tools/bpf/bpf_dbg.c +++ b/tools/bpf/bpf_dbg.c @@ -890,7 +890,7 @@ static int bpf_run_stepping(struct sock_filter *f, uint16_t bpf_len, bool stop = false; int i = 1; - while (bpf_curr.Rs == false && stop == false) { + while (!bpf_curr.Rs && !stop) { bpf_safe_regs(); if (i++ == next) -- cgit v1.2.3-71-gd317 From fc6b48f692f89cc48bfb7fd1aa65454dfe9b2d77 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 5 Feb 2021 13:40:17 +0100 Subject: tools/resolve_btfids: Build libbpf and libsubcmd in separate directories Setting up separate build directories for libbpf and libpsubcmd, so it's separated from other objects and we don't get them mixed in the future. It also simplifies cleaning, which is now simple rm -rf. Also there's no need for FEATURE-DUMP.libbpf and bpf_helper_defs.h files in .gitignore anymore. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210205124020.683286-2-jolsa@kernel.org --- tools/bpf/resolve_btfids/.gitignore | 2 -- tools/bpf/resolve_btfids/Makefile | 26 +++++++++++--------------- 2 files changed, 11 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore index a026df7dc280..25f308c933cc 100644 --- a/tools/bpf/resolve_btfids/.gitignore +++ b/tools/bpf/resolve_btfids/.gitignore @@ -1,4 +1,2 @@ -/FEATURE-DUMP.libbpf -/bpf_helper_defs.h /fixdep /resolve_btfids diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index bf656432ad73..1d46a247ec95 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -28,22 +28,22 @@ OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ LIBBPF_SRC := $(srctree)/tools/lib/bpf/ SUBCMD_SRC := $(srctree)/tools/lib/subcmd/ -BPFOBJ := $(OUTPUT)/libbpf.a -SUBCMDOBJ := $(OUTPUT)/libsubcmd.a +BPFOBJ := $(OUTPUT)/libbpf/libbpf.a +SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a BINARY := $(OUTPUT)/resolve_btfids BINARY_IN := $(BINARY)-in.o all: $(BINARY) -$(OUTPUT): +$(OUTPUT) $(OUTPUT)/libbpf $(OUTPUT)/libsubcmd: $(call msg,MKDIR,,$@) - $(Q)mkdir -p $(OUTPUT) + $(Q)mkdir -p $(@) -$(SUBCMDOBJ): fixdep FORCE - $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT) +$(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd + $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) CFLAGS := -g \ @@ -57,23 +57,19 @@ LIBS = -lelf -lz export srctree OUTPUT CFLAGS Q include $(srctree)/tools/build/Makefile.include -$(BINARY_IN): fixdep FORCE +$(BINARY_IN): fixdep FORCE | $(OUTPUT) $(Q)$(MAKE) $(build)=resolve_btfids $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) $(call msg,LINK,$@) $(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) -libsubcmd-clean: - $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT) clean - -libbpf-clean: - $(Q)$(MAKE) -C $(LIBBPF_SRC) OUTPUT=$(OUTPUT) clean - -clean: libsubcmd-clean libbpf-clean fixdep-clean +clean: fixdep-clean $(call msg,CLEAN,$(BINARY)) $(Q)$(RM) -f $(BINARY); \ $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \ + $(RM) -rf $(OUTPUT)/libbpf; \ + $(RM) -rf $(OUTPUT)/libsubcmd; \ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) tags: -- cgit v1.2.3-71-gd317 From f23130979c2f15ea29a431cd9e1ea7916337bbd4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 5 Feb 2021 13:40:18 +0100 Subject: tools/resolve_btfids: Check objects before removing We want this clean to be called from tree's root clean and that one is silent if there's nothing to clean. Adding check for all object to clean and display CLEAN messages only if there are objects to remove. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210205124020.683286-3-jolsa@kernel.org --- tools/bpf/resolve_btfids/Makefile | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 1d46a247ec95..be09ec4f03ff 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -64,13 +64,20 @@ $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) $(call msg,LINK,$@) $(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS) +clean_objects := $(wildcard $(OUTPUT)/*.o \ + $(OUTPUT)/.*.o.cmd \ + $(OUTPUT)/.*.o.d \ + $(OUTPUT)/libbpf \ + $(OUTPUT)/libsubcmd \ + $(OUTPUT)/resolve_btfids) + +ifneq ($(clean_objects),) clean: fixdep-clean $(call msg,CLEAN,$(BINARY)) - $(Q)$(RM) -f $(BINARY); \ - $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \ - $(RM) -rf $(OUTPUT)/libbpf; \ - $(RM) -rf $(OUTPUT)/libsubcmd; \ - find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + $(Q)$(RM) -rf $(clean_objects) +else +clean: +endif tags: $(call msg,GEN,,tags) -- cgit v1.2.3-71-gd317 From 7962cb9b640af98ccb577f46c8b894319e6c5c20 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 5 Feb 2021 13:40:19 +0100 Subject: tools/resolve_btfids: Set srctree variable unconditionally We want this clean to be called from tree's root Makefile, which defines same srctree variable and that will screw the make setup. We actually do not use srctree being passed from outside, so we can solve this by setting current srctree value directly. Also changing the way how srctree is initialized as suggested by Andrri. Also root Makefile does not define the implicit RM variable, so adding RM initialization. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210205124020.683286-4-jolsa@kernel.org --- tools/bpf/resolve_btfids/Makefile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index be09ec4f03ff..bb9fa8de7e62 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -2,11 +2,7 @@ include ../../scripts/Makefile.include include ../../scripts/Makefile.arch -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -endif +srctree := $(abspath $(CURDIR)/../../../) ifeq ($(V),1) Q = @@ -22,6 +18,7 @@ AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) ARCH = $(HOSTARCH) +RM ?= rm OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ -- cgit v1.2.3-71-gd317 From a680cb3d8e3f4f84205720b90c926579d04eedb6 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 6 Feb 2021 20:10:25 -0500 Subject: selftest/bpf: Adjust expected verifier errors The verifier errors around stack accesses have changed slightly in the previous commit (generally for the better). Signed-off-by: Andrei Matei Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210207011027.676572-3-andreimatei1@gmail.com --- tools/testing/selftests/bpf/verifier/basic_stack.c | 2 +- tools/testing/selftests/bpf/verifier/calls.c | 4 ++-- tools/testing/selftests/bpf/verifier/const_or.c | 4 ++-- .../selftests/bpf/verifier/helper_access_var_len.c | 12 ++++++------ tools/testing/selftests/bpf/verifier/int_ptr.c | 6 +++--- tools/testing/selftests/bpf/verifier/raw_stack.c | 10 +++++----- tools/testing/selftests/bpf/verifier/stack_ptr.c | 22 +++++++++++++--------- tools/testing/selftests/bpf/verifier/unpriv.c | 2 +- tools/testing/selftests/bpf/verifier/var_off.c | 16 ++++++++-------- 9 files changed, 41 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c index b56f8117c09d..f995777dddb3 100644 --- a/tools/testing/selftests/bpf/verifier/basic_stack.c +++ b/tools/testing/selftests/bpf/verifier/basic_stack.c @@ -4,7 +4,7 @@ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack", + .errstr = "invalid write to stack", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index c4f5d909e58a..eb888c8479c3 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1228,7 +1228,7 @@ .prog_type = BPF_PROG_TYPE_XDP, .fixup_map_hash_8b = { 23 }, .result = REJECT, - .errstr = "invalid read from stack off -16+0 size 8", + .errstr = "invalid read from stack R7 off=-16 size=8", }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", @@ -1958,7 +1958,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 6 }, - .errstr = "invalid indirect read from stack off -8+0 size 8", + .errstr = "invalid indirect read from stack R2 off -8+0 size 8", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c index 6c214c58e8d4..0719b0ddec04 100644 --- a/tools/testing/selftests/bpf/verifier/const_or.c +++ b/tools/testing/selftests/bpf/verifier/const_or.c @@ -23,7 +23,7 @@ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-48 access_size=58", + .errstr = "invalid indirect access to stack R1 off=-48 size=58", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -54,7 +54,7 @@ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-48 access_size=58", + .errstr = "invalid indirect access to stack R1 off=-48 size=58", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index 87c4e7900083..0ab7f1dfc97a 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -39,7 +39,7 @@ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack off -64+0 size 64", + .errstr = "invalid indirect read from stack R1 off -64+0 size 64", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -59,7 +59,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=65", + .errstr = "invalid indirect access to stack R1 off=-64 size=65", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -136,7 +136,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=65", + .errstr = "invalid indirect access to stack R1 off=-64 size=65", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -156,7 +156,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid stack type R1 off=-64 access_size=65", + .errstr = "invalid indirect access to stack R1 off=-64 size=65", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -194,7 +194,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack off -64+0 size 64", + .errstr = "invalid indirect read from stack R1 off -64+0 size 64", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -584,7 +584,7 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack off -64+32 size 64", + .errstr = "invalid indirect read from stack R1 off -64+32 size 64", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c index ca3b4729df66..070893fb2900 100644 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -27,7 +27,7 @@ }, .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack off -16+0 size 8", + .errstr = "invalid indirect read from stack R4 off -16+0 size 8", }, { "ARG_PTR_TO_LONG half-uninitialized", @@ -59,7 +59,7 @@ }, .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack off -16+4 size 8", + .errstr = "invalid indirect read from stack R4 off -16+4 size 8", }, { "ARG_PTR_TO_LONG misaligned", @@ -125,7 +125,7 @@ }, .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid stack type R4 off=-4 access_size=8", + .errstr = "invalid indirect access to stack R4 off=-4 size=8", }, { "ARG_PTR_TO_LONG initialized", diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c index 193d9e87d5a9..cc8e8c3cdc03 100644 --- a/tools/testing/selftests/bpf/verifier/raw_stack.c +++ b/tools/testing/selftests/bpf/verifier/raw_stack.c @@ -11,7 +11,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid read from stack off -8+0 size 8", + .errstr = "invalid read from stack R6 off=-8 size=8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -59,7 +59,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3", + .errstr = "invalid zero-sized read", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -205,7 +205,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-513 access_size=8", + .errstr = "invalid indirect access to stack R3 off=-513 size=8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -221,7 +221,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-1 access_size=8", + .errstr = "invalid indirect access to stack R3 off=-1 size=8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -285,7 +285,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-512 access_size=0", + .errstr = "invalid zero-sized read", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 8bfeb77c60bd..07eaa04412ae 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -44,7 +44,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off=-79992 size=8", + .errstr = "invalid write to stack R1 off=-79992 size=8", .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { @@ -57,7 +57,7 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off=0 size=8", + .errstr = "invalid write to stack R1 off=0 size=8", }, { "PTR_TO_STACK check high 1", @@ -106,7 +106,7 @@ BPF_EXIT_INSN(), }, .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid stack off=0 size=1", + .errstr = "invalid write to stack R1 off=0 size=1", .result = REJECT, }, { @@ -119,7 +119,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid write to stack R1", }, { "PTR_TO_STACK check high 6", @@ -131,7 +132,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid write to stack", }, { "PTR_TO_STACK check high 7", @@ -183,7 +185,7 @@ BPF_EXIT_INSN(), }, .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid stack off=-513 size=1", + .errstr = "invalid write to stack R1 off=-513 size=1", .result = REJECT, }, { @@ -208,7 +210,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid write to stack", }, { "PTR_TO_STACK check low 6", @@ -220,7 +223,8 @@ BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack off", + .errstr = "invalid write to stack", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { "PTR_TO_STACK check low 7", @@ -292,7 +296,7 @@ BPF_EXIT_INSN(), }, .result_unpriv = REJECT, - .errstr_unpriv = "invalid stack off=0 size=1", + .errstr_unpriv = "invalid write to stack R1 off=0 size=1", .result = ACCEPT, .retval = 42, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index ee298627abae..b018ad71e0a8 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -108,7 +108,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr_unpriv = "invalid indirect read from stack off -8+0 size 8", + .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8", .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index 8504ac937809..49b78a1a261b 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -18,7 +18,7 @@ .prog_type = BPF_PROG_TYPE_LWT_IN, }, { - "variable-offset stack access", + "variable-offset stack read, priv vs unpriv", .insns = { /* Fill the top 8 bytes of the stack */ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -63,7 +63,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R4 unbounded indirect variable offset stack access", + .errstr = "invalid unbounded variable-offset indirect access to stack R4", .result = REJECT, .prog_type = BPF_PROG_TYPE_SOCK_OPS, }, @@ -88,7 +88,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "R2 max value is outside of stack bound", + .errstr = "invalid variable-offset indirect access to stack R2", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -113,7 +113,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "R2 min value is outside of stack bound", + .errstr = "invalid variable-offset indirect access to stack R2", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -138,7 +138,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack var_off", + .errstr = "invalid indirect read from stack R2 var_off", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -163,7 +163,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack var_off", + .errstr = "invalid indirect read from stack R2 var_off", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, @@ -189,7 +189,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 6 }, - .errstr_unpriv = "R2 stack pointer arithmetic goes out of range, prohibited for !root", + .errstr_unpriv = "R2 variable stack access prohibited for !root", .result_unpriv = REJECT, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, @@ -217,7 +217,7 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack var_off", + .errstr = "invalid indirect read from stack R4 var_off", .result = REJECT, .prog_type = BPF_PROG_TYPE_SOCK_OPS, }, -- cgit v1.2.3-71-gd317 From 7a22930c4179b51352f2ec9feb35167cbe79afd9 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 6 Feb 2021 20:10:26 -0500 Subject: selftest/bpf: Verifier tests for var-off access Add tests for the new functionality - reading and writing to the stack through a variable-offset pointer. Signed-off-by: Andrei Matei Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210207011027.676572-4-andreimatei1@gmail.com --- tools/testing/selftests/bpf/verifier/var_off.c | 99 +++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index 49b78a1a261b..eab1f7f56e2f 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -31,14 +31,109 @@ * we don't know which */ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it */ + /* dereference it for a stack read */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "variable-offset stack read, uninitialized", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), + /* add it to fp. We now have either fp-4 or fp-8, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it for a stack read */ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)", .result = REJECT, + .errstr = "invalid variable-offset read from stack R2", .prog_type = BPF_PROG_TYPE_LWT_IN, }, +{ + "variable-offset stack write, priv vs unpriv", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 8-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-8 or fp-16, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Dereference it for a stack write */ + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + /* Now read from the address we just wrote. This shows + * that, after a variable-offset write, a priviledged + * program can read the slots that were in the range of + * that write (even if the verifier doesn't actually know + * if the slot being read was really written to or not. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + /* Variable stack access is rejected for unprivileged. + */ + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "variable-offset stack write clobbers spilled regs", + .insns = { + /* Dummy instruction; needed because we need to patch the next one + * and we can't patch the first instruction. + */ + BPF_MOV64_IMM(BPF_REG_6, 0), + /* Make R0 a map ptr */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 8-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-8 or fp-16, but + * we don't know which. + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Spill R0(map ptr) into stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* Dereference the unknown value for a stack write */ + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + /* Fill the register back into R2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), + /* Try to dereference R2 for a memory load */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + /* The unpriviledged case is not too interesting; variable + * stack access is rejected. + */ + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .result_unpriv = REJECT, + /* In the priviledged case, dereferencing a spilled-and-then-filled + * register is rejected because the previous variable offset stack + * write might have overwritten the spilled pointer (i.e. we lose track + * of the spilled register when we analyze the write). + */ + .errstr = "R2 invalid mem access 'inv'", + .result = REJECT, +}, { "indirect variable-offset stack access, unbounded", .insns = { -- cgit v1.2.3-71-gd317 From 0fd7562af1cd21fce4c1011825e18de1cfa97baa Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 6 Feb 2021 20:10:27 -0500 Subject: selftest/bpf: Add test for var-offset stack access Add a higher-level test (C BPF program) for the new functionality - variable access stack reads and writes. Signed-off-by: Andrei Matei Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210207011027.676572-5-andreimatei1@gmail.com --- .../selftests/bpf/prog_tests/stack_var_off.c | 35 +++++++++++++++ .../selftests/bpf/progs/test_stack_var_off.c | 51 ++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/stack_var_off.c create mode 100644 tools/testing/selftests/bpf/progs/test_stack_var_off.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/stack_var_off.c b/tools/testing/selftests/bpf/prog_tests/stack_var_off.c new file mode 100644 index 000000000000..2ce9deefa59c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stack_var_off.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "test_stack_var_off.skel.h" + +/* Test read and writes to the stack performed with offsets that are not + * statically known. + */ +void test_stack_var_off(void) +{ + int duration = 0; + struct test_stack_var_off *skel; + + skel = test_stack_var_off__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + /* Give pid to bpf prog so it doesn't trigger for anyone else. */ + skel->bss->test_pid = getpid(); + /* Initialize the probe's input. */ + skel->bss->input[0] = 2; + skel->bss->input[1] = 42; /* This will be returned in probe_res. */ + + if (!ASSERT_OK(test_stack_var_off__attach(skel), "skel_attach")) + goto cleanup; + + /* Trigger probe. */ + usleep(1); + + if (CHECK(skel->bss->probe_res != 42, "check_probe_res", + "wrong probe res: %d\n", skel->bss->probe_res)) + goto cleanup; + +cleanup: + test_stack_var_off__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_stack_var_off.c b/tools/testing/selftests/bpf/progs/test_stack_var_off.c new file mode 100644 index 000000000000..665e6ae09d37 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_stack_var_off.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +int probe_res; + +char input[4] = {}; +int test_pid; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int probe(void *ctx) +{ + /* This BPF program performs variable-offset reads and writes on a + * stack-allocated buffer. + */ + char stack_buf[16]; + unsigned long len; + unsigned long last; + + if ((bpf_get_current_pid_tgid() >> 32) != test_pid) + return 0; + + /* Copy the input to the stack. */ + __builtin_memcpy(stack_buf, input, 4); + + /* The first byte in the buffer indicates the length. */ + len = stack_buf[0] & 0xf; + last = (len - 1) & 0xf; + + /* Append something to the buffer. The offset where we write is not + * statically known; this is a variable-offset stack write. + */ + stack_buf[len] = 42; + + /* Index into the buffer at an unknown offset. This is a + * variable-offset stack read. + * + * Note that if it wasn't for the preceding variable-offset write, this + * read would be rejected because the stack slot cannot be verified as + * being initialized. With the preceding variable-offset write, the + * stack slot still cannot be verified, but the write inhibits the + * respective check on the reasoning that, if there was a + * variable-offset to a higher-or-equal spot, we're probably reading + * what we just wrote. + */ + probe_res = stack_buf[last]; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-71-gd317 From 45df3052682564327acc0a0fdb0f9adc3a27a50b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 10 Feb 2021 03:07:13 +0100 Subject: selftests/bpf: Fix endianness issues in atomic tests Atomic tests store a DW, but then load it back as a W from the same address. This doesn't work on big-endian systems, and since the point of those tests is not testing narrow loads, fix simply by loading a DW. Fixes: 98d666d05a1d ("bpf: Add tests for new BPF atomic operations") Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210020713.77911-1-iii@linux.ibm.com --- tools/testing/selftests/bpf/verifier/atomic_and.c | 2 +- tools/testing/selftests/bpf/verifier/atomic_or.c | 2 +- tools/testing/selftests/bpf/verifier/atomic_xor.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c index 600bc5e0f143..1bdc8e6684f7 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_and.c +++ b/tools/testing/selftests/bpf/verifier/atomic_and.c @@ -7,7 +7,7 @@ BPF_MOV64_IMM(BPF_REG_1, 0x011), BPF_ATOMIC_OP(BPF_DW, BPF_AND, BPF_REG_10, BPF_REG_1, -8), /* if (val != 0x010) exit(2); */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x010, 2), BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c index ebe6e51455ba..70f982e1f9f0 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_or.c +++ b/tools/testing/selftests/bpf/verifier/atomic_or.c @@ -7,7 +7,7 @@ BPF_MOV64_IMM(BPF_REG_1, 0x011), BPF_ATOMIC_OP(BPF_DW, BPF_OR, BPF_REG_10, BPF_REG_1, -8), /* if (val != 0x111) exit(2); */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x111, 2), BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/verifier/atomic_xor.c b/tools/testing/selftests/bpf/verifier/atomic_xor.c index eb791e547b47..74e8fb46694b 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_xor.c +++ b/tools/testing/selftests/bpf/verifier/atomic_xor.c @@ -7,7 +7,7 @@ BPF_MOV64_IMM(BPF_REG_1, 0x011), BPF_ATOMIC_OP(BPF_DW, BPF_XOR, BPF_REG_10, BPF_REG_1, -8), /* if (val != 0x101) exit(2); */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x101, 2), BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), -- cgit v1.2.3-71-gd317 From bd2d4e6c6e9f0186967252e8c7ab29a23c3db9cf Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 9 Feb 2021 16:46:38 +0800 Subject: selftests/bpf: Simplify the calculation of variables Fix the following coccicheck warnings: ./tools/testing/selftests/bpf/xdpxceiver.c:954:28-30: WARNING !A || A && B is equivalent to !A || B. ./tools/testing/selftests/bpf/xdpxceiver.c:932:28-30: WARNING !A || A && B is equivalent to !A || B. ./tools/testing/selftests/bpf/xdpxceiver.c:909:28-30: WARNING !A || A && B is equivalent to !A || B. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1612860398-102839-1-git-send-email-jiapeng.chong@linux.alibaba.com --- tools/testing/selftests/bpf/xdpxceiver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 99ea6cf069e6..f4a96d5ff524 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -897,7 +897,7 @@ static void *worker_testapp_validate(void *arg) ksft_print_msg("Destroying socket\n"); } - if (!opt_bidi || (opt_bidi && bidi_pass)) { + if (!opt_bidi || bidi_pass) { xsk_socket__delete(ifobject->xsk->xsk); (void)xsk_umem__delete(ifobject->umem->umem); } @@ -922,7 +922,7 @@ static void testapp_validate(void) pthread_mutex_lock(&sync_mutex); /*Spawn RX thread */ - if (!opt_bidi || (opt_bidi && !bidi_pass)) { + if (!opt_bidi || !bidi_pass) { if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1])) exit_with_error(errno); } else if (opt_bidi && bidi_pass) { @@ -942,7 +942,7 @@ static void testapp_validate(void) pthread_mutex_unlock(&sync_mutex); /*Spawn TX thread */ - if (!opt_bidi || (opt_bidi && !bidi_pass)) { + if (!opt_bidi || !bidi_pass) { if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0])) exit_with_error(errno); } else if (opt_bidi && bidi_pass) { -- cgit v1.2.3-71-gd317 From ca06f55b90020cd97f4cc6d52db95436162e7dcf Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:29 -0800 Subject: bpf: Add per-program recursion prevention mechanism Since both sleepable and non-sleepable programs execute under migrate_disable add recursion prevention mechanism to both types of programs when they're executed via bpf trampoline. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210033634.62081-5-alexei.starovoitov@gmail.com --- arch/x86/net/bpf_jit_comp.c | 15 ++++++++++++++ include/linux/bpf.h | 6 +++--- include/linux/filter.h | 1 + kernel/bpf/core.c | 8 ++++++++ kernel/bpf/trampoline.c | 23 ++++++++++++++++++---- .../selftests/bpf/prog_tests/fexit_stress.c | 4 ++-- .../selftests/bpf/prog_tests/trampoline_count.c | 4 ++-- 7 files changed, 50 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d11b9bcebbea..79e7a0ec1da5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1740,8 +1740,11 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, struct bpf_prog *p, int stack_size, bool mod_ret) { u8 *prog = *pprog; + u8 *jmp_insn; int cnt = 0; + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); if (emit_call(&prog, p->aux->sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter, prog)) @@ -1749,6 +1752,14 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, /* remember prog start time returned by __bpf_prog_enter */ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + /* if (__bpf_prog_enter*(prog) == 0) + * goto skip_exec_of_prog; + */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ + /* emit 2 nops that will be replaced with JE insn */ + jmp_insn = prog; + emit_nops(&prog, 2); + /* arg1: lea rdi, [rbp - stack_size] */ EMIT4(0x48, 0x8D, 0x7D, -stack_size); /* arg2: progs[i]->insnsi for interpreter */ @@ -1767,6 +1778,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (mod_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + /* replace 2 nops with JE insn, since jmp target is known */ + jmp_insn[0] = X86_JE; + jmp_insn[1] = prog - jmp_insn - 2; + /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: mov rsi, rbx <- start time in nsec */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1840ceaf55f..1c8ea682c007 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -529,7 +529,7 @@ struct btf_func_model { /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ -#define BPF_MAX_TRAMP_PROGS 40 +#define BPF_MAX_TRAMP_PROGS 38 struct bpf_tramp_progs { struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; @@ -561,9 +561,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, struct bpf_tramp_progs *tprogs, void *orig_call); /* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(void); +u64 notrace __bpf_prog_enter(struct bpf_prog *prog); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); -u64 notrace __bpf_prog_enter_sleepable(void); +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); struct bpf_ksym { diff --git a/include/linux/filter.h b/include/linux/filter.h index cecb03c9d251..6a06f3c69f4e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -565,6 +565,7 @@ struct bpf_prog { u32 jited_len; /* Size of jited insns in bytes */ u8 tag[BPF_TAG_SIZE]; struct bpf_prog_stats __percpu *stats; + int __percpu *active; unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); struct bpf_prog_aux *aux; /* Auxiliary fields */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2cf71fd39c22..334070c4b8a1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -91,6 +91,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag vfree(fp); return NULL; } + fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags); + if (!fp->active) { + vfree(fp); + kfree(aux); + return NULL; + } fp->pages = size / PAGE_SIZE; fp->aux = aux; @@ -116,6 +122,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); if (!prog->stats) { + free_percpu(prog->active); kfree(prog->aux); vfree(prog); return NULL; @@ -253,6 +260,7 @@ void __bpf_prog_free(struct bpf_prog *fp) kfree(fp->aux); } free_percpu(fp->stats); + free_percpu(fp->active); vfree(fp); } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 48eb021e1421..89ef6320d19b 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -381,13 +381,16 @@ out: mutex_unlock(&trampoline_mutex); } -#define NO_START_TIME 0 +#define NO_START_TIME 1 static u64 notrace bpf_prog_start_time(void) { u64 start = NO_START_TIME; - if (static_branch_unlikely(&bpf_stats_enabled_key)) + if (static_branch_unlikely(&bpf_stats_enabled_key)) { start = sched_clock(); + if (unlikely(!start)) + start = NO_START_TIME; + } return start; } @@ -397,12 +400,20 @@ static u64 notrace bpf_prog_start_time(void) * call __bpf_prog_enter * call prog->bpf_func * call __bpf_prog_exit + * + * __bpf_prog_enter returns: + * 0 - skip execution of the bpf prog + * 1 - execute bpf prog + * [2..MAX_U64] - excute bpf prog and record execution time. + * This is start time. */ -u64 notrace __bpf_prog_enter(void) +u64 notrace __bpf_prog_enter(struct bpf_prog *prog) __acquires(RCU) { rcu_read_lock(); migrate_disable(); + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) + return 0; return bpf_prog_start_time(); } @@ -430,21 +441,25 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) __releases(RCU) { update_prog_stats(prog, start); + __this_cpu_dec(*(prog->active)); migrate_enable(); rcu_read_unlock(); } -u64 notrace __bpf_prog_enter_sleepable(void) +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog) { rcu_read_lock_trace(); migrate_disable(); might_fault(); + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) + return 0; return bpf_prog_start_time(); } void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start) { update_prog_stats(prog, start); + __this_cpu_dec(*(prog->active)); migrate_enable(); rcu_read_unlock_trace(); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 3b9dbf7433f0..7c9b62e971f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -2,8 +2,8 @@ /* Copyright (c) 2019 Facebook */ #include -/* x86-64 fits 55 JITed and 43 interpreted progs into half page */ -#define CNT 40 +/* that's kernel internal BPF_MAX_TRAMP_PROGS define */ +#define CNT 38 void test_fexit_stress(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 781c8d11604b..f3022d934e2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -4,7 +4,7 @@ #include #include -#define MAX_TRAMP_PROGS 40 +#define MAX_TRAMP_PROGS 38 struct inst { struct bpf_object *obj; @@ -52,7 +52,7 @@ void test_trampoline_count(void) struct bpf_link *link; char comm[16] = {}; - /* attach 'allowed' 40 trampoline programs */ + /* attach 'allowed' trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { obj = bpf_object__open_file(object, NULL); if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { -- cgit v1.2.3-71-gd317 From 406c557edc5bb903db9f6cdd543cfc282c663ad8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:30 -0800 Subject: selftest/bpf: Add a recursion test Add recursive non-sleepable fentry program as a test. All attach points where sleepable progs can execute are non recursive so far. The recursion protection mechanism for sleepable cannot be activated yet. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210033634.62081-6-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/prog_tests/recursion.c | 33 ++++++++++++++++ tools/testing/selftests/bpf/progs/recursion.c | 46 ++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/recursion.c create mode 100644 tools/testing/selftests/bpf/progs/recursion.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c new file mode 100644 index 000000000000..863757461e3f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/recursion.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include +#include "recursion.skel.h" + +void test_recursion(void) +{ + struct recursion *skel; + int key = 0; + int err; + + skel = recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = recursion__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2"); + + ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1"); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2"); +out: + recursion__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c new file mode 100644 index 000000000000..49f679375b9d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recursion.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, long); +} hash1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, long); +} hash2 SEC(".maps"); + +int pass1 = 0; +int pass2 = 0; + +SEC("fentry/__htab_map_lookup_elem") +int BPF_PROG(on_lookup, struct bpf_map *map) +{ + int key = 0; + + if (map == (void *)&hash1) { + pass1++; + return 0; + } + if (map == (void *)&hash2) { + pass2++; + /* htab_map_gen_lookup() will inline below call + * into direct call to __htab_map_lookup_elem() + */ + bpf_map_lookup_elem(&hash2, &key); + return 0; + } + + return 0; +} -- cgit v1.2.3-71-gd317 From 9ed9e9ba2337205311398a312796c213737bac35 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:31 -0800 Subject: bpf: Count the number of times recursion was prevented Add per-program counter for number of times recursion prevention mechanism was triggered and expose it via show_fdinfo and bpf_prog_info. Teach bpftool to print it. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210033634.62081-7-alexei.starovoitov@gmail.com --- include/linux/filter.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 14 ++++++++++---- kernel/bpf/trampoline.c | 18 ++++++++++++++++-- tools/bpf/bpftool/prog.c | 4 ++++ tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 33 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/include/linux/filter.h b/include/linux/filter.h index 6a06f3c69f4e..3b00fc906ccd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -543,6 +543,7 @@ struct bpf_binary_header { struct bpf_prog_stats { u64 cnt; u64 nsecs; + u64 misses; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c001766adcbc..c547ad1ffe43 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4501,6 +4501,7 @@ struct bpf_prog_info { __aligned_u64 prog_tags; __u64 run_time_ns; __u64 run_cnt; + __u64 recursion_misses; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f7df56a704de..c859bc46d06c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1731,25 +1731,28 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) static void bpf_prog_get_stats(const struct bpf_prog *prog, struct bpf_prog_stats *stats) { - u64 nsecs = 0, cnt = 0; + u64 nsecs = 0, cnt = 0, misses = 0; int cpu; for_each_possible_cpu(cpu) { const struct bpf_prog_stats *st; unsigned int start; - u64 tnsecs, tcnt; + u64 tnsecs, tcnt, tmisses; st = per_cpu_ptr(prog->stats, cpu); do { start = u64_stats_fetch_begin_irq(&st->syncp); tnsecs = st->nsecs; tcnt = st->cnt; + tmisses = st->misses; } while (u64_stats_fetch_retry_irq(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; + misses += tmisses; } stats->nsecs = nsecs; stats->cnt = cnt; + stats->misses = misses; } #ifdef CONFIG_PROC_FS @@ -1768,14 +1771,16 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) "memlock:\t%llu\n" "prog_id:\t%u\n" "run_time_ns:\t%llu\n" - "run_cnt:\t%llu\n", + "run_cnt:\t%llu\n" + "recursion_misses:\t%llu\n", prog->type, prog->jited, prog_tag, prog->pages * 1ULL << PAGE_SHIFT, prog->aux->id, stats.nsecs, - stats.cnt); + stats.cnt, + stats.misses); } #endif @@ -3438,6 +3443,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, bpf_prog_get_stats(prog, &stats); info.run_time_ns = stats.nsecs; info.run_cnt = stats.cnt; + info.recursion_misses = stats.misses; if (!bpf_capable()) { info.jited_prog_len = 0; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 89ef6320d19b..7bc3b3209224 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -394,6 +394,16 @@ static u64 notrace bpf_prog_start_time(void) return start; } +static void notrace inc_misses_counter(struct bpf_prog *prog) +{ + struct bpf_prog_stats *stats; + + stats = this_cpu_ptr(prog->stats); + u64_stats_update_begin(&stats->syncp); + stats->misses++; + u64_stats_update_end(&stats->syncp); +} + /* The logic is similar to BPF_PROG_RUN, but with an explicit * rcu_read_lock() and migrate_disable() which are required * for the trampoline. The macro is split into @@ -412,8 +422,10 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog) { rcu_read_lock(); migrate_disable(); - if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { + inc_misses_counter(prog); return 0; + } return bpf_prog_start_time(); } @@ -451,8 +463,10 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog) rcu_read_lock_trace(); migrate_disable(); might_fault(); - if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { + inc_misses_counter(prog); return 0; + } return bpf_prog_start_time(); } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 1fe3ba255bad..f2b915b20546 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -368,6 +368,8 @@ static void print_prog_header_json(struct bpf_prog_info *info) jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns); jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt); } + if (info->recursion_misses) + jsonw_uint_field(json_wtr, "recursion_misses", info->recursion_misses); } static void print_prog_json(struct bpf_prog_info *info, int fd) @@ -446,6 +448,8 @@ static void print_prog_header_plain(struct bpf_prog_info *info) if (info->run_time_ns) printf(" run_time_ns %lld run_cnt %lld", info->run_time_ns, info->run_cnt); + if (info->recursion_misses) + printf(" recursion_misses %lld", info->recursion_misses); printf("\n"); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c001766adcbc..c547ad1ffe43 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4501,6 +4501,7 @@ struct bpf_prog_info { __aligned_u64 prog_tags; __u64 run_time_ns; __u64 run_cnt; + __u64 recursion_misses; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3-71-gd317 From dcf33b6f4de173818540e3a2a0668c80a1ebdc68 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:32 -0800 Subject: selftests/bpf: Improve recursion selftest Since recursion_misses counter is available in bpf_prog_info improve the selftest to make sure it's counting correctly. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210210033634.62081-8-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/prog_tests/recursion.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c index 863757461e3f..0e378d63fe18 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursion.c +++ b/tools/testing/selftests/bpf/prog_tests/recursion.c @@ -5,6 +5,8 @@ void test_recursion(void) { + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); struct recursion *skel; int key = 0; int err; @@ -28,6 +30,12 @@ void test_recursion(void) ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1"); bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2"); + + err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup), + &prog_info, &prog_info_len); + if (!ASSERT_OK(err, "get_prog_info")) + goto out; + ASSERT_EQ(prog_info.recursion_misses, 2, "recursion_misses"); out: recursion__destroy(skel); } -- cgit v1.2.3-71-gd317 From 750e5d7649b1415e27979f91f917fa5e103714d9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Feb 2021 19:36:34 -0800 Subject: selftests/bpf: Add a test for map-in-map and per-cpu maps in sleepable progs Add a basic test for map-in-map and per-cpu maps in sleepable programs. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210210033634.62081-10-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/progs/lsm.c | 69 +++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index ff4d343b94b5..33694ef8acfa 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -30,6 +30,53 @@ struct { __type(value, __u64); } lru_hash SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} lru_percpu_hash SEC(".maps"); + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, __u64); +} inner_map SEC(".maps"); + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct inner_map); +} outer_arr SEC(".maps") = { + .values = { [0] = &inner_map }, +}; + +struct outer_hash { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __array(values, struct inner_map); +} outer_hash SEC(".maps") = { + .values = { [0] = &inner_map }, +}; + char _license[] SEC("license") = "GPL"; int monitored_pid = 0; @@ -61,6 +108,7 @@ SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct inner_map *inner_map; char args[64]; __u32 key = 0; __u64 *value; @@ -80,6 +128,27 @@ int BPF_PROG(test_void_hook, struct linux_binprm *bprm) value = bpf_map_lookup_elem(&lru_hash, &key); if (value) *value = 0; + value = bpf_map_lookup_elem(&percpu_array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&percpu_hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_percpu_hash, &key); + if (value) + *value = 0; + inner_map = bpf_map_lookup_elem(&outer_arr, &key); + if (inner_map) { + value = bpf_map_lookup_elem(inner_map, &key); + if (value) + *value = 0; + } + inner_map = bpf_map_lookup_elem(&outer_hash, &key); + if (inner_map) { + value = bpf_map_lookup_elem(inner_map, &key); + if (value) + *value = 0; + } return 0; } -- cgit v1.2.3-71-gd317 From 07881ccbf40cc7893869f3f170301889ddca54ac Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 10 Feb 2021 12:14:02 +0100 Subject: bpf: Be less specific about socket cookies guarantees Since "92acdc58ab11 bpf, net: Rework cookie generator as per-cpu one" socket cookies are not guaranteed to be non-decreasing. The bpf_get_socket_cookie helper descriptions are currently specifying that cookies are non-decreasing but we don't want users to rely on that. Reported-by: Daniel Borkmann Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210210111406.785541-1-revest@chromium.org --- include/uapi/linux/bpf.h | 8 ++++---- tools/include/uapi/linux/bpf.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c547ad1ffe43..dbf10bf08582 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1656,22 +1656,22 @@ union bpf_attr { * networking traffic statistics as it provides a global socket * identifier that can be assumed unique. * Return - * A 8-byte long non-decreasing number on success, or 0 if the - * socket field is missing inside *skb*. + * A 8-byte long unique number on success, or 0 if the socket + * field is missing inside *skb*. * * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c547ad1ffe43..dbf10bf08582 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1656,22 +1656,22 @@ union bpf_attr { * networking traffic statistics as it provides a global socket * identifier that can be assumed unique. * Return - * A 8-byte long non-decreasing number on success, or 0 if the - * socket field is missing inside *skb*. + * A 8-byte long unique number on success, or 0 if the socket + * field is missing inside *skb*. * * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return - * A 8-byte long non-decreasing number. + * A 8-byte long unique number. * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return -- cgit v1.2.3-71-gd317 From c5dbb89fc2ac013afe67b9e4fcb3743c02b567cd Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 10 Feb 2021 12:14:03 +0100 Subject: bpf: Expose bpf_get_socket_cookie to tracing programs This needs a new helper that: - can work in a sleepable context (using sock_gen_cookie) - takes a struct sock pointer and checks that it's not NULL Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210111406.785541-2-revest@chromium.org --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 8 ++++++++ kernel/trace/bpf_trace.c | 2 ++ net/core/filter.c | 12 ++++++++++++ tools/include/uapi/linux/bpf.h | 8 ++++++++ 5 files changed, 31 insertions(+) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1c8ea682c007..cccaef1088ea 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1885,6 +1885,7 @@ extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; +extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dbf10bf08582..07cc2e404291 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1673,6 +1673,14 @@ union bpf_attr { * Return * A 8-byte long unique number. * + * u64 bpf_get_socket_cookie(struct sock *sk) + * Description + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts + * *sk*, but gets socket from a BTF **struct sock**. This helper + * also works for sleepable programs. + * Return + * A 8-byte long unique number or 0 if *sk* is NULL. + * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return * The owner UID of the socket associated to *skb*. If the socket diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6c0018abe68a..845b2168e006 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1760,6 +1760,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_delete_tracing_proto; case BPF_FUNC_sock_from_file: return &bpf_sock_from_file_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_ptr_cookie_proto; #endif case BPF_FUNC_seq_printf: return prog->expected_attach_type == BPF_TRACE_ITER ? diff --git a/net/core/filter.c b/net/core/filter.c index e15d4741719a..57aaed478362 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4631,6 +4631,18 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk) +{ + return sk ? sock_gen_cookie(sk) : 0; +} + +const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = { + .func = bpf_get_socket_ptr_cookie, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, +}; + BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) { return __sock_gen_cookie(ctx->sk); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index dbf10bf08582..07cc2e404291 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1673,6 +1673,14 @@ union bpf_attr { * Return * A 8-byte long unique number. * + * u64 bpf_get_socket_cookie(struct sock *sk) + * Description + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts + * *sk*, but gets socket from a BTF **struct sock**. This helper + * also works for sleepable programs. + * Return + * A 8-byte long unique number or 0 if *sk* is NULL. + * * u32 bpf_get_socket_uid(struct sk_buff *skb) * Return * The owner UID of the socket associated to *skb*. If the socket -- cgit v1.2.3-71-gd317 From 61f8c9c8f3c8fb60722f0f3168d1a546dbf8a3d4 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 10 Feb 2021 12:14:04 +0100 Subject: selftests/bpf: Integrate the socket_cookie test to test_progs Currently, the selftest for the BPF socket_cookie helpers is built and run independently from test_progs. It's easy to forget and hard to maintain. This patch moves the socket cookies test into prog_tests/ and vastly simplifies its logic by: - rewriting the loading code with BPF skeletons - rewriting the server/client code with network helpers - rewriting the cgroup code with test__join_cgroup - rewriting the error handling code with CHECKs Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210111406.785541-3-revest@chromium.org --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/prog_tests/socket_cookie.c | 71 +++++++ .../selftests/bpf/progs/socket_cookie_prog.c | 2 - tools/testing/selftests/bpf/test_socket_cookie.c | 208 --------------------- 5 files changed, 72 insertions(+), 213 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/socket_cookie.c delete mode 100644 tools/testing/selftests/bpf/test_socket_cookie.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9abca0616ec0..c0c48fdb9ac1 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -17,7 +17,6 @@ test_sockmap test_lirc_mode2_user get_cgroup_id_user test_skb_cgroup_id_user -test_socket_cookie test_cgroup_storage test_flow_dissector flow_dissector_load diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f0674d406f40..044bfdcf5b74 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -31,7 +31,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_verifier_log test_dev_cgroup \ - test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ + test_sock test_sockmap get_cgroup_id_user \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sysctl \ test_progs-no_alu32 @@ -185,7 +185,6 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c -$(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/prog_tests/socket_cookie.c b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c new file mode 100644 index 000000000000..e12a31d3752c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Google LLC. +// Copyright (c) 2018 Facebook + +#include +#include "socket_cookie_prog.skel.h" +#include "network_helpers.h" + +static int duration; + +struct socket_cookie { + __u64 cookie_key; + __u32 cookie_value; +}; + +void test_socket_cookie(void) +{ + int server_fd = 0, client_fd = 0, cgroup_fd = 0, err = 0; + socklen_t addr_len = sizeof(struct sockaddr_in6); + struct socket_cookie_prog *skel; + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + struct socket_cookie val; + + skel = socket_cookie_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + cgroup_fd = test__join_cgroup("/socket_cookie"); + if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n")) + goto out; + + skel->links.set_cookie = bpf_program__attach_cgroup( + skel->progs.set_cookie, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach")) + goto close_cgroup_fd; + + skel->links.update_cookie = bpf_program__attach_cgroup( + skel->progs.update_cookie, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.update_cookie, "prog_attach")) + goto close_cgroup_fd; + + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) + goto close_cgroup_fd; + + client_fd = connect_to_fd(server_fd, 0); + if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno)) + goto close_server_fd; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies), + &client_fd, &val); + if (!ASSERT_OK(err, "map_lookup(socket_cookies)")) + goto close_client_fd; + + err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len); + if (!ASSERT_OK(err, "getsockname")) + goto close_client_fd; + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value"); + +close_client_fd: + close(client_fd); +close_server_fd: + close(server_fd); +close_cgroup_fd: + close(cgroup_fd); +out: + socket_cookie_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index 0cb5656a22b0..81e84be6f86d 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -65,6 +65,4 @@ int update_cookie(struct bpf_sock_ops *ctx) return 1; } -int _version SEC("version") = 1; - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c deleted file mode 100644 index ca7ca87e91aa..000000000000 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook - -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include "bpf_rlimit.h" -#include "cgroup_helpers.h" - -#define CG_PATH "/foo" -#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o" - -struct socket_cookie { - __u64 cookie_key; - __u32 cookie_value; -}; - -static int start_server(void) -{ - struct sockaddr_in6 addr; - int fd; - - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (fd == -1) { - log_err("Failed to create server socket"); - goto out; - } - - memset(&addr, 0, sizeof(addr)); - addr.sin6_family = AF_INET6; - addr.sin6_addr = in6addr_loopback; - addr.sin6_port = 0; - - if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) { - log_err("Failed to bind server socket"); - goto close_out; - } - - if (listen(fd, 128) == -1) { - log_err("Failed to listen on server socket"); - goto close_out; - } - - goto out; - -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int connect_to_server(int server_fd) -{ - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int fd; - - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (fd == -1) { - log_err("Failed to create client socket"); - goto out; - } - - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { - log_err("Failed to get server addr"); - goto close_out; - } - - if (connect(fd, (const struct sockaddr *)&addr, len) == -1) { - log_err("Fail to connect to server"); - goto close_out; - } - - goto out; - -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int validate_map(struct bpf_map *map, int client_fd) -{ - __u32 cookie_expected_value; - struct sockaddr_in6 addr; - socklen_t len = sizeof(addr); - struct socket_cookie val; - int err = 0; - int map_fd; - - if (!map) { - log_err("Map not found in BPF object"); - goto err; - } - - map_fd = bpf_map__fd(map); - - err = bpf_map_lookup_elem(map_fd, &client_fd, &val); - - err = getsockname(client_fd, (struct sockaddr *)&addr, &len); - if (err) { - log_err("Can't get client local addr"); - goto out; - } - - cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; - if (val.cookie_value != cookie_expected_value) { - log_err("Unexpected value in map: %x != %x", val.cookie_value, - cookie_expected_value); - goto err; - } - - goto out; -err: - err = -1; -out: - return err; -} - -static int run_test(int cgfd) -{ - enum bpf_attach_type attach_type; - struct bpf_prog_load_attr attr; - struct bpf_program *prog; - struct bpf_object *pobj; - const char *prog_name; - int server_fd = -1; - int client_fd = -1; - int prog_fd = -1; - int err = 0; - - memset(&attr, 0, sizeof(attr)); - attr.file = SOCKET_COOKIE_PROG; - attr.prog_type = BPF_PROG_TYPE_UNSPEC; - attr.prog_flags = BPF_F_TEST_RND_HI32; - - err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd); - if (err) { - log_err("Failed to load %s", attr.file); - goto out; - } - - bpf_object__for_each_program(prog, pobj) { - prog_name = bpf_program__section_name(prog); - - if (libbpf_attach_type_by_name(prog_name, &attach_type)) - goto err; - - err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type, - BPF_F_ALLOW_OVERRIDE); - if (err) { - log_err("Failed to attach prog %s", prog_name); - goto out; - } - } - - server_fd = start_server(); - if (server_fd == -1) - goto err; - - client_fd = connect_to_server(server_fd); - if (client_fd == -1) - goto err; - - if (validate_map(bpf_map__next(NULL, pobj), client_fd)) - goto err; - - goto out; -err: - err = -1; -out: - close(client_fd); - close(server_fd); - bpf_object__close(pobj); - printf("%s\n", err ? "FAILED" : "PASSED"); - return err; -} - -int main(int argc, char **argv) -{ - int cgfd = -1; - int err = 0; - - cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; - - if (run_test(cgfd)) - goto err; - - goto out; -err: - err = -1; -out: - close(cgfd); - cleanup_cgroup_environment(); - return err; -} -- cgit v1.2.3-71-gd317 From 6cd4dcc3fb8198fff6e6c2d7c622f78649fa2474 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 10 Feb 2021 12:14:05 +0100 Subject: selftests/bpf: Use vmlinux.h in socket_cookie_prog.c When migrating from the bpf.h's to the vmlinux.h's definition of struct bps_sock, an interesting LLVM behavior happened. LLVM started producing two fetches of ctx->sk in the sockops program this means that the verifier could not keep track of the NULL-check on ctx->sk. Therefore, we need to extract ctx->sk in a variable before checking and dereferencing it. Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210210111406.785541-4-revest@chromium.org --- tools/testing/selftests/bpf/progs/socket_cookie_prog.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index 81e84be6f86d..fbd5eaf39720 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Facebook -#include -#include +#include "vmlinux.h" #include #include +#define AF_INET6 10 + struct socket_cookie { __u64 cookie_key; __u32 cookie_value; @@ -41,7 +42,7 @@ int set_cookie(struct bpf_sock_addr *ctx) SEC("sockops") int update_cookie(struct bpf_sock_ops *ctx) { - struct bpf_sock *sk; + struct bpf_sock *sk = ctx->sk; struct socket_cookie *p; if (ctx->family != AF_INET6) @@ -50,10 +51,10 @@ int update_cookie(struct bpf_sock_ops *ctx) if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) return 1; - if (!ctx->sk) + if (!sk) return 1; - p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0); + p = bpf_sk_storage_get(&socket_cookies, sk, 0, 0); if (!p) return 1; -- cgit v1.2.3-71-gd317 From 6fdd671baaf587cca17603485f9ef4bf7a1f9be1 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 10 Feb 2021 12:14:06 +0100 Subject: selftests/bpf: Add a selftest for the tracing bpf_get_socket_cookie This builds up on the existing socket cookie test which checks whether the bpf_get_socket_cookie helpers provide the same value in cgroup/connect6 and sockops programs for a socket created by the userspace part of the test. Instead of having an update_cookie sockops program tag a socket local storage with 0xFF, this uses both an update_cookie_sockops program and an update_cookie_tracing program which succesively tag the socket with 0x0F and then 0xF0. Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210210111406.785541-5-revest@chromium.org --- .../selftests/bpf/prog_tests/socket_cookie.c | 11 +++++-- .../selftests/bpf/progs/socket_cookie_prog.c | 36 ++++++++++++++++++++-- 2 files changed, 41 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/socket_cookie.c b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c index e12a31d3752c..232db28dde18 100644 --- a/tools/testing/selftests/bpf/prog_tests/socket_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c @@ -35,9 +35,14 @@ void test_socket_cookie(void) if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach")) goto close_cgroup_fd; - skel->links.update_cookie = bpf_program__attach_cgroup( - skel->progs.update_cookie, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.update_cookie, "prog_attach")) + skel->links.update_cookie_sockops = bpf_program__attach_cgroup( + skel->progs.update_cookie_sockops, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach")) + goto close_cgroup_fd; + + skel->links.update_cookie_tracing = bpf_program__attach( + skel->progs.update_cookie_tracing); + if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach")) goto close_cgroup_fd; server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index fbd5eaf39720..35630a5aaf5f 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -5,6 +5,7 @@ #include #include +#include #define AF_INET6 10 @@ -20,6 +21,14 @@ struct { __type(value, struct socket_cookie); } socket_cookies SEC(".maps"); +/* + * These three programs get executed in a row on connect() syscalls. The + * userspace side of the test creates a client socket, issues a connect() on it + * and then checks that the local storage associated with this socket has: + * cookie_value == local_port << 8 | 0xFF + * The different parts of this cookie_value are appended by those hooks if they + * all agree on the output of bpf_get_socket_cookie(). + */ SEC("cgroup/connect6") int set_cookie(struct bpf_sock_addr *ctx) { @@ -33,14 +42,14 @@ int set_cookie(struct bpf_sock_addr *ctx) if (!p) return 1; - p->cookie_value = 0xFF; + p->cookie_value = 0xF; p->cookie_key = bpf_get_socket_cookie(ctx); return 1; } SEC("sockops") -int update_cookie(struct bpf_sock_ops *ctx) +int update_cookie_sockops(struct bpf_sock_ops *ctx) { struct bpf_sock *sk = ctx->sk; struct socket_cookie *p; @@ -61,9 +70,30 @@ int update_cookie(struct bpf_sock_ops *ctx) if (p->cookie_key != bpf_get_socket_cookie(ctx)) return 1; - p->cookie_value = (ctx->local_port << 8) | p->cookie_value; + p->cookie_value |= (ctx->local_port << 8); return 1; } +SEC("fexit/inet_stream_connect") +int BPF_PROG(update_cookie_tracing, struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) +{ + struct socket_cookie *p; + + if (uaddr->sa_family != AF_INET6) + return 0; + + p = bpf_sk_storage_get(&socket_cookies, sock->sk, 0, 0); + if (!p) + return 0; + + if (p->cookie_key != bpf_get_socket_cookie(sock->sk)) + return 0; + + p->cookie_value |= 0xF0; + + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-71-gd317 From 1e0aa3fb05f8be0201e05a3f4e6c8910b9071e96 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 9 Feb 2021 14:18:26 -0800 Subject: libbpf: Use AF_LOCAL instead of AF_INET in xsk.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have the environments where usage of AF_INET is prohibited (cgroup/sock_create returns EPERM for AF_INET). Let's use AF_LOCAL instead of AF_INET, it should perfectly work with SIOCETHTOOL. Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Tested-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210209221826.922940-1-sdf@google.com --- tools/lib/bpf/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 20500fb1f17e..ffbb588724d8 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -517,7 +517,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) struct ifreq ifr = {}; int fd, err, ret; - fd = socket(AF_INET, SOCK_DGRAM, 0); + fd = socket(AF_LOCAL, SOCK_DGRAM, 0); if (fd < 0) return -errno; -- cgit v1.2.3-71-gd317 From d2836dddc95d5dd82c7cb23726c97d8c9147f050 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 11 Feb 2021 18:10:30 -0800 Subject: libbpf: Ignore non function pointer member in struct_ops When libbpf initializes the kernel's struct_ops in "bpf_map__init_kern_struct_ops()", it enforces all pointer types must be a function pointer and rejects others. It turns out to be too strict. For example, when directly using "struct tcp_congestion_ops" from vmlinux.h, it has a "struct module *owner" member and it is set to NULL in a bpf_tcp_cc.o. Instead, it only needs to ensure the member is a function pointer if it has been set (relocated) to a bpf-prog. This patch moves the "btf_is_func_proto(kern_mtype)" check after the existing "if (!prog) { continue; }". The original debug message in "if (!prog) { continue; }" is also removed since it is no longer valid. Beside, there is a later debug message to tell which function pointer is set. The "btf_is_func_proto(mtype)" has already been guaranteed in "bpf_object__collect_st_ops_relos()" which has been run before "bpf_map__init_kern_struct_ops()". Thus, this check is removed. v2: - Remove outdated debug message (Andrii) Remove because there is a later debug message to tell which function pointer is set. - Following mtype->type is no longer needed. Remove: "skip_mods_and_typedefs(btf, mtype->type, &mtype_id)" - Do "if (!prog)" test before skip_mods_and_typedefs. Fixes: 590a00888250 ("bpf: libbpf: Add STRUCT_OPS support") Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210212021030.266932-1-kafai@fb.com --- tools/lib/bpf/libbpf.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2abbc3800568..d43cc3f29dae 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -884,24 +884,24 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, if (btf_is_ptr(mtype)) { struct bpf_program *prog; - mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); + prog = st_ops->progs[i]; + if (!prog) + continue; + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, &kern_mtype_id); - if (!btf_is_func_proto(mtype) || - !btf_is_func_proto(kern_mtype)) { - pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", + + /* mtype->type must be a func_proto which was + * guaranteed in bpf_object__collect_st_ops_relos(), + * so only check kern_mtype for func_proto here. + */ + if (!btf_is_func_proto(kern_mtype)) { + pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", map->name, mname); return -ENOTSUP; } - prog = st_ops->progs[i]; - if (!prog) { - pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", - map->name, mname); - continue; - } - prog->attach_btf_id = kern_type_id; prog->expected_attach_type = kern_member_idx; -- cgit v1.2.3-71-gd317 From a79e88dd2ca6686dca77c9c53c3e12c031347348 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 11 Feb 2021 18:10:37 -0800 Subject: bpf: selftests: Add non function pointer test to struct_ops This patch adds a "void *owner" member. The existing bpf_tcp_ca test will ensure the bpf_cubic.o and bpf_dctcp.o can be loaded. Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210212021037.267278-1-kafai@fb.com --- tools/testing/selftests/bpf/bpf_tcp_helpers.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 6a9053162cf2..91f0fac632f4 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -177,6 +177,7 @@ struct tcp_congestion_ops { * after all the ca_state processing. (optional) */ void (*cong_control)(struct sock *sk, const struct rate_sample *rs); + void *owner; }; #define min(a, b) ((a) < (b) ? (a) : (b)) -- cgit v1.2.3-71-gd317 From e8168840e16c606b3de38148c97262638b41750d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 12 Feb 2021 10:31:07 -0800 Subject: selftests/bpf: Add test for bpf_iter_task_vma The test dumps information similar to /proc/pid/maps. The first line of the output is compared against the /proc file to make sure they match. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210212183107.50963-4-songliubraving@fb.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 118 +++++++++++++++++++-- tools/testing/selftests/bpf/progs/bpf_iter.h | 8 ++ .../selftests/bpf/progs/bpf_iter_task_vma.c | 58 ++++++++++ 3 files changed, 174 insertions(+), 10 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 0e586368948d..74c45d557a2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -7,6 +7,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_task_vma.skel.h" #include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" @@ -64,6 +65,22 @@ free_link: bpf_link__destroy(link); } +static int read_fd_into_buffer(int fd, char *buf, int size) +{ + int bufleft = size; + int len; + + do { + len = read(fd, buf, bufleft); + if (len > 0) { + buf += len; + bufleft -= len; + } + } while (len > 0); + + return len < 0 ? len : size - bufleft; +} + static void test_ipv6_route(void) { struct bpf_iter_ipv6_route *skel; @@ -177,7 +194,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) { struct bpf_program *prog = skel->progs.dump_task_struct; struct bpf_iter_task_btf__bss *bss = skel->bss; - int iter_fd = -1, len = 0, bufleft = TASKBUFSZ; + int iter_fd = -1, err; struct bpf_link *link; char *buf = taskbuf; int ret = 0; @@ -190,14 +207,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) goto free_link; - do { - len = read(iter_fd, buf, bufleft); - if (len > 0) { - buf += len; - bufleft -= len; - } - } while (len > 0); - + err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ); if (bss->skip) { printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); ret = 1; @@ -205,7 +215,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) goto free_link; CHECK(strstr(taskbuf, "(struct task_struct)") == NULL, @@ -1133,6 +1143,92 @@ static void test_buf_neg_offset(void) bpf_iter_test_kern6__destroy(skel); } +#define CMP_BUFFER_SIZE 1024 +static char task_vma_output[CMP_BUFFER_SIZE]; +static char proc_maps_output[CMP_BUFFER_SIZE]; + +/* remove \0 and \t from str, and only keep the first line */ +static void str_strip_first_line(char *str) +{ + char *dst = str, *src = str; + + do { + if (*src == ' ' || *src == '\t') + src++; + else + *(dst++) = *(src++); + + } while (*src != '\0' && *src != '\n'); + + *dst = '\0'; +} + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static void test_task_vma(void) +{ + int err, iter_fd = -1, proc_maps_fd = -1; + struct bpf_iter_task_vma *skel; + int len, read_size = 4; + char maps_path[64]; + + skel = bpf_iter_task_vma__open(); + if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n")) + return; + + skel->bss->pid = getpid(); + + err = bpf_iter_task_vma__load(skel); + if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n")) + goto out; + + skel->links.proc_maps = bpf_program__attach_iter( + skel->progs.proc_maps, NULL); + + if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter", + "attach iterator failed\n")) { + skel->links.proc_maps = NULL; + goto out; + } + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto out; + + /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks + * to trigger seq_file corner cases. The expected output is much + * longer than 1kB, so the while loop will terminate. + */ + len = 0; + while (len < CMP_BUFFER_SIZE) { + err = read_fd_into_buffer(iter_fd, task_vma_output + len, + min(read_size, CMP_BUFFER_SIZE - len)); + if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) + goto out; + len += err; + } + + /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */ + snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid); + proc_maps_fd = open(maps_path, O_RDONLY); + if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n")) + goto out; + err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE); + if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n")) + goto out; + + /* strip and compare the first line of the two files */ + str_strip_first_line(task_vma_output); + str_strip_first_line(proc_maps_output); + + CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output", + "found mismatch\n"); +out: + close(proc_maps_fd); + close(iter_fd); + bpf_iter_task_vma__destroy(skel); +} + void test_bpf_iter(void) { if (test__start_subtest("btf_id_or_null")) @@ -1149,6 +1245,8 @@ void test_bpf_iter(void) test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("task_vma")) + test_task_vma(); if (test__start_subtest("task_btf")) test_task_btf(); if (test__start_subtest("tcp4")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 6a1255465fd6..3d83b185c4bc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -7,6 +7,7 @@ #define bpf_iter__netlink bpf_iter__netlink___not_used #define bpf_iter__task bpf_iter__task___not_used #define bpf_iter__task_file bpf_iter__task_file___not_used +#define bpf_iter__task_vma bpf_iter__task_vma___not_used #define bpf_iter__tcp bpf_iter__tcp___not_used #define tcp6_sock tcp6_sock___not_used #define bpf_iter__udp bpf_iter__udp___not_used @@ -26,6 +27,7 @@ #undef bpf_iter__netlink #undef bpf_iter__task #undef bpf_iter__task_file +#undef bpf_iter__task_vma #undef bpf_iter__tcp #undef tcp6_sock #undef bpf_iter__udp @@ -67,6 +69,12 @@ struct bpf_iter__task_file { struct file *file; } __attribute__((preserve_access_index)); +struct bpf_iter__task_vma { + struct bpf_iter_meta *meta; + struct task_struct *task; + struct vm_area_struct *vma; +} __attribute__((preserve_access_index)); + struct bpf_iter__bpf_map { struct bpf_iter_meta *meta; struct bpf_map *map; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c new file mode 100644 index 000000000000..11d1aa37cf11 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* Copied from mm.h */ +#define VM_READ 0x00000001 +#define VM_WRITE 0x00000002 +#define VM_EXEC 0x00000004 +#define VM_MAYSHARE 0x00000080 + +/* Copied from kdev_t.h */ +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) + +#define D_PATH_BUF_SIZE 1024 +char d_path_buf[D_PATH_BUF_SIZE] = {}; +__u32 pid = 0; + +SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) +{ + struct vm_area_struct *vma = ctx->vma; + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + struct file *file; + char perm_str[] = "----"; + + if (task == (void *)0 || vma == (void *)0) + return 0; + + file = vma->vm_file; + if (task->tgid != pid) + return 0; + perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-'; + perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-'; + perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-'; + perm_str[3] = (vma->vm_flags & VM_MAYSHARE) ? 's' : 'p'; + BPF_SEQ_PRINTF(seq, "%08llx-%08llx %s ", vma->vm_start, vma->vm_end, perm_str); + + if (file) { + __u32 dev = file->f_inode->i_sb->s_dev; + + bpf_d_path(&file->f_path, d_path_buf, D_PATH_BUF_SIZE); + + BPF_SEQ_PRINTF(seq, "%08llx ", vma->vm_pgoff << 12); + BPF_SEQ_PRINTF(seq, "%02x:%02x %u", MAJOR(dev), MINOR(dev), + file->f_inode->i_ino); + BPF_SEQ_PRINTF(seq, "\t%s\n", d_path_buf); + } else { + BPF_SEQ_PRINTF(seq, "%08llx 00:00 0\n", 0ULL); + } + return 0; +} -- cgit v1.2.3-71-gd317 From 90a82b1fa40d0cee33d1c9306dc54412442d1e57 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 11 Feb 2021 17:00:53 -0800 Subject: tools/resolve_btfids: Add /libbpf to .gitignore This is what I see after compiling the kernel: # bpf-next...bpf-next/master ?? tools/bpf/resolve_btfids/libbpf/ Fixes: fc6b48f692f8 ("tools/resolve_btfids: Build libbpf and libsubcmd in separate directories") Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210212010053.668700-1-sdf@google.com --- tools/bpf/resolve_btfids/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore index 25f308c933cc..16913fffc985 100644 --- a/tools/bpf/resolve_btfids/.gitignore +++ b/tools/bpf/resolve_btfids/.gitignore @@ -1,2 +1,3 @@ /fixdep /resolve_btfids +/libbpf/ -- cgit v1.2.3-71-gd317 From e1850ea9bd9eca3656820b4875967d6f9c11c237 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Feb 2021 14:38:19 +0100 Subject: bpf: bpf_fib_lookup return MTU value as output when looked up The BPF-helpers for FIB lookup (bpf_xdp_fib_lookup and bpf_skb_fib_lookup) can perform MTU check and return BPF_FIB_LKUP_RET_FRAG_NEEDED. The BPF-prog don't know the MTU value that caused this rejection. If the BPF-prog wants to implement PMTU (Path MTU Discovery) (rfc1191) it need to know this MTU value for the ICMP packet. Patch change lookup and result struct bpf_fib_lookup, to contain this MTU value as output via a union with 'tot_len' as this is the value used for the MTU lookup. V5: - Fixed uninit value spotted by Dan Carpenter. - Name struct output member mtu_result Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/161287789952.790810.13134700381067698781.stgit@firesoul --- include/uapi/linux/bpf.h | 11 +++++++++-- net/core/filter.c | 22 +++++++++++++++------- tools/include/uapi/linux/bpf.h | 11 +++++++++-- 3 files changed, 33 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 07cc2e404291..6b1f6058cccf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2239,6 +2239,9 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU + * was exceeded and output params->mtu_result contains the MTU. + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -4990,9 +4993,13 @@ struct bpf_fib_lookup { __be16 sport; __be16 dport; - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; + union { /* used for MTU check */ + /* input to lookup */ + __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */ + /* output: MTU value */ + __u16 mtu_result; + }; /* input: L3 device index for lookup * output: device index from FIB lookup */ diff --git a/net/core/filter.c b/net/core/filter.c index 1fec2e998e37..e7a9b1667dd6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5301,12 +5301,14 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, const struct neighbour *neigh, - const struct net_device *dev) + const struct net_device *dev, u32 mtu) { memcpy(params->dmac, neigh->ha, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN); params->h_vlan_TCI = 0; params->h_vlan_proto = 0; + if (mtu) + params->mtu_result = mtu; /* union with tot_len */ return 0; } @@ -5322,8 +5324,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, struct net_device *dev; struct fib_result res; struct flowi4 fl4; + u32 mtu = 0; int err; - u32 mtu; dev = dev_get_by_index_rcu(net, params->ifindex); if (unlikely(!dev)) @@ -5390,8 +5392,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); - if (params->tot_len > mtu) + if (params->tot_len > mtu) { + params->mtu_result = mtu; /* union with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; + } } nhc = res.nhc; @@ -5425,7 +5429,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (!neigh) return BPF_FIB_LKUP_RET_NO_NEIGH; - return bpf_fib_set_fwd_params(params, neigh, dev); + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); } #endif @@ -5442,7 +5446,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, struct flowi6 fl6; int strict = 0; int oif, err; - u32 mtu; + u32 mtu = 0; /* link local addresses are never forwarded */ if (rt6_need_strict(dst) || rt6_need_strict(src)) @@ -5517,8 +5521,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); - if (params->tot_len > mtu) + if (params->tot_len > mtu) { + params->mtu_result = mtu; /* union with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; + } } if (res.nh->fib_nh_lws) @@ -5538,7 +5544,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (!neigh) return BPF_FIB_LKUP_RET_NO_NEIGH; - return bpf_fib_set_fwd_params(params, neigh, dev); + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); } #endif @@ -5614,6 +5620,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, dev = dev_get_by_index_rcu(net, params->ifindex); if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; + + params->mtu_result = dev->mtu; /* union with tot_len */ } return rc; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 07cc2e404291..6b1f6058cccf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2239,6 +2239,9 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU + * was exceeded and output params->mtu_result contains the MTU. + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -4990,9 +4993,13 @@ struct bpf_fib_lookup { __be16 sport; __be16 dport; - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; + union { /* used for MTU check */ + /* input to lookup */ + __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */ + /* output: MTU value */ + __u16 mtu_result; + }; /* input: L3 device index for lookup * output: device index from FIB lookup */ -- cgit v1.2.3-71-gd317 From 34b2021cc61642d61c3cf943d9e71925b827941b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Feb 2021 14:38:24 +0100 Subject: bpf: Add BPF-helper for MTU checking This BPF-helper bpf_check_mtu() works for both XDP and TC-BPF programs. The SKB object is complex and the skb->len value (accessible from BPF-prog) also include the length of any extra GRO/GSO segments, but without taking into account that these GRO/GSO segments get added transport (L4) and network (L3) headers before being transmitted. Thus, this BPF-helper is created such that the BPF-programmer don't need to handle these details in the BPF-prog. The API is designed to help the BPF-programmer, that want to do packet context size changes, which involves other helpers. These other helpers usually does a delta size adjustment. This helper also support a delta size (len_diff), which allow BPF-programmer to reuse arguments needed by these other helpers, and perform the MTU check prior to doing any actual size adjustment of the packet context. It is on purpose, that we allow the len adjustment to become a negative result, that will pass the MTU check. This might seem weird, but it's not this helpers responsibility to "catch" wrong len_diff adjustments. Other helpers will take care of these checks, if BPF-programmer chooses to do actual size adjustment. V14: - Improve man-page desc of len_diff. V13: - Enforce flag BPF_MTU_CHK_SEGS cannot use len_diff. V12: - Simplify segment check that calls skb_gso_validate_network_len. - Helpers should return long V9: - Use dev->hard_header_len (instead of ETH_HLEN) - Annotate with unlikely req from Daniel - Fix logic error using skb_gso_validate_network_len from Daniel V6: - Took John's advice and dropped BPF_MTU_CHK_RELAX - Returned MTU is kept at L3-level (like fib_lookup) V4: Lot of changes - ifindex 0 now use current netdev for MTU lookup - rename helper from bpf_mtu_check to bpf_check_mtu - fix bug for GSO pkt length (as skb->len is total len) - remove __bpf_len_adj_positive, simply allow negative len adj Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/161287790461.790810.3429728639563297353.stgit@firesoul --- include/uapi/linux/bpf.h | 75 +++++++++++++++++++++++++++ net/core/filter.c | 114 +++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 75 +++++++++++++++++++++++++++ 3 files changed, 264 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6b1f6058cccf..4c24daa43bac 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3847,6 +3847,69 @@ union bpf_attr { * Return * A pointer to a struct socket on success or NULL if the file is * not a socket. + * + * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) + * Description + + * Check ctx packet size against exceeding MTU of net device (based + * on *ifindex*). This helper will likely be used in combination + * with helpers that adjust/change the packet size. + * + * The argument *len_diff* can be used for querying with a planned + * size change. This allows to check MTU prior to changing packet + * ctx. Providing an *len_diff* adjustment that is larger than the + * actual packet size (resulting in negative packet size) will in + * principle not exceed the MTU, why it is not considered a + * failure. Other BPF-helpers are needed for performing the + * planned size change, why the responsability for catch a negative + * packet size belong in those helpers. + * + * Specifying *ifindex* zero means the MTU check is performed + * against the current net device. This is practical if this isn't + * used prior to redirect. + * + * The Linux kernel route table can configure MTUs on a more + * specific per route level, which is not provided by this helper. + * For route level MTU checks use the **bpf_fib_lookup**\ () + * helper. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** for tc cls_act programs. + * + * The *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_MTU_CHK_SEGS** + * This flag will only works for *ctx* **struct sk_buff**. + * If packet context contains extra packet segment buffers + * (often knows as GSO skb), then MTU check is harder to + * check at this point, because in transmit path it is + * possible for the skb packet to get re-segmented + * (depending on net device features). This could still be + * a MTU violation, so this flag enables performing MTU + * check against segments, with a different violation + * return code to tell it apart. Check cannot use len_diff. + * + * On return *mtu_len* pointer contains the MTU value of the net + * device. Remember the net device configured MTU is the L3 size, + * which is returned here and XDP and TX length operate at L2. + * Helper take this into account for you, but remember when using + * MTU value in your BPF-code. On input *mtu_len* must be a valid + * pointer and be initialized (to zero), else verifier will reject + * BPF program. + * + * Return + * * 0 on success, and populate MTU value in *mtu_len* pointer. + * + * * < 0 if any input argument is invalid (*mtu_len* not updated) + * + * MTU violations return positive values, but also populate MTU + * value in *mtu_len* pointer, as this can be needed for + * implementing PMTU handing: + * + * * **BPF_MTU_CHK_RET_FRAG_NEEDED** + * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4012,6 +4075,7 @@ union bpf_attr { FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ FN(sock_from_file), \ + FN(check_mtu), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5045,6 +5109,17 @@ struct bpf_redir_neigh { }; }; +/* bpf_check_mtu flags*/ +enum bpf_check_mtu_flags { + BPF_MTU_CHK_SEGS = (1U << 0), +}; + +enum bpf_check_mtu_ret { + BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */ + BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */ +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/net/core/filter.c b/net/core/filter.c index e7a9b1667dd6..439f43f00483 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5637,6 +5637,116 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { .arg4_type = ARG_ANYTHING, }; +static struct net_device *__dev_via_ifindex(struct net_device *dev_curr, + u32 ifindex) +{ + struct net *netns = dev_net(dev_curr); + + /* Non-redirect use-cases can use ifindex=0 and save ifindex lookup */ + if (ifindex == 0) + return dev_curr; + + return dev_get_by_index_rcu(netns, ifindex); +} + +BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, + u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags) +{ + int ret = BPF_MTU_CHK_RET_FRAG_NEEDED; + struct net_device *dev = skb->dev; + int skb_len, dev_len; + int mtu; + + if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) + return -EINVAL; + + if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff)) + return -EINVAL; + + dev = __dev_via_ifindex(dev, ifindex); + if (unlikely(!dev)) + return -ENODEV; + + mtu = READ_ONCE(dev->mtu); + + dev_len = mtu + dev->hard_header_len; + skb_len = skb->len + len_diff; /* minus result pass check */ + if (skb_len <= dev_len) { + ret = BPF_MTU_CHK_RET_SUCCESS; + goto out; + } + /* At this point, skb->len exceed MTU, but as it include length of all + * segments, it can still be below MTU. The SKB can possibly get + * re-segmented in transmit path (see validate_xmit_skb). Thus, user + * must choose if segs are to be MTU checked. + */ + if (skb_is_gso(skb)) { + ret = BPF_MTU_CHK_RET_SUCCESS; + + if (flags & BPF_MTU_CHK_SEGS && + !skb_gso_validate_network_len(skb, mtu)) + ret = BPF_MTU_CHK_RET_SEGS_TOOBIG; + } +out: + /* BPF verifier guarantees valid pointer */ + *mtu_len = mtu; + + return ret; +} + +BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, + u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags) +{ + struct net_device *dev = xdp->rxq->dev; + int xdp_len = xdp->data_end - xdp->data; + int ret = BPF_MTU_CHK_RET_SUCCESS; + int mtu, dev_len; + + /* XDP variant doesn't support multi-buffer segment check (yet) */ + if (unlikely(flags)) + return -EINVAL; + + dev = __dev_via_ifindex(dev, ifindex); + if (unlikely(!dev)) + return -ENODEV; + + mtu = READ_ONCE(dev->mtu); + + /* Add L2-header as dev MTU is L3 size */ + dev_len = mtu + dev->hard_header_len; + + xdp_len += len_diff; /* minus result pass check */ + if (xdp_len > dev_len) + ret = BPF_MTU_CHK_RET_FRAG_NEEDED; + + /* BPF verifier guarantees valid pointer */ + *mtu_len = mtu; + + return ret; +} + +static const struct bpf_func_proto bpf_skb_check_mtu_proto = { + .func = bpf_skb_check_mtu, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_INT, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto bpf_xdp_check_mtu_proto = { + .func = bpf_xdp_check_mtu, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_INT, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) { @@ -7222,6 +7332,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_uid_proto; case BPF_FUNC_fib_lookup: return &bpf_skb_fib_lookup_proto; + case BPF_FUNC_check_mtu: + return &bpf_skb_check_mtu_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; case BPF_FUNC_sk_storage_get: @@ -7291,6 +7403,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_adjust_tail_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; + case BPF_FUNC_check_mtu: + return &bpf_xdp_check_mtu_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_udp: return &bpf_xdp_sk_lookup_udp_proto; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6b1f6058cccf..4c24daa43bac 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3847,6 +3847,69 @@ union bpf_attr { * Return * A pointer to a struct socket on success or NULL if the file is * not a socket. + * + * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) + * Description + + * Check ctx packet size against exceeding MTU of net device (based + * on *ifindex*). This helper will likely be used in combination + * with helpers that adjust/change the packet size. + * + * The argument *len_diff* can be used for querying with a planned + * size change. This allows to check MTU prior to changing packet + * ctx. Providing an *len_diff* adjustment that is larger than the + * actual packet size (resulting in negative packet size) will in + * principle not exceed the MTU, why it is not considered a + * failure. Other BPF-helpers are needed for performing the + * planned size change, why the responsability for catch a negative + * packet size belong in those helpers. + * + * Specifying *ifindex* zero means the MTU check is performed + * against the current net device. This is practical if this isn't + * used prior to redirect. + * + * The Linux kernel route table can configure MTUs on a more + * specific per route level, which is not provided by this helper. + * For route level MTU checks use the **bpf_fib_lookup**\ () + * helper. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** for tc cls_act programs. + * + * The *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_MTU_CHK_SEGS** + * This flag will only works for *ctx* **struct sk_buff**. + * If packet context contains extra packet segment buffers + * (often knows as GSO skb), then MTU check is harder to + * check at this point, because in transmit path it is + * possible for the skb packet to get re-segmented + * (depending on net device features). This could still be + * a MTU violation, so this flag enables performing MTU + * check against segments, with a different violation + * return code to tell it apart. Check cannot use len_diff. + * + * On return *mtu_len* pointer contains the MTU value of the net + * device. Remember the net device configured MTU is the L3 size, + * which is returned here and XDP and TX length operate at L2. + * Helper take this into account for you, but remember when using + * MTU value in your BPF-code. On input *mtu_len* must be a valid + * pointer and be initialized (to zero), else verifier will reject + * BPF program. + * + * Return + * * 0 on success, and populate MTU value in *mtu_len* pointer. + * + * * < 0 if any input argument is invalid (*mtu_len* not updated) + * + * MTU violations return positive values, but also populate MTU + * value in *mtu_len* pointer, as this can be needed for + * implementing PMTU handing: + * + * * **BPF_MTU_CHK_RET_FRAG_NEEDED** + * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4012,6 +4075,7 @@ union bpf_attr { FN(ktime_get_coarse_ns), \ FN(ima_inode_hash), \ FN(sock_from_file), \ + FN(check_mtu), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5045,6 +5109,17 @@ struct bpf_redir_neigh { }; }; +/* bpf_check_mtu flags*/ +enum bpf_check_mtu_flags { + BPF_MTU_CHK_SEGS = (1U << 0), +}; + +enum bpf_check_mtu_ret { + BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */ + BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */ +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ -- cgit v1.2.3-71-gd317 From 6b8838be7e21edb620099e01eb040c21c5190494 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Feb 2021 14:38:34 +0100 Subject: selftests/bpf: Use bpf_check_mtu in selftest test_cls_redirect This demonstrate how bpf_check_mtu() helper can easily be used together with bpf_skb_adjust_room() helper, prior to doing size adjustment, as delta argument is already setup. Hint: This specific test can be selected like this: ./test_progs -t cls_redirect Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/161287791481.790810.4444271170546646080.stgit@firesoul --- tools/testing/selftests/bpf/progs/test_cls_redirect.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index c9f8464996ea..3c1e042962e6 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -70,6 +70,7 @@ typedef struct { uint64_t errors_total_encap_adjust_failed; uint64_t errors_total_encap_buffer_too_small; uint64_t errors_total_redirect_loop; + uint64_t errors_total_encap_mtu_violate; } metrics_t; typedef enum { @@ -407,6 +408,7 @@ static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *e payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; uint16_t proto = ETH_P_IP; + uint32_t mtu_len = 0; /* Loop protection: the inner packet's TTL is decremented as a safeguard * against any forwarding loop. As the only interesting field is the TTL @@ -479,6 +481,11 @@ static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *e } } + if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) { + metrics->errors_total_encap_mtu_violate++; + return TC_ACT_SHOT; + } + if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_NO_CSUM_RESET) || -- cgit v1.2.3-71-gd317 From b62eba563229fc7c51af41b55fc67c4778d85bb7 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Feb 2021 14:38:39 +0100 Subject: selftests/bpf: Tests using bpf_check_mtu BPF-helper Adding selftest for BPF-helper bpf_check_mtu(). Making sure it can be used from both XDP and TC. V16: - Fix 'void' function definition V11: - Addresse nitpicks from Andrii Nakryiko V10: - Remove errno non-zero test in CHECK_ATTR() - Addresse comments from Andrii Nakryiko Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/161287791989.790810.13612620012522164562.stgit@firesoul --- tools/testing/selftests/bpf/prog_tests/check_mtu.c | 216 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_check_mtu.c | 198 +++++++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/check_mtu.c create mode 100644 tools/testing/selftests/bpf/progs/test_check_mtu.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c new file mode 100644 index 000000000000..36af1c138faf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Jesper Dangaard Brouer */ + +#include /* before test_progs.h, avoid bpf_util.h redefines */ +#include +#include "test_check_mtu.skel.h" +#include "network_helpers.h" + +#include +#include + +#define IFINDEX_LO 1 + +static __u32 duration; /* Hint: needed for CHECK macro */ + +static int read_mtu_device_lo(void) +{ + const char *filename = "/sys/class/net/lo/mtu"; + char buf[11] = {}; + int value, n, fd; + + fd = open(filename, 0, O_RDONLY); + if (fd == -1) + return -1; + + n = read(fd, buf, sizeof(buf)); + close(fd); + + if (n == -1) + return -2; + + value = strtoimax(buf, NULL, 10); + if (errno == ERANGE) + return -3; + + return value; +} + +static void test_check_mtu_xdp_attach(void) +{ + struct bpf_link_info link_info; + __u32 link_info_len = sizeof(link_info); + struct test_check_mtu *skel; + struct bpf_program *prog; + struct bpf_link *link; + int err = 0; + int fd; + + skel = test_check_mtu__open_and_load(); + if (CHECK(!skel, "open and load skel", "failed")) + return; /* Exit if e.g. helper unknown to kernel */ + + prog = skel->progs.xdp_use_helper_basic; + + link = bpf_program__attach_xdp(prog, IFINDEX_LO); + if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + goto out; + skel->links.xdp_use_helper_basic = link; + + memset(&link_info, 0, sizeof(link_info)); + fd = bpf_link__fd(link); + err = bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + if (CHECK(err, "link_info", "failed: %d\n", err)) + goto out; + + CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", + "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); + CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", + "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); + + err = bpf_link__detach(link); + CHECK(err, "link_detach", "failed %d\n", err); + +out: + test_check_mtu__destroy(skel); +} + +static void test_check_mtu_run_xdp(struct test_check_mtu *skel, + struct bpf_program *prog, + __u32 mtu_expect) +{ + const char *prog_name = bpf_program__name(prog); + int retval_expect = XDP_PASS; + __u32 mtu_result = 0; + char buf[256] = {}; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .prog_fd = bpf_program__fd(prog), + }; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != 0, "bpf_prog_test_run", + "prog_name:%s (err %d errno %d retval %d)\n", + prog_name, err, errno, tattr.retval); + + CHECK(tattr.retval != retval_expect, "retval", + "progname:%s unexpected retval=%d expected=%d\n", + prog_name, tattr.retval, retval_expect); + + /* Extract MTU that BPF-prog got */ + mtu_result = skel->bss->global_bpf_mtu_xdp; + ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user"); +} + + +static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex) +{ + struct test_check_mtu *skel; + int err; + + skel = test_check_mtu__open(); + if (CHECK(!skel, "skel_open", "failed")) + return; + + /* Update "constants" in BPF-prog *BEFORE* libbpf load */ + skel->rodata->GLOBAL_USER_MTU = mtu; + skel->rodata->GLOBAL_USER_IFINDEX = ifindex; + + err = test_check_mtu__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu); + +cleanup: + test_check_mtu__destroy(skel); +} + +static void test_check_mtu_run_tc(struct test_check_mtu *skel, + struct bpf_program *prog, + __u32 mtu_expect) +{ + const char *prog_name = bpf_program__name(prog); + int retval_expect = BPF_OK; + __u32 mtu_result = 0; + char buf[256] = {}; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .prog_fd = bpf_program__fd(prog), + }; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != 0, "bpf_prog_test_run", + "prog_name:%s (err %d errno %d retval %d)\n", + prog_name, err, errno, tattr.retval); + + CHECK(tattr.retval != retval_expect, "retval", + "progname:%s unexpected retval=%d expected=%d\n", + prog_name, tattr.retval, retval_expect); + + /* Extract MTU that BPF-prog got */ + mtu_result = skel->bss->global_bpf_mtu_tc; + ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user"); +} + + +static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) +{ + struct test_check_mtu *skel; + int err; + + skel = test_check_mtu__open(); + if (CHECK(!skel, "skel_open", "failed")) + return; + + /* Update "constants" in BPF-prog *BEFORE* libbpf load */ + skel->rodata->GLOBAL_USER_MTU = mtu; + skel->rodata->GLOBAL_USER_IFINDEX = ifindex; + + err = test_check_mtu__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_check_mtu_run_tc(skel, skel->progs.tc_use_helper, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu); +cleanup: + test_check_mtu__destroy(skel); +} + +void test_check_mtu(void) +{ + __u32 mtu_lo; + + if (test__start_subtest("bpf_check_mtu XDP-attach")) + test_check_mtu_xdp_attach(); + + mtu_lo = read_mtu_device_lo(); + if (CHECK(mtu_lo < 0, "reading MTU value", "failed (err:%d)", mtu_lo)) + return; + + if (test__start_subtest("bpf_check_mtu XDP-run")) + test_check_mtu_xdp(mtu_lo, 0); + + if (test__start_subtest("bpf_check_mtu XDP-run ifindex-lookup")) + test_check_mtu_xdp(mtu_lo, IFINDEX_LO); + + if (test__start_subtest("bpf_check_mtu TC-run")) + test_check_mtu_tc(mtu_lo, 0); + + if (test__start_subtest("bpf_check_mtu TC-run ifindex-lookup")) + test_check_mtu_tc(mtu_lo, IFINDEX_LO); +} diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c new file mode 100644 index 000000000000..b7787b43f9db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Jesper Dangaard Brouer */ + +#include +#include +#include + +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* Userspace will update with MTU it can see on device */ +static volatile const int GLOBAL_USER_MTU; +static volatile const __u32 GLOBAL_USER_IFINDEX; + +/* BPF-prog will update these with MTU values it can see */ +__u32 global_bpf_mtu_xdp = 0; +__u32 global_bpf_mtu_tc = 0; + +SEC("xdp") +int xdp_use_helper_basic(struct xdp_md *ctx) +{ + __u32 mtu_len = 0; + + if (bpf_check_mtu(ctx, 0, &mtu_len, 0, 0)) + return XDP_ABORTED; + + return XDP_PASS; +} + +SEC("xdp") +int xdp_use_helper(struct xdp_md *ctx) +{ + int retval = XDP_PASS; /* Expected retval on successful test */ + __u32 mtu_len = 0; + __u32 ifindex = 0; + int delta = 0; + + /* When ifindex is zero, save net_device lookup and use ctx netdev */ + if (GLOBAL_USER_IFINDEX > 0) + ifindex = GLOBAL_USER_IFINDEX; + + if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0)) { + /* mtu_len is also valid when check fail */ + retval = XDP_ABORTED; + goto out; + } + + if (mtu_len != GLOBAL_USER_MTU) + retval = XDP_DROP; + +out: + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("xdp") +int xdp_exceed_mtu(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + int retval = XDP_ABORTED; /* Fail */ + __u32 mtu_len = 0; + int delta; + int err; + + /* Exceed MTU with 1 via delta adjust */ + delta = GLOBAL_USER_MTU - (data_len - ETH_HLEN) + 1; + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0); + if (err) { + retval = XDP_PASS; /* Success in exceeding MTU check */ + if (err != BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = XDP_DROP; + } + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("xdp") +int xdp_minus_delta(struct xdp_md *ctx) +{ + int retval = XDP_PASS; /* Expected retval on successful test */ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + __u32 mtu_len = 0; + int delta; + + /* Borderline test case: Minus delta exceeding packet length allowed */ + delta = -((data_len - ETH_HLEN) + 1); + + /* Minus length (adjusted via delta) still pass MTU check, other helpers + * are responsible for catching this, when doing actual size adjust + */ + if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0)) + retval = XDP_ABORTED; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("classifier") +int tc_use_helper(struct __sk_buff *ctx) +{ + int retval = BPF_OK; /* Expected retval on successful test */ + __u32 mtu_len = 0; + int delta = 0; + + if (bpf_check_mtu(ctx, 0, &mtu_len, delta, 0)) { + retval = BPF_DROP; + goto out; + } + + if (mtu_len != GLOBAL_USER_MTU) + retval = BPF_REDIRECT; +out: + global_bpf_mtu_tc = mtu_len; + return retval; +} + +SEC("classifier") +int tc_exceed_mtu(struct __sk_buff *ctx) +{ + __u32 ifindex = GLOBAL_USER_IFINDEX; + int retval = BPF_DROP; /* Fail */ + __u32 skb_len = ctx->len; + __u32 mtu_len = 0; + int delta; + int err; + + /* Exceed MTU with 1 via delta adjust */ + delta = GLOBAL_USER_MTU - (skb_len - ETH_HLEN) + 1; + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0); + if (err) { + retval = BPF_OK; /* Success in exceeding MTU check */ + if (err != BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = BPF_DROP; + } + + global_bpf_mtu_tc = mtu_len; + return retval; +} + +SEC("classifier") +int tc_exceed_mtu_da(struct __sk_buff *ctx) +{ + /* SKB Direct-Access variant */ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + int retval = BPF_DROP; /* Fail */ + __u32 mtu_len = 0; + int delta; + int err; + + /* Exceed MTU with 1 via delta adjust */ + delta = GLOBAL_USER_MTU - (data_len - ETH_HLEN) + 1; + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0); + if (err) { + retval = BPF_OK; /* Success in exceeding MTU check */ + if (err != BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = BPF_DROP; + } + + global_bpf_mtu_tc = mtu_len; + return retval; +} + +SEC("classifier") +int tc_minus_delta(struct __sk_buff *ctx) +{ + int retval = BPF_OK; /* Expected retval on successful test */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 skb_len = ctx->len; + __u32 mtu_len = 0; + int delta; + + /* Borderline test case: Minus delta exceeding packet length allowed */ + delta = -((skb_len - ETH_HLEN) + 1); + + /* Minus length (adjusted via delta) still pass MTU check, other helpers + * are responsible for catching this, when doing actual size adjust + */ + if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0)) + retval = BPF_DROP; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} -- cgit v1.2.3-71-gd317 From 8b08807d039a843163fd4aeca93aec69dfc4fbcf Mon Sep 17 00:00:00 2001 From: Dmitrii Banshchikov Date: Sat, 13 Feb 2021 00:56:42 +0400 Subject: selftests/bpf: Add unit tests for pointers in global functions test_global_func9 - check valid pointer's scenarios test_global_func10 - check that a smaller type cannot be passed as a larger one test_global_func11 - check that CTX pointer cannot be passed test_global_func12 - check access to a null pointer test_global_func13 - check access to an arbitrary pointer value test_global_func14 - check that an opaque pointer cannot be passed test_global_func15 - check that a variable has an unknown value after it was passed to a global function by pointer test_global_func16 - check access to uninitialized stack memory test_global_func_args - check read and write operations through a pointer Signed-off-by: Dmitrii Banshchikov Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210212205642.620788-5-me@ubique.spb.ru --- .../selftests/bpf/prog_tests/global_func_args.c | 60 ++++++++++ .../selftests/bpf/prog_tests/test_global_funcs.c | 8 ++ .../selftests/bpf/progs/test_global_func10.c | 29 +++++ .../selftests/bpf/progs/test_global_func11.c | 19 +++ .../selftests/bpf/progs/test_global_func12.c | 21 ++++ .../selftests/bpf/progs/test_global_func13.c | 24 ++++ .../selftests/bpf/progs/test_global_func14.c | 21 ++++ .../selftests/bpf/progs/test_global_func15.c | 22 ++++ .../selftests/bpf/progs/test_global_func16.c | 22 ++++ .../selftests/bpf/progs/test_global_func9.c | 132 +++++++++++++++++++++ .../selftests/bpf/progs/test_global_func_args.c | 91 ++++++++++++++ 11 files changed, 449 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/global_func_args.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func10.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func11.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func12.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func13.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func14.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func15.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func16.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func9.c create mode 100644 tools/testing/selftests/bpf/progs/test_global_func_args.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c new file mode 100644 index 000000000000..8bcc2869102f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "test_progs.h" +#include "network_helpers.h" + +static __u32 duration; + +static void test_global_func_args0(struct bpf_object *obj) +{ + int err, i, map_fd, actual_value; + const char *map_name = "values"; + + map_fd = bpf_find_map(__func__, obj, map_name); + if (CHECK(map_fd < 0, "bpf_find_map", "cannot find BPF map %s: %s\n", + map_name, strerror(errno))) + return; + + struct { + const char *descr; + int expected_value; + } tests[] = { + {"passing NULL pointer", 0}, + {"returning value", 1}, + {"reading local variable", 100 }, + {"writing local variable", 101 }, + {"reading global variable", 42 }, + {"writing global variable", 43 }, + {"writing to pointer-to-pointer", 1 }, + }; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + const int expected_value = tests[i].expected_value; + + err = bpf_map_lookup_elem(map_fd, &i, &actual_value); + + CHECK(err || actual_value != expected_value, tests[i].descr, + "err %d result %d expected %d\n", err, actual_value, expected_value); + } +} + +void test_global_func_args(void) +{ + const char *file = "./test_global_func_args.o"; + __u32 retval; + struct bpf_object *obj; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (CHECK(err, "load program", "error %d loading %s\n", err, file)) + return; + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "pass global func args run", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + test_global_func_args0(obj); + + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 32e4348b714b..7e13129f593a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -61,6 +61,14 @@ void test_test_global_funcs(void) { "test_global_func6.o" , "modified ctx ptr R2" }, { "test_global_func7.o" , "foo() doesn't return scalar" }, { "test_global_func8.o" }, + { "test_global_func9.o" }, + { "test_global_func10.o", "invalid indirect read from stack" }, + { "test_global_func11.o", "Caller passes invalid args into func#1" }, + { "test_global_func12.o", "invalid mem access 'mem_or_null'" }, + { "test_global_func13.o", "Caller passes invalid args into func#1" }, + { "test_global_func14.o", "reference type('FWD S') size cannot be determined" }, + { "test_global_func15.o", "At program exit the register R0 has value" }, + { "test_global_func16.o", "invalid indirect read from stack" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c new file mode 100644 index 000000000000..61c2ae92ce41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +struct Small { + int x; +}; + +struct Big { + int x; + int y; +}; + +__noinline int foo(const struct Big *big) +{ + if (big == 0) + return 0; + + return bpf_get_prandom_u32() < big->y; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + const struct Small small = {.x = skb->len }; + + return foo((struct Big *)&small) ? 1 : 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c new file mode 100644 index 000000000000..28488047c849 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func11.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +struct S { + int x; +}; + +__noinline int foo(const struct S *s) +{ + return s ? bpf_get_prandom_u32() < s->x : 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + return foo(skb); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c new file mode 100644 index 000000000000..62343527cc59 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func12.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +struct S { + int x; +}; + +__noinline int foo(const struct S *s) +{ + return bpf_get_prandom_u32() < s->x; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + const struct S s = {.x = skb->len }; + + return foo(&s); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func13.c b/tools/testing/selftests/bpf/progs/test_global_func13.c new file mode 100644 index 000000000000..ff8897c1ac22 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func13.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +struct S { + int x; +}; + +__noinline int foo(const struct S *s) +{ + if (s) + return bpf_get_prandom_u32() < s->x; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + const struct S *s = (const struct S *)(0xbedabeda); + + return foo(s); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func14.c b/tools/testing/selftests/bpf/progs/test_global_func14.c new file mode 100644 index 000000000000..698c77199ebf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func14.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +struct S; + +__noinline int foo(const struct S *s) +{ + if (s) + return bpf_get_prandom_u32() < *(const int *) s; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + + return foo(NULL); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c new file mode 100644 index 000000000000..c19c435988d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +__noinline int foo(unsigned int *v) +{ + if (v) + *v = bpf_get_prandom_u32(); + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + unsigned int v = 1; + + foo(&v); + + return v; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c new file mode 100644 index 000000000000..0312d1e8d8c0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func16.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +__noinline int foo(int (*arr)[10]) +{ + if (arr) + return (*arr)[9]; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + int array[10]; + + const int rv = foo(&array); + + return rv ? 1 : 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func9.c b/tools/testing/selftests/bpf/progs/test_global_func9.c new file mode 100644 index 000000000000..bd233ddede98 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func9.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +struct S { + int x; +}; + +struct C { + int x; + int y; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct S); +} map SEC(".maps"); + +enum E { + E_ITEM +}; + +static int global_data_x = 100; +static int volatile global_data_y = 500; + +__noinline int foo(const struct S *s) +{ + if (s) + return bpf_get_prandom_u32() < s->x; + + return 0; +} + +__noinline int bar(int *x) +{ + if (x) + *x &= bpf_get_prandom_u32(); + + return 0; +} +__noinline int baz(volatile int *x) +{ + if (x) + *x &= bpf_get_prandom_u32(); + + return 0; +} + +__noinline int qux(enum E *e) +{ + if (e) + return *e; + + return 0; +} + +__noinline int quux(int (*arr)[10]) +{ + if (arr) + return (*arr)[9]; + + return 0; +} + +__noinline int quuz(int **p) +{ + if (p) + *p = NULL; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + int result = 0; + + { + const struct S s = {.x = skb->len }; + + result |= foo(&s); + } + + { + const __u32 key = 1; + const struct S *s = bpf_map_lookup_elem(&map, &key); + + result |= foo(s); + } + + { + const struct C c = {.x = skb->len, .y = skb->family }; + + result |= foo((const struct S *)&c); + } + + { + result |= foo(NULL); + } + + { + bar(&result); + bar(&global_data_x); + } + + { + result |= baz(&global_data_y); + } + + { + enum E e = E_ITEM; + + result |= qux(&e); + } + + { + int array[10] = {0}; + + result |= quux(&array); + } + + { + int *p; + + result |= quuz(&p); + } + + return result ? 1 : 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c new file mode 100644 index 000000000000..cae309538a9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include + +struct S { + int v; +}; + +static volatile struct S global_variable; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 7); + __type(key, __u32); + __type(value, int); +} values SEC(".maps"); + +static void save_value(__u32 index, int value) +{ + bpf_map_update_elem(&values, &index, &value, 0); +} + +__noinline int foo(__u32 index, struct S *s) +{ + if (s) { + save_value(index, s->v); + return ++s->v; + } + + save_value(index, 0); + + return 1; +} + +__noinline int bar(__u32 index, volatile struct S *s) +{ + if (s) { + save_value(index, s->v); + return ++s->v; + } + + save_value(index, 0); + + return 1; +} + +__noinline int baz(struct S **s) +{ + if (s) + *s = 0; + + return 0; +} + +SEC("cgroup_skb/ingress") +int test_cls(struct __sk_buff *skb) +{ + __u32 index = 0; + + { + const int v = foo(index++, 0); + + save_value(index++, v); + } + + { + struct S s = { .v = 100 }; + + foo(index++, &s); + save_value(index++, s.v); + } + + { + global_variable.v = 42; + bar(index++, &global_variable); + save_value(index++, global_variable.v); + } + + { + struct S v, *p = &v; + + baz(&p); + save_value(index++, !p); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-71-gd317