diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2017-12-12 09:16:22 -0800 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2017-12-12 09:16:56 -0800 |
| commit | 553a8f2f42dffc5414a82fffe55d9b8c0fbd383f (patch) | |
| tree | 39d63df8a9c2794258d6d4328c7b18d10a2ecc29 /kernel/trace | |
| parent | a23967c181cae80becb451b38ff2a1e01c05c37b (diff) | |
| parent | 023f46c5b807ae5fff83b57d918727a5b9dbee55 (diff) | |
| download | cachepc-linux-553a8f2f42dffc5414a82fffe55d9b8c0fbd383f.tar.gz cachepc-linux-553a8f2f42dffc5414a82fffe55d9b8c0fbd383f.zip | |
Merge branch 'bpf-override-return'
Josef Bacik says:
====================
This is the same as v8, just rebased onto the bpf tree.
v8->v9:
- rebased onto the bpf tree.
v7->v8:
- removed the _ASM_KPROBE_ERROR_INJECT since it was not needed.
v6->v7:
- moved the opt-in macro to bpf.h out of kprobes.h.
v5->v6:
- add BPF_ALLOW_ERROR_INJECTION() tagging for functions that will support this
feature. This way only functions that opt-in will be allowed to be
overridden.
- added a btrfs patch to allow error injection for open_ctree() so that the bpf
sample actually works.
v4->v5:
- disallow kprobe_override programs from being put in the prog map array so we
don't tail call into something we didn't check. This allows us to make the
normal path still fast without a bunch of percpu operations.
v3->v4:
- fix a build error found by kbuild test bot (I didn't wait long enough
apparently.)
- Added a warning message as per Daniels suggestion.
v2->v3:
- added a ->kprobe_override flag to bpf_prog.
- added some sanity checks to disallow attaching bpf progs that have
->kprobe_override set that aren't for ftrace kprobes.
- added the trace_kprobe_ftrace helper to check if the trace_event_call is a
ftrace kprobe.
- renamed bpf_kprobe_state to bpf_kprobe_override, fixed it so we only read this
value in the kprobe path, and thus only write to it if we're overriding or
clearing the override.
v1->v2:
- moved things around to make sure that bpf_override_return could really only be
used for an ftrace kprobe.
- killed the special return values from trace_call_bpf.
- renamed pc_modified to bpf_kprobe_state so bpf_override_return could tell if
it was being called from an ftrace kprobe context.
- reworked the logic in kprobe_perf_func to take advantage of bpf_kprobe_state.
- updated the test as per Alexei's review.
- Original message -
A lot of our error paths are not well tested because we have no good way of
injecting errors generically. Some subystems (block, memory) have ways to
inject errors, but they are random so it's hard to get reproduceable results.
With BPF we can add determinism to our error injection. We can use kprobes and
other things to verify we are injecting errors at the exact case we are trying
to test. This patch gives us the tool to actual do the error injection part.
It is very simple, we just set the return value of the pt_regs we're given to
whatever we provide, and then override the PC with a dummy function that simply
returns.
Right now this only works on x86, but it would be simple enough to expand to
other architectures. Thanks,
Josef
====================
In patch "bpf: add a bpf_override_function helper" Alexei moved
"ifdef CONFIG_BPF_KPROBE_OVERRIDE" few lines to fail program loading
when kprobe_override is not available instead of failing at run-time.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/trace')
| -rw-r--r-- | kernel/trace/Kconfig | 11 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 35 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 55 | ||||
| -rw-r--r-- | kernel/trace/trace_probe.h | 12 |
4 files changed, 106 insertions, 7 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index af7dad126c13..3e6fd580fe7f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -529,6 +529,17 @@ config FUNCTION_PROFILER If in doubt, say N. +config BPF_KPROBE_OVERRIDE + bool "Enable BPF programs to override a kprobed function" + depends on BPF_EVENTS + depends on KPROBES_ON_FTRACE + depends on HAVE_KPROBE_OVERRIDE + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + Allows BPF to override the execution of a probed function and + set a different return value. This is used for error injection. + config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b143f2a05aff..e009b7ecf473 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,10 @@ #include <linux/filter.h> #include <linux/uaccess.h> #include <linux/ctype.h> +#include <linux/kprobes.h> +#include <asm/kprobes.h> + +#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + __this_cpu_write(bpf_kprobe_override, 1); + regs_set_return_value(regs, rc); + arch_ftrace_kprobe_override_function(regs); + return 0; +} + +static const struct bpf_func_proto bpf_override_return_proto = { + .func = bpf_override_return, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; +#ifdef CONFIG_BPF_KPROBE_OVERRIDE + case BPF_FUNC_override_return: + return &bpf_override_return_proto; +#endif default: return tracing_func_proto(func_id); } @@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; + /* + * Kprobe override only works for ftrace based kprobes, and only if they + * are on the opt-in list. + */ + if (prog->kprobe_override && + (!trace_kprobe_ftrace(event->tp_event) || + !trace_kprobe_error_injectable(event->tp_event))) + return -EINVAL; + mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 492700c5fb4d..5db849809a56 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,6 +42,7 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) +DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +int trace_kprobe_ftrace(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + return kprobe_ftrace(&tk->rp.kp); +} + +int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + unsigned long addr; + + if (tk->symbol) { + addr = (unsigned long) + kallsyms_lookup_name(trace_kprobe_symbol(tk)); + addr += tk->rp.kp.offset; + } else { + addr = (unsigned long)tk->rp.kp.addr; + } + return within_kprobe_error_injection_list(addr); +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static void +static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) - return; + if (bpf_prog_array_valid(call)) { + int ret; + + ret = trace_call_bpf(call, regs); + + /* + * We need to check and see if we modified the pc of the + * pt_regs, and if so clear the kprobe and return 1 so that we + * don't do the instruction skipping. Also reset our state so + * we are clean the next pass through. + */ + if (__this_cpu_read(bpf_kprobe_override)) { + __this_cpu_write(bpf_kprobe_override, 0); + reset_current_kprobe(); + return 1; + } + if (!ret) + return 0; + } head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return; + return 0; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return; + return 0; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); + return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - kprobe_perf_func(tk, regs); + ret = kprobe_perf_func(tk, regs); #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return ret; } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fb66e3eaa192..5e54d748c84c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -252,6 +252,8 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); +int trace_kprobe_ftrace(struct trace_event_call *call); +int trace_kprobe_error_injectable(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } + +static inline int trace_kprobe_ftrace(struct trace_event_call *call) +{ + return 0; +} + +static inline int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + return 0; +} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { |
