diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/core.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 2 | ||||
| -rw-r--r-- | kernel/events/core.c | 7 | ||||
| -rw-r--r-- | kernel/kprobes.c | 163 | ||||
| -rw-r--r-- | kernel/module.c | 6 | ||||
| -rw-r--r-- | kernel/trace/Kconfig | 11 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 35 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 55 | ||||
| -rw-r--r-- | kernel/trace/trace_probe.h | 12 |
9 files changed, 286 insertions, 8 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b16c6f8f42b6..d32bebf4f2de 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + if (fp->kprobe_override) + return false; + if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7afa92e9b409..e807bda7fe29 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_override_return) + prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index f10609e539d4..5857c500721b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8080,6 +8080,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } + /* Kprobe override only works for kprobes, not uprobes. */ + if (prog->kprobe_override && + !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { + bpf_prog_put(prog); + return -EINVAL; + } + if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index da2ccf142358..b4aab48ad258 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) return &(kretprobe_table_locks[hash].lock); } +/* List of symbols that can be overridden for error injection. 
*/ +static LIST_HEAD(kprobe_error_injection_list); +static DEFINE_MUTEX(kprobe_ei_mutex); +struct kprobe_ei_entry { + struct list_head list; + unsigned long start_addr; + unsigned long end_addr; + void *priv; +}; + /* Blacklist -- list of struct kprobe_blacklist_entry */ static LIST_HEAD(kprobe_blacklist); @@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr) return false; } +bool within_kprobe_error_injection_list(unsigned long addr) +{ + struct kprobe_ei_entry *ent; + + list_for_each_entry(ent, &kprobe_error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return true; + } + return false; +} + /* * If we have a symbol_name argument, look it up and add the offset field * to it. This way, we can specify a relative address to a symbol. @@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start, return 0; } +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +/* Markers of the _kprobe_error_inject_list section */ +extern unsigned long __start_kprobe_error_inject_list[]; +extern unsigned long __stop_kprobe_error_inject_list[]; + +/* + * Lookup and populate the kprobe_error_injection_list. + * + * For safety reasons we only allow certain functions to be overridden with + * bpf_error_injection, so we need to populate the list of the symbols that have + * been marked as safe for overriding. 
+ */ +static void populate_kprobe_error_injection_list(unsigned long *start, + unsigned long *end, + void *priv) +{ + unsigned long *iter; + struct kprobe_ei_entry *ent; + unsigned long entry, offset = 0, size = 0; + + mutex_lock(&kprobe_ei_mutex); + for (iter = start; iter < end; iter++) { + entry = arch_deref_entry_point((void *)*iter); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find error inject entry at %p\n", + (void *)entry); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + break; + ent->start_addr = entry; + ent->end_addr = entry + size; + ent->priv = priv; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &kprobe_error_injection_list); + } + mutex_unlock(&kprobe_ei_mutex); +} + +static void __init populate_kernel_kprobe_ei_list(void) +{ + populate_kprobe_error_injection_list(__start_kprobe_error_inject_list, + __stop_kprobe_error_inject_list, + NULL); +} + +static void module_load_kprobe_ei_list(struct module *mod) +{ + if (!mod->num_kprobe_ei_funcs) + return; + populate_kprobe_error_injection_list(mod->kprobe_ei_funcs, + mod->kprobe_ei_funcs + + mod->num_kprobe_ei_funcs, mod); +} + +static void module_unload_kprobe_ei_list(struct module *mod) +{ + struct kprobe_ei_entry *ent, *n; + if (!mod->num_kprobe_ei_funcs) + return; + + mutex_lock(&kprobe_ei_mutex); + list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) { + if (ent->priv == mod) { + list_del_init(&ent->list); + kfree(ent); + } + } + mutex_unlock(&kprobe_ei_mutex); +} +#else +static inline void __init populate_kernel_kprobe_ei_list(void) {} +static inline void module_load_kprobe_ei_list(struct module *m) {} +static inline void module_unload_kprobe_ei_list(struct module *m) {} +#endif + /* Module notifier call back, checking kprobes on the module */ static int kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -2178,6 +2279,11 @@ static int 
kprobes_module_callback(struct notifier_block *nb, unsigned int i; int checkcore = (val == MODULE_STATE_GOING); + if (val == MODULE_STATE_COMING) + module_load_kprobe_ei_list(mod); + else if (val == MODULE_STATE_GOING) + module_unload_kprobe_ei_list(mod); + if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) return NOTIFY_DONE; @@ -2240,6 +2346,8 @@ static int __init init_kprobes(void) pr_err("Please take care of using kprobes.\n"); } + populate_kernel_kprobe_ei_list(); + if (kretprobe_blacklist_size) { /* lookup the function address from its name */ for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { @@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = { .release = seq_release, }; +/* + * kprobes/error_injection_list -- shows which functions can be overridden for + * error injection. + * */ +static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&kprobe_ei_mutex); + return seq_list_start(&kprobe_error_injection_list, *pos); +} + +static void kprobe_ei_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&kprobe_ei_mutex); +} + +static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &kprobe_error_injection_list, pos); +} + +static int kprobe_ei_seq_show(struct seq_file *m, void *v) +{ + char buffer[KSYM_SYMBOL_LEN]; + struct kprobe_ei_entry *ent = + list_entry(v, struct kprobe_ei_entry, list); + + sprint_symbol(buffer, ent->start_addr); + seq_printf(m, "%s\n", buffer); + return 0; +} + +static const struct seq_operations kprobe_ei_seq_ops = { + .start = kprobe_ei_seq_start, + .next = kprobe_ei_seq_next, + .stop = kprobe_ei_seq_stop, + .show = kprobe_ei_seq_show, +}; + +static int kprobe_ei_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &kprobe_ei_seq_ops); +} + +static const struct file_operations debugfs_kprobe_ei_ops = { + .open = kprobe_ei_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, 
+}; + static void arm_all_kprobes(void) { struct hlist_head *head; @@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void) if (!file) goto error; + file = debugfs_create_file("error_injection_list", 0444, dir, NULL, + &debugfs_kprobe_ei_ops); + if (!file) + goto error; + return 0; error: diff --git a/kernel/module.c b/kernel/module.c index dea01ac9cb74..bd695bfdc5c4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif - +#ifdef CONFIG_BPF_KPROBE_OVERRIDE + mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list", + sizeof(*mod->kprobe_ei_funcs), + &mod->num_kprobe_ei_funcs); +#endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index af7dad126c13..3e6fd580fe7f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -529,6 +529,17 @@ config FUNCTION_PROFILER If in doubt, say N. +config BPF_KPROBE_OVERRIDE + bool "Enable BPF programs to override a kprobed function" + depends on BPF_EVENTS + depends on KPROBES_ON_FTRACE + depends on HAVE_KPROBE_OVERRIDE + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + Allows BPF to override the execution of a probed function and + set a different return value. This is used for error injection. 
+ config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b143f2a05aff..e009b7ecf473 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,10 @@ #include <linux/filter.h> #include <linux/uaccess.h> #include <linux/ctype.h> +#include <linux/kprobes.h> +#include <asm/kprobes.h> + +#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + __this_cpu_write(bpf_kprobe_override, 1); + regs_set_return_value(regs, rc); + arch_ftrace_kprobe_override_function(regs); + return 0; +} + +static const struct bpf_func_proto bpf_override_return_proto = { + .func = bpf_override_return, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; +#ifdef CONFIG_BPF_KPROBE_OVERRIDE + case BPF_FUNC_override_return: + return &bpf_override_return_proto; +#endif default: return tracing_func_proto(func_id); } @@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; + /* + * Kprobe override only works for ftrace based kprobes, and only if they + * are on the opt-in list. 
+ */ + if (prog->kprobe_override && + (!trace_kprobe_ftrace(event->tp_event) || + !trace_kprobe_error_injectable(event->tp_event))) + return -EINVAL; + mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 492700c5fb4d..5db849809a56 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,6 +42,7 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) +DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +int trace_kprobe_ftrace(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + return kprobe_ftrace(&tk->rp.kp); +} + +int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + unsigned long addr; + + if (tk->symbol) { + addr = (unsigned long) + kallsyms_lookup_name(trace_kprobe_symbol(tk)); + addr += tk->rp.kp.offset; + } else { + addr = (unsigned long)tk->rp.kp.addr; + } + return within_kprobe_error_injection_list(addr); +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static void +static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) - return; + if (bpf_prog_array_valid(call)) { + int ret; + + ret = trace_call_bpf(call, regs); + + /* + * We need to 
check and see if we modified the pc of the + * pt_regs, and if so clear the kprobe and return 1 so that we + * don't do the instruction skipping. Also reset our state so + * we are clean the next pass through. + */ + if (__this_cpu_read(bpf_kprobe_override)) { + __this_cpu_write(bpf_kprobe_override, 0); + reset_current_kprobe(); + return 1; + } + if (!ret) + return 0; + } head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return; + return 0; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return; + return 0; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); + return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + int ret = 0; raw_cpu_inc(*tk->nhit); @@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS if (tk->tp.flags & TP_FLAG_PROFILE) - kprobe_perf_func(tk, regs); + ret = kprobe_perf_func(tk, regs); #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return ret; } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fb66e3eaa192..5e54d748c84c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -252,6 +252,8 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char 
*sym, long offset); +int trace_kprobe_ftrace(struct trace_event_call *call); +int trace_kprobe_error_injectable(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } + +static inline int trace_kprobe_ftrace(struct trace_event_call *call) +{ + return 0; +} + +static inline int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + return 0; +} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { |
