summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c3
-rw-r--r--kernel/bpf/verifier.c2
-rw-r--r--kernel/events/core.c7
-rw-r--r--kernel/kprobes.c163
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/bpf_trace.c35
-rw-r--r--kernel/trace/trace_kprobe.c55
-rw-r--r--kernel/trace/trace_probe.h12
9 files changed, 286 insertions, 8 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b16c6f8f42b6..d32bebf4f2de 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
{
+ if (fp->kprobe_override)
+ return false;
+
if (!array->owner_prog_type) {
/* There's no owner yet where we could check for
* compatibility.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7afa92e9b409..e807bda7fe29 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_override_return)
+ prog->kprobe_override = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* If we tail call into other programs, we
* cannot make any assumptions since they can
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f10609e539d4..5857c500721b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8080,6 +8080,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
return -EINVAL;
}
+ /* Kprobe override only works for kprobes, not uprobes. */
+ if (prog->kprobe_override &&
+ !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
+ bpf_prog_put(prog);
+ return -EINVAL;
+ }
+
if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index da2ccf142358..b4aab48ad258 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -83,6 +83,16 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
return &(kretprobe_table_locks[hash].lock);
}
+/* List of symbols that can be overriden for error injection. */
+static LIST_HEAD(kprobe_error_injection_list);
+static DEFINE_MUTEX(kprobe_ei_mutex);
+struct kprobe_ei_entry {
+ struct list_head list;
+ unsigned long start_addr;
+ unsigned long end_addr;
+ void *priv;
+};
+
/* Blacklist -- list of struct kprobe_blacklist_entry */
static LIST_HEAD(kprobe_blacklist);
@@ -1394,6 +1404,17 @@ bool within_kprobe_blacklist(unsigned long addr)
return false;
}
+bool within_kprobe_error_injection_list(unsigned long addr)
+{
+ struct kprobe_ei_entry *ent;
+
+ list_for_each_entry(ent, &kprobe_error_injection_list, list) {
+ if (addr >= ent->start_addr && addr < ent->end_addr)
+ return true;
+ }
+ return false;
+}
+
/*
* If we have a symbol_name argument, look it up and add the offset field
* to it. This way, we can specify a relative address to a symbol.
@@ -2168,6 +2189,86 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
return 0;
}
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+/* Markers of the _kprobe_error_inject_list section */
+extern unsigned long __start_kprobe_error_inject_list[];
+extern unsigned long __stop_kprobe_error_inject_list[];
+
+/*
+ * Lookup and populate the kprobe_error_injection_list.
+ *
+ * For safety reasons we only allow certain functions to be overriden with
+ * bpf_error_injection, so we need to populate the list of the symbols that have
+ * been marked as safe for overriding.
+ */
+static void populate_kprobe_error_injection_list(unsigned long *start,
+ unsigned long *end,
+ void *priv)
+{
+ unsigned long *iter;
+ struct kprobe_ei_entry *ent;
+ unsigned long entry, offset = 0, size = 0;
+
+ mutex_lock(&kprobe_ei_mutex);
+ for (iter = start; iter < end; iter++) {
+ entry = arch_deref_entry_point((void *)*iter);
+
+ if (!kernel_text_address(entry) ||
+ !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+ pr_err("Failed to find error inject entry at %p\n",
+ (void *)entry);
+ continue;
+ }
+
+ ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ break;
+ ent->start_addr = entry;
+ ent->end_addr = entry + size;
+ ent->priv = priv;
+ INIT_LIST_HEAD(&ent->list);
+ list_add_tail(&ent->list, &kprobe_error_injection_list);
+ }
+ mutex_unlock(&kprobe_ei_mutex);
+}
+
+static void __init populate_kernel_kprobe_ei_list(void)
+{
+ populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
+ __stop_kprobe_error_inject_list,
+ NULL);
+}
+
+static void module_load_kprobe_ei_list(struct module *mod)
+{
+ if (!mod->num_kprobe_ei_funcs)
+ return;
+ populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
+ mod->kprobe_ei_funcs +
+ mod->num_kprobe_ei_funcs, mod);
+}
+
+static void module_unload_kprobe_ei_list(struct module *mod)
+{
+ struct kprobe_ei_entry *ent, *n;
+ if (!mod->num_kprobe_ei_funcs)
+ return;
+
+ mutex_lock(&kprobe_ei_mutex);
+ list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
+ if (ent->priv == mod) {
+ list_del_init(&ent->list);
+ kfree(ent);
+ }
+ }
+ mutex_unlock(&kprobe_ei_mutex);
+}
+#else
+static inline void __init populate_kernel_kprobe_ei_list(void) {}
+static inline void module_load_kprobe_ei_list(struct module *m) {}
+static inline void module_unload_kprobe_ei_list(struct module *m) {}
+#endif
+
/* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
@@ -2178,6 +2279,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
unsigned int i;
int checkcore = (val == MODULE_STATE_GOING);
+ if (val == MODULE_STATE_COMING)
+ module_load_kprobe_ei_list(mod);
+ else if (val == MODULE_STATE_GOING)
+ module_unload_kprobe_ei_list(mod);
+
if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
return NOTIFY_DONE;
@@ -2240,6 +2346,8 @@ static int __init init_kprobes(void)
pr_err("Please take care of using kprobes.\n");
}
+ populate_kernel_kprobe_ei_list();
+
if (kretprobe_blacklist_size) {
/* lookup the function address from its name */
for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -2407,6 +2515,56 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
.release = seq_release,
};
+/*
+ * kprobes/error_injection_list -- shows which functions can be overriden for
+ * error injection.
+ * */
+static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&kprobe_ei_mutex);
+ return seq_list_start(&kprobe_error_injection_list, *pos);
+}
+
+static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&kprobe_ei_mutex);
+}
+
+static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return seq_list_next(v, &kprobe_error_injection_list, pos);
+}
+
+static int kprobe_ei_seq_show(struct seq_file *m, void *v)
+{
+ char buffer[KSYM_SYMBOL_LEN];
+ struct kprobe_ei_entry *ent =
+ list_entry(v, struct kprobe_ei_entry, list);
+
+ sprint_symbol(buffer, ent->start_addr);
+ seq_printf(m, "%s\n", buffer);
+ return 0;
+}
+
+static const struct seq_operations kprobe_ei_seq_ops = {
+ .start = kprobe_ei_seq_start,
+ .next = kprobe_ei_seq_next,
+ .stop = kprobe_ei_seq_stop,
+ .show = kprobe_ei_seq_show,
+};
+
+static int kprobe_ei_open(struct inode *inode, struct file *filp)
+{
+ return seq_open(filp, &kprobe_ei_seq_ops);
+}
+
+static const struct file_operations debugfs_kprobe_ei_ops = {
+ .open = kprobe_ei_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static void arm_all_kprobes(void)
{
struct hlist_head *head;
@@ -2548,6 +2706,11 @@ static int __init debugfs_kprobe_init(void)
if (!file)
goto error;
+ file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
+ &debugfs_kprobe_ei_ops);
+ if (!file)
+ goto error;
+
return 0;
error:
diff --git a/kernel/module.c b/kernel/module.c
index dea01ac9cb74..bd695bfdc5c4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3118,7 +3118,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->ftrace_callsites),
&mod->num_ftrace_callsites);
#endif
-
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
+ sizeof(*mod->kprobe_ei_funcs),
+ &mod->num_kprobe_ei_funcs);
+#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index af7dad126c13..3e6fd580fe7f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -529,6 +529,17 @@ config FUNCTION_PROFILER
If in doubt, say N.
+config BPF_KPROBE_OVERRIDE
+ bool "Enable BPF programs to override a kprobed function"
+ depends on BPF_EVENTS
+ depends on KPROBES_ON_FTRACE
+ depends on HAVE_KPROBE_OVERRIDE
+ depends on DYNAMIC_FTRACE_WITH_REGS
+ default n
+ help
+ Allows BPF to override the execution of a probed function and
+ set a different return value. This is used for error injection.
+
config FTRACE_MCOUNT_RECORD
def_bool y
depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b143f2a05aff..e009b7ecf473 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,6 +13,10 @@
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
+#include <linux/kprobes.h>
+#include <asm/kprobes.h>
+
+#include "trace_probe.h"
#include "trace.h"
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+ __this_cpu_write(bpf_kprobe_override, 1);
+ regs_set_return_value(regs, rc);
+ arch_ftrace_kprobe_override_function(regs);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_override_return_proto = {
+ .func = bpf_override_return,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+#endif
+
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
int ret;
@@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_get_stackid_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ case BPF_FUNC_override_return:
+ return &bpf_override_return_proto;
+#endif
default:
return tracing_func_proto(func_id);
}
@@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog_array *new_array;
int ret = -EEXIST;
+ /*
+ * Kprobe override only works for ftrace based kprobes, and only if they
+ * are on the opt-in list.
+ */
+ if (prog->kprobe_override &&
+ (!trace_kprobe_ftrace(event->tp_event) ||
+ !trace_kprobe_error_injectable(event->tp_event)))
+ return -EINVAL;
+
mutex_lock(&bpf_event_mutex);
if (event->prog)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 492700c5fb4d..5db849809a56 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -42,6 +42,7 @@ struct trace_kprobe {
(offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
+DEFINE_PER_CPU(int, bpf_kprobe_override);
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
@@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit;
}
+int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+ struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ return kprobe_ftrace(&tk->rp.kp);
+}
+
+int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+ struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ unsigned long addr;
+
+ if (tk->symbol) {
+ addr = (unsigned long)
+ kallsyms_lookup_name(trace_kprobe_symbol(tk));
+ addr += tk->rp.kp.offset;
+ } else {
+ addr = (unsigned long)tk->rp.kp.addr;
+ }
+ return within_kprobe_error_injection_list(addr);
+}
+
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);
@@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
-static void
+static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
struct trace_event_call *call = &tk->tp.call;
@@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
int size, __size, dsize;
int rctx;
- if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
- return;
+ if (bpf_prog_array_valid(call)) {
+ int ret;
+
+ ret = trace_call_bpf(call, regs);
+
+ /*
+ * We need to check and see if we modified the pc of the
+ * pt_regs, and if so clear the kprobe and return 1 so that we
+ * don't do the instruction skipping. Also reset our state so
+ * we are clean the next pass through.
+ */
+ if (__this_cpu_read(bpf_kprobe_override)) {
+ __this_cpu_write(bpf_kprobe_override, 0);
+ reset_current_kprobe();
+ return 1;
+ }
+ if (!ret)
+ return 0;
+ }
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
- return;
+ return 0;
dsize = __get_data_size(&tk->tp, regs);
__size = sizeof(*entry) + tk->tp.size + dsize;
@@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
entry = perf_trace_buf_alloc(size, NULL, &rctx);
if (!entry)
- return;
+ return 0;
entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
+ return 0;
}
NOKPROBE_SYMBOL(kprobe_perf_func);
@@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event,
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
+ int ret = 0;
raw_cpu_inc(*tk->nhit);
@@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
if (tk->tp.flags & TP_FLAG_PROFILE)
- kprobe_perf_func(tk, regs);
+ ret = kprobe_perf_func(tk, regs);
#endif
- return 0; /* We don't tweek kernel, so just return 0 */
+ return ret;
}
NOKPROBE_SYMBOL(kprobe_dispatcher);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index fb66e3eaa192..5e54d748c84c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,6 +252,8 @@ struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc);
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
+int trace_kprobe_ftrace(struct trace_event_call *call);
+int trace_kprobe_error_injectable(struct trace_event_call *call);
#else
/* uprobes do not support symbol fetch methods */
#define fetch_symbol_u8 NULL
@@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset)
{
return NULL;
}
+
+static inline int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+ return 0;
+}
+
+static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+ return 0;
+}
#endif /* CONFIG_KPROBE_EVENTS */
struct probe_arg {