trace_event_perf.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
trace_event_perf.c (12445B)
      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * trace event based perf event profiling/tracing
      4 *
      5 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra
      6 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
      7 */
      8
      9#include <linux/module.h>
     10#include <linux/kprobes.h>
     11#include <linux/security.h>
     12#include "trace.h"
     13#include "trace_probe.h"
     14
     15static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
     16
     17/*
     18 * Force it to be aligned to unsigned long to avoid misaligned accesses
     19 * surprises
     20 */
     21typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
     22	perf_trace_t;
     23
     24/* Count the events in use (per event id, not per instance) */
     25static int	total_ref_count;
     26
     27static int perf_trace_event_perm(struct trace_event_call *tp_event,
     28				 struct perf_event *p_event)
     29{
     30	int ret;
     31
     32	if (tp_event->perf_perm) {
     33		ret = tp_event->perf_perm(tp_event, p_event);
     34		if (ret)
     35			return ret;
     36	}
     37
     38	/*
     39	 * We checked and allowed to create parent,
     40	 * allow children without checking.
     41	 */
     42	if (p_event->parent)
     43		return 0;
     44
     45	/*
     46	 * It's ok to check current process (owner) permissions in here,
     47	 * because code below is called only via perf_event_open syscall.
     48	 */
     49
     50	/* The ftrace function trace is allowed only for root. */
     51	if (ftrace_event_is_function(tp_event)) {
     52		ret = perf_allow_tracepoint(&p_event->attr);
     53		if (ret)
     54			return ret;
     55
     56		if (!is_sampling_event(p_event))
     57			return 0;
     58
     59		/*
     60		 * We don't allow user space callchains for  function trace
     61		 * event, due to issues with page faults while tracing page
     62		 * fault handler and its overall trickiness nature.
     63		 */
     64		if (!p_event->attr.exclude_callchain_user)
     65			return -EINVAL;
     66
     67		/*
     68		 * Same reason to disable user stack dump as for user space
     69		 * callchains above.
     70		 */
     71		if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
     72			return -EINVAL;
     73	}
     74
     75	/* No tracing, just counting, so no obvious leak */
     76	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
     77		return 0;
     78
     79	/* Some events are ok to be traced by non-root users... */
     80	if (p_event->attach_state == PERF_ATTACH_TASK) {
     81		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
     82			return 0;
     83	}
     84
     85	/*
     86	 * ...otherwise raw tracepoint data can be a severe data leak,
     87	 * only allow root to have these.
     88	 */
     89	ret = perf_allow_tracepoint(&p_event->attr);
     90	if (ret)
     91		return ret;
     92
     93	return 0;
     94}
     95
     96static int perf_trace_event_reg(struct trace_event_call *tp_event,
     97				struct perf_event *p_event)
     98{
     99	struct hlist_head __percpu *list;
    100	int ret = -ENOMEM;
    101	int cpu;
    102
    103	p_event->tp_event = tp_event;
    104	if (tp_event->perf_refcount++ > 0)
    105		return 0;
    106
    107	list = alloc_percpu(struct hlist_head);
    108	if (!list)
    109		goto fail;
    110
    111	for_each_possible_cpu(cpu)
    112		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
    113
    114	tp_event->perf_events = list;
    115
    116	if (!total_ref_count) {
    117		char __percpu *buf;
    118		int i;
    119
    120		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
    121			buf = (char __percpu *)alloc_percpu(perf_trace_t);
    122			if (!buf)
    123				goto fail;
    124
    125			perf_trace_buf[i] = buf;
    126		}
    127	}
    128
    129	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
    130	if (ret)
    131		goto fail;
    132
    133	total_ref_count++;
    134	return 0;
    135
    136fail:
    137	if (!total_ref_count) {
    138		int i;
    139
    140		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
    141			free_percpu(perf_trace_buf[i]);
    142			perf_trace_buf[i] = NULL;
    143		}
    144	}
    145
    146	if (!--tp_event->perf_refcount) {
    147		free_percpu(tp_event->perf_events);
    148		tp_event->perf_events = NULL;
    149	}
    150
    151	return ret;
    152}
    153
    154static void perf_trace_event_unreg(struct perf_event *p_event)
    155{
    156	struct trace_event_call *tp_event = p_event->tp_event;
    157	int i;
    158
    159	if (--tp_event->perf_refcount > 0)
    160		goto out;
    161
    162	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
    163
    164	/*
    165	 * Ensure our callback won't be called anymore. The buffers
    166	 * will be freed after that.
    167	 */
    168	tracepoint_synchronize_unregister();
    169
    170	free_percpu(tp_event->perf_events);
    171	tp_event->perf_events = NULL;
    172
    173	if (!--total_ref_count) {
    174		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
    175			free_percpu(perf_trace_buf[i]);
    176			perf_trace_buf[i] = NULL;
    177		}
    178	}
    179out:
    180	trace_event_put_ref(tp_event);
    181}
    182
    183static int perf_trace_event_open(struct perf_event *p_event)
    184{
    185	struct trace_event_call *tp_event = p_event->tp_event;
    186	return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
    187}
    188
    189static void perf_trace_event_close(struct perf_event *p_event)
    190{
    191	struct trace_event_call *tp_event = p_event->tp_event;
    192	tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
    193}
    194
    195static int perf_trace_event_init(struct trace_event_call *tp_event,
    196				 struct perf_event *p_event)
    197{
    198	int ret;
    199
    200	ret = perf_trace_event_perm(tp_event, p_event);
    201	if (ret)
    202		return ret;
    203
    204	ret = perf_trace_event_reg(tp_event, p_event);
    205	if (ret)
    206		return ret;
    207
    208	ret = perf_trace_event_open(p_event);
    209	if (ret) {
    210		perf_trace_event_unreg(p_event);
    211		return ret;
    212	}
    213
    214	return 0;
    215}
    216
    217int perf_trace_init(struct perf_event *p_event)
    218{
    219	struct trace_event_call *tp_event;
    220	u64 event_id = p_event->attr.config;
    221	int ret = -EINVAL;
    222
    223	mutex_lock(&event_mutex);
    224	list_for_each_entry(tp_event, &ftrace_events, list) {
    225		if (tp_event->event.type == event_id &&
    226		    tp_event->class && tp_event->class->reg &&
    227		    trace_event_try_get_ref(tp_event)) {
    228			ret = perf_trace_event_init(tp_event, p_event);
    229			if (ret)
    230				trace_event_put_ref(tp_event);
    231			break;
    232		}
    233	}
    234	mutex_unlock(&event_mutex);
    235
    236	return ret;
    237}
    238
    239void perf_trace_destroy(struct perf_event *p_event)
    240{
    241	mutex_lock(&event_mutex);
    242	perf_trace_event_close(p_event);
    243	perf_trace_event_unreg(p_event);
    244	mutex_unlock(&event_mutex);
    245}
    246
    247#ifdef CONFIG_KPROBE_EVENTS
    248int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
    249{
    250	int ret;
    251	char *func = NULL;
    252	struct trace_event_call *tp_event;
    253
    254	if (p_event->attr.kprobe_func) {
    255		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
    256		if (!func)
    257			return -ENOMEM;
    258		ret = strncpy_from_user(
    259			func, u64_to_user_ptr(p_event->attr.kprobe_func),
    260			KSYM_NAME_LEN);
    261		if (ret == KSYM_NAME_LEN)
    262			ret = -E2BIG;
    263		if (ret < 0)
    264			goto out;
    265
    266		if (func[0] == '\0') {
    267			kfree(func);
    268			func = NULL;
    269		}
    270	}
    271
    272	tp_event = create_local_trace_kprobe(
    273		func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
    274		p_event->attr.probe_offset, is_retprobe);
    275	if (IS_ERR(tp_event)) {
    276		ret = PTR_ERR(tp_event);
    277		goto out;
    278	}
    279
    280	mutex_lock(&event_mutex);
    281	ret = perf_trace_event_init(tp_event, p_event);
    282	if (ret)
    283		destroy_local_trace_kprobe(tp_event);
    284	mutex_unlock(&event_mutex);
    285out:
    286	kfree(func);
    287	return ret;
    288}
    289
    290void perf_kprobe_destroy(struct perf_event *p_event)
    291{
    292	mutex_lock(&event_mutex);
    293	perf_trace_event_close(p_event);
    294	perf_trace_event_unreg(p_event);
    295	mutex_unlock(&event_mutex);
    296
    297	destroy_local_trace_kprobe(p_event->tp_event);
    298}
    299#endif /* CONFIG_KPROBE_EVENTS */
    300
    301#ifdef CONFIG_UPROBE_EVENTS
    302int perf_uprobe_init(struct perf_event *p_event,
    303		     unsigned long ref_ctr_offset, bool is_retprobe)
    304{
    305	int ret;
    306	char *path = NULL;
    307	struct trace_event_call *tp_event;
    308
    309	if (!p_event->attr.uprobe_path)
    310		return -EINVAL;
    311
    312	path = strndup_user(u64_to_user_ptr(p_event->attr.uprobe_path),
    313			    PATH_MAX);
    314	if (IS_ERR(path)) {
    315		ret = PTR_ERR(path);
    316		return (ret == -EINVAL) ? -E2BIG : ret;
    317	}
    318	if (path[0] == '\0') {
    319		ret = -EINVAL;
    320		goto out;
    321	}
    322
    323	tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
    324					     ref_ctr_offset, is_retprobe);
    325	if (IS_ERR(tp_event)) {
    326		ret = PTR_ERR(tp_event);
    327		goto out;
    328	}
    329
    330	/*
    331	 * local trace_uprobe need to hold event_mutex to call
    332	 * uprobe_buffer_enable() and uprobe_buffer_disable().
    333	 * event_mutex is not required for local trace_kprobes.
    334	 */
    335	mutex_lock(&event_mutex);
    336	ret = perf_trace_event_init(tp_event, p_event);
    337	if (ret)
    338		destroy_local_trace_uprobe(tp_event);
    339	mutex_unlock(&event_mutex);
    340out:
    341	kfree(path);
    342	return ret;
    343}
    344
    345void perf_uprobe_destroy(struct perf_event *p_event)
    346{
    347	mutex_lock(&event_mutex);
    348	perf_trace_event_close(p_event);
    349	perf_trace_event_unreg(p_event);
    350	mutex_unlock(&event_mutex);
    351	destroy_local_trace_uprobe(p_event->tp_event);
    352}
    353#endif /* CONFIG_UPROBE_EVENTS */
    354
    355int perf_trace_add(struct perf_event *p_event, int flags)
    356{
    357	struct trace_event_call *tp_event = p_event->tp_event;
    358
    359	if (!(flags & PERF_EF_START))
    360		p_event->hw.state = PERF_HES_STOPPED;
    361
    362	/*
    363	 * If TRACE_REG_PERF_ADD returns false; no custom action was performed
    364	 * and we need to take the default action of enqueueing our event on
    365	 * the right per-cpu hlist.
    366	 */
    367	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) {
    368		struct hlist_head __percpu *pcpu_list;
    369		struct hlist_head *list;
    370
    371		pcpu_list = tp_event->perf_events;
    372		if (WARN_ON_ONCE(!pcpu_list))
    373			return -EINVAL;
    374
    375		list = this_cpu_ptr(pcpu_list);
    376		hlist_add_head_rcu(&p_event->hlist_entry, list);
    377	}
    378
    379	return 0;
    380}
    381
    382void perf_trace_del(struct perf_event *p_event, int flags)
    383{
    384	struct trace_event_call *tp_event = p_event->tp_event;
    385
    386	/*
    387	 * If TRACE_REG_PERF_DEL returns false; no custom action was performed
    388	 * and we need to take the default action of dequeueing our event from
    389	 * the right per-cpu hlist.
    390	 */
    391	if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event))
    392		hlist_del_rcu(&p_event->hlist_entry);
    393}
    394
    395void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp)
    396{
    397	char *raw_data;
    398	int rctx;
    399
    400	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
    401
    402	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
    403		      "perf buffer not large enough, wanted %d, have %d",
    404		      size, PERF_MAX_TRACE_SIZE))
    405		return NULL;
    406
    407	*rctxp = rctx = perf_swevent_get_recursion_context();
    408	if (rctx < 0)
    409		return NULL;
    410
    411	if (regs)
    412		*regs = this_cpu_ptr(&__perf_regs[rctx]);
    413	raw_data = this_cpu_ptr(perf_trace_buf[rctx]);
    414
    415	/* zero the dead bytes from align to not leak stack to user */
    416	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
    417	return raw_data;
    418}
    419EXPORT_SYMBOL_GPL(perf_trace_buf_alloc);
    420NOKPROBE_SYMBOL(perf_trace_buf_alloc);
    421
    422void perf_trace_buf_update(void *record, u16 type)
    423{
    424	struct trace_entry *entry = record;
    425
    426	tracing_generic_entry_update(entry, type, tracing_gen_ctx());
    427}
    428NOKPROBE_SYMBOL(perf_trace_buf_update);
    429
    430#ifdef CONFIG_FUNCTION_TRACER
    431static void
    432perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
    433			  struct ftrace_ops *ops,  struct ftrace_regs *fregs)
    434{
    435	struct ftrace_entry *entry;
    436	struct perf_event *event;
    437	struct hlist_head head;
    438	struct pt_regs regs;
    439	int rctx;
    440	int bit;
    441
    442	if (!rcu_is_watching())
    443		return;
    444
    445	bit = ftrace_test_recursion_trylock(ip, parent_ip);
    446	if (bit < 0)
    447		return;
    448
    449	if ((unsigned long)ops->private != smp_processor_id())
    450		goto out;
    451
    452	event = container_of(ops, struct perf_event, ftrace_ops);
    453
    454	/*
    455	 * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all
    456	 * the perf code does is hlist_for_each_entry_rcu(), so we can
    457	 * get away with simply setting the @head.first pointer in order
    458	 * to create a singular list.
    459	 */
    460	head.first = &event->hlist_entry;
    461
    462#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
    463		    sizeof(u64)) - sizeof(u32))
    464
    465	BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
    466
    467	memset(&regs, 0, sizeof(regs));
    468	perf_fetch_caller_regs(&regs);
    469
    470	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
    471	if (!entry)
    472		goto out;
    473
    474	entry->ip = ip;
    475	entry->parent_ip = parent_ip;
    476	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
    477			      1, &regs, &head, NULL);
    478
    479out:
    480	ftrace_test_recursion_unlock(bit);
    481#undef ENTRY_SIZE
    482}
    483
    484static int perf_ftrace_function_register(struct perf_event *event)
    485{
    486	struct ftrace_ops *ops = &event->ftrace_ops;
    487
    488	ops->func    = perf_ftrace_function_call;
    489	ops->private = (void *)(unsigned long)nr_cpu_ids;
    490
    491	return register_ftrace_function(ops);
    492}
    493
    494static int perf_ftrace_function_unregister(struct perf_event *event)
    495{
    496	struct ftrace_ops *ops = &event->ftrace_ops;
    497	int ret = unregister_ftrace_function(ops);
    498	ftrace_free_filter(ops);
    499	return ret;
    500}
    501
    502int perf_ftrace_event_register(struct trace_event_call *call,
    503			       enum trace_reg type, void *data)
    504{
    505	struct perf_event *event = data;
    506
    507	switch (type) {
    508	case TRACE_REG_REGISTER:
    509	case TRACE_REG_UNREGISTER:
    510		break;
    511	case TRACE_REG_PERF_REGISTER:
    512	case TRACE_REG_PERF_UNREGISTER:
    513		return 0;
    514	case TRACE_REG_PERF_OPEN:
    515		return perf_ftrace_function_register(data);
    516	case TRACE_REG_PERF_CLOSE:
    517		return perf_ftrace_function_unregister(data);
    518	case TRACE_REG_PERF_ADD:
    519		event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id();
    520		return 1;
    521	case TRACE_REG_PERF_DEL:
    522		event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids;
    523		return 1;
    524	}
    525
    526	return -EINVAL;
    527}
    528#endif /* CONFIG_FUNCTION_TRACER */