cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

trace_syscalls.c (20434B)


      1// SPDX-License-Identifier: GPL-2.0
      2#include <trace/syscall.h>
      3#include <trace/events/syscalls.h>
      4#include <linux/syscalls.h>
      5#include <linux/slab.h>
      6#include <linux/kernel.h>
      7#include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
      8#include <linux/ftrace.h>
      9#include <linux/perf_event.h>
     10#include <linux/xarray.h>
     11#include <asm/syscall.h>
     12
     13#include "trace_output.h"
     14#include "trace.h"
     15
     16static DEFINE_MUTEX(syscall_trace_lock);
     17
     18static int syscall_enter_register(struct trace_event_call *event,
     19				 enum trace_reg type, void *data);
     20static int syscall_exit_register(struct trace_event_call *event,
     21				 enum trace_reg type, void *data);
     22
     23static struct list_head *
     24syscall_get_enter_fields(struct trace_event_call *call)
     25{
     26	struct syscall_metadata *entry = call->data;
     27
     28	return &entry->enter_fields;
     29}
     30
/* Linker-section bounds of the syscall metadata records. */
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

/* nr -> metadata map: xarray when syscall numbers are sparse, flat array otherwise. */
static DEFINE_XARRAY(syscalls_metadata_sparse);
static struct syscall_metadata **syscalls_metadata;
     36
     37#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
     38static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
     39{
     40	/*
     41	 * Only compare after the "sys" prefix. Archs that use
     42	 * syscall wrappers may have syscalls symbols aliases prefixed
     43	 * with ".SyS" or ".sys" instead of "sys", leading to an unwanted
     44	 * mismatch.
     45	 */
     46	return !strcmp(sym + 3, name + 3);
     47}
     48#endif
     49
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * Some architectures that allow for 32bit applications
 * to run on a 64bit kernel, do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 *     *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore it.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	/* -1 tells every caller to skip tracing this (compat) syscall */
	if (unlikely(arch_trace_is_compat_syscall(regs)))
		return -1;

	return syscall_get_nr(task, regs);
}
#else
/* No compat filtering needed: pass the arch syscall number through. */
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
     81
     82static __init struct syscall_metadata *
     83find_syscall_meta(unsigned long syscall)
     84{
     85	struct syscall_metadata **start;
     86	struct syscall_metadata **stop;
     87	char str[KSYM_SYMBOL_LEN];
     88
     89
     90	start = __start_syscalls_metadata;
     91	stop = __stop_syscalls_metadata;
     92	kallsyms_lookup(syscall, NULL, NULL, NULL, str);
     93
     94	if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
     95		return NULL;
     96
     97	for ( ; start < stop; start++) {
     98		if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
     99			return *start;
    100	}
    101	return NULL;
    102}
    103
    104static struct syscall_metadata *syscall_nr_to_meta(int nr)
    105{
    106	if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR))
    107		return xa_load(&syscalls_metadata_sparse, (unsigned long)nr);
    108
    109	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
    110		return NULL;
    111
    112	return syscalls_metadata[nr];
    113}
    114
    115const char *get_syscall_name(int syscall)
    116{
    117	struct syscall_metadata *entry;
    118
    119	entry = syscall_nr_to_meta(syscall);
    120	if (!entry)
    121		return NULL;
    122
    123	return entry->name;
    124}
    125
    126static enum print_line_t
    127print_syscall_enter(struct trace_iterator *iter, int flags,
    128		    struct trace_event *event)
    129{
    130	struct trace_array *tr = iter->tr;
    131	struct trace_seq *s = &iter->seq;
    132	struct trace_entry *ent = iter->ent;
    133	struct syscall_trace_enter *trace;
    134	struct syscall_metadata *entry;
    135	int i, syscall;
    136
    137	trace = (typeof(trace))ent;
    138	syscall = trace->nr;
    139	entry = syscall_nr_to_meta(syscall);
    140
    141	if (!entry)
    142		goto end;
    143
    144	if (entry->enter_event->event.type != ent->type) {
    145		WARN_ON_ONCE(1);
    146		goto end;
    147	}
    148
    149	trace_seq_printf(s, "%s(", entry->name);
    150
    151	for (i = 0; i < entry->nb_args; i++) {
    152
    153		if (trace_seq_has_overflowed(s))
    154			goto end;
    155
    156		/* parameter types */
    157		if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
    158			trace_seq_printf(s, "%s ", entry->types[i]);
    159
    160		/* parameter values */
    161		trace_seq_printf(s, "%s: %lx%s", entry->args[i],
    162				 trace->args[i],
    163				 i == entry->nb_args - 1 ? "" : ", ");
    164	}
    165
    166	trace_seq_putc(s, ')');
    167end:
    168	trace_seq_putc(s, '\n');
    169
    170	return trace_handle_return(s);
    171}
    172
    173static enum print_line_t
    174print_syscall_exit(struct trace_iterator *iter, int flags,
    175		   struct trace_event *event)
    176{
    177	struct trace_seq *s = &iter->seq;
    178	struct trace_entry *ent = iter->ent;
    179	struct syscall_trace_exit *trace;
    180	int syscall;
    181	struct syscall_metadata *entry;
    182
    183	trace = (typeof(trace))ent;
    184	syscall = trace->nr;
    185	entry = syscall_nr_to_meta(syscall);
    186
    187	if (!entry) {
    188		trace_seq_putc(s, '\n');
    189		goto out;
    190	}
    191
    192	if (entry->exit_event->event.type != ent->type) {
    193		WARN_ON_ONCE(1);
    194		return TRACE_TYPE_UNHANDLED;
    195	}
    196
    197	trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
    198				trace->ret);
    199
    200 out:
    201	return trace_handle_return(s);
    202}
    203
    204extern char *__bad_type_size(void);
    205
/*
 * Describe one fixed field of a syscall event record, for use in the
 * trace_event_fields arrays below.
 */
#define SYSCALL_FIELD(_type, _name) {					\
	.type = #_type, .name = #_name,					\
	.size = sizeof(_type), .align = __alignof__(_type),		\
	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }
    210
    211static int __init
    212__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
    213{
    214	int i;
    215	int pos = 0;
    216
    217	/* When len=0, we just calculate the needed length */
    218#define LEN_OR_ZERO (len ? len - pos : 0)
    219
    220	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
    221	for (i = 0; i < entry->nb_args; i++) {
    222		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
    223				entry->args[i], sizeof(unsigned long),
    224				i == entry->nb_args - 1 ? "" : ", ");
    225	}
    226	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
    227
    228	for (i = 0; i < entry->nb_args; i++) {
    229		pos += snprintf(buf + pos, LEN_OR_ZERO,
    230				", ((unsigned long)(REC->%s))", entry->args[i]);
    231	}
    232
    233#undef LEN_OR_ZERO
    234
    235	/* return the length of print_fmt */
    236	return pos;
    237}
    238
    239static int __init set_syscall_print_fmt(struct trace_event_call *call)
    240{
    241	char *print_fmt;
    242	int len;
    243	struct syscall_metadata *entry = call->data;
    244
    245	if (entry->enter_event != call) {
    246		call->print_fmt = "\"0x%lx\", REC->ret";
    247		return 0;
    248	}
    249
    250	/* First: called with 0 length to calculate the needed length */
    251	len = __set_enter_print_fmt(entry, NULL, 0);
    252
    253	print_fmt = kmalloc(len + 1, GFP_KERNEL);
    254	if (!print_fmt)
    255		return -ENOMEM;
    256
    257	/* Second: actually write the @print_fmt */
    258	__set_enter_print_fmt(entry, print_fmt, len + 1);
    259	call->print_fmt = print_fmt;
    260
    261	return 0;
    262}
    263
    264static void __init free_syscall_print_fmt(struct trace_event_call *call)
    265{
    266	struct syscall_metadata *entry = call->data;
    267
    268	if (entry->enter_event == call)
    269		kfree(call->print_fmt);
    270}
    271
    272static int __init syscall_enter_define_fields(struct trace_event_call *call)
    273{
    274	struct syscall_trace_enter trace;
    275	struct syscall_metadata *meta = call->data;
    276	int offset = offsetof(typeof(trace), args);
    277	int ret = 0;
    278	int i;
    279
    280	for (i = 0; i < meta->nb_args; i++) {
    281		ret = trace_define_field(call, meta->types[i],
    282					 meta->args[i], offset,
    283					 sizeof(unsigned long), 0,
    284					 FILTER_OTHER);
    285		if (ret)
    286			break;
    287		offset += sizeof(unsigned long);
    288	}
    289
    290	return ret;
    291}
    292
/*
 * sys_enter tracepoint handler: record one syscall-entry event into
 * the ring buffer of the trace array @data, provided the event is
 * enabled for this syscall on that instance and not soft-disabled.
 */
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct trace_event_buffer fbuffer;
	unsigned long args[6];
	int syscall_nr;
	int size;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
	trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(trace_file))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* Fixed header followed by one unsigned long per argument. */
	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
	if (!entry)
		return;

	entry = ring_buffer_event_data(fbuffer.event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, args);
	memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);

	trace_event_buffer_commit(&fbuffer);
}
    333
/*
 * sys_exit tracepoint handler: record the syscall number and return
 * value into the ring buffer of the trace array @data, provided the
 * event is enabled for this syscall and not soft-disabled.
 */
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct trace_event_buffer fbuffer;
	int syscall_nr;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
	trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(trace_file))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry));
	if (!entry)
		return;

	entry = ring_buffer_event_data(fbuffer.event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	trace_event_buffer_commit(&fbuffer);
}
    369
    370static int reg_event_syscall_enter(struct trace_event_file *file,
    371				   struct trace_event_call *call)
    372{
    373	struct trace_array *tr = file->tr;
    374	int ret = 0;
    375	int num;
    376
    377	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    378	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
    379		return -ENOSYS;
    380	mutex_lock(&syscall_trace_lock);
    381	if (!tr->sys_refcount_enter)
    382		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
    383	if (!ret) {
    384		rcu_assign_pointer(tr->enter_syscall_files[num], file);
    385		tr->sys_refcount_enter++;
    386	}
    387	mutex_unlock(&syscall_trace_lock);
    388	return ret;
    389}
    390
    391static void unreg_event_syscall_enter(struct trace_event_file *file,
    392				      struct trace_event_call *call)
    393{
    394	struct trace_array *tr = file->tr;
    395	int num;
    396
    397	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    398	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
    399		return;
    400	mutex_lock(&syscall_trace_lock);
    401	tr->sys_refcount_enter--;
    402	RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
    403	if (!tr->sys_refcount_enter)
    404		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
    405	mutex_unlock(&syscall_trace_lock);
    406}
    407
    408static int reg_event_syscall_exit(struct trace_event_file *file,
    409				  struct trace_event_call *call)
    410{
    411	struct trace_array *tr = file->tr;
    412	int ret = 0;
    413	int num;
    414
    415	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    416	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
    417		return -ENOSYS;
    418	mutex_lock(&syscall_trace_lock);
    419	if (!tr->sys_refcount_exit)
    420		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
    421	if (!ret) {
    422		rcu_assign_pointer(tr->exit_syscall_files[num], file);
    423		tr->sys_refcount_exit++;
    424	}
    425	mutex_unlock(&syscall_trace_lock);
    426	return ret;
    427}
    428
    429static void unreg_event_syscall_exit(struct trace_event_file *file,
    430				     struct trace_event_call *call)
    431{
    432	struct trace_array *tr = file->tr;
    433	int num;
    434
    435	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    436	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
    437		return;
    438	mutex_lock(&syscall_trace_lock);
    439	tr->sys_refcount_exit--;
    440	RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
    441	if (!tr->sys_refcount_exit)
    442		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
    443	mutex_unlock(&syscall_trace_lock);
    444}
    445
    446static int __init init_syscall_trace(struct trace_event_call *call)
    447{
    448	int id;
    449	int num;
    450
    451	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    452	if (num < 0 || num >= NR_syscalls) {
    453		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
    454				((struct syscall_metadata *)call->data)->name);
    455		return -ENOSYS;
    456	}
    457
    458	if (set_syscall_print_fmt(call) < 0)
    459		return -ENOMEM;
    460
    461	id = trace_event_raw_init(call);
    462
    463	if (id < 0) {
    464		free_syscall_print_fmt(call);
    465		return id;
    466	}
    467
    468	return id;
    469}
    470
/* Fixed syscall-number field plus per-syscall dynamic argument fields. */
static struct trace_event_fields __refdata syscall_enter_fields_array[] = {
	SYSCALL_FIELD(int, __syscall_nr),
	{ .type = TRACE_FUNCTION_TYPE,
	  .define_fields = syscall_enter_define_fields },
	{}
};

struct trace_event_functions enter_syscall_print_funcs = {
	.trace		= print_syscall_enter,
};

struct trace_event_functions exit_syscall_print_funcs = {
	.trace		= print_syscall_exit,
};

/* Event class shared by all syscall-entry trace events. */
struct trace_event_class __refdata event_class_syscall_enter = {
	.system		= "syscalls",
	.reg		= syscall_enter_register,
	.fields_array	= syscall_enter_fields_array,
	.get_fields	= syscall_get_enter_fields,
	.raw_init	= init_syscall_trace,
};

/* Event class shared by all syscall-exit trace events. */
struct trace_event_class __refdata event_class_syscall_exit = {
	.system		= "syscalls",
	.reg		= syscall_exit_register,
	.fields_array	= (struct trace_event_fields[]){
		SYSCALL_FIELD(int, __syscall_nr),
		SYSCALL_FIELD(long, ret),
		{}
	},
	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
	.raw_init	= init_syscall_trace,
};
    505
/*
 * Default nr -> syscall entry-point translation; weak so an arch with
 * a non-trivial syscall table layout can override it.
 */
unsigned long __init __weak arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}
    510
    511void __init init_ftrace_syscalls(void)
    512{
    513	struct syscall_metadata *meta;
    514	unsigned long addr;
    515	int i;
    516	void *ret;
    517
    518	if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
    519		syscalls_metadata = kcalloc(NR_syscalls,
    520					sizeof(*syscalls_metadata),
    521					GFP_KERNEL);
    522		if (!syscalls_metadata) {
    523			WARN_ON(1);
    524			return;
    525		}
    526	}
    527
    528	for (i = 0; i < NR_syscalls; i++) {
    529		addr = arch_syscall_addr(i);
    530		meta = find_syscall_meta(addr);
    531		if (!meta)
    532			continue;
    533
    534		meta->syscall_nr = i;
    535
    536		if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
    537			syscalls_metadata[i] = meta;
    538		} else {
    539			ret = xa_store(&syscalls_metadata_sparse, i, meta,
    540					GFP_KERNEL);
    541			WARN(xa_is_err(ret),
    542				"Syscall memory allocation failed\n");
    543		}
    544
    545	}
    546}
    547
    548#ifdef CONFIG_PERF_EVENTS
    549
/* Per-syscall enable bits for perf on each tracepoint. */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
/* Number of perf users currently hooked on each tracepoint. */
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
    554
/*
 * Run the BPF programs attached to a syscall-entry perf event.
 * @param must mirror the layout BPF programs expect for this
 * tracepoint: regs pointer, syscall number, then the arguments.
 */
static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
			       struct syscall_metadata *sys_data,
			       struct syscall_trace_enter *rec)
{
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
		unsigned long args[SYSCALL_DEFINE_MAXARGS];
	} param;
	int i;

	/* Store the pt_regs pointer in the first (integer-typed) field. */
	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	for (i = 0; i < sys_data->nb_args; i++)
		param.args[i] = rec->args[i];
	return trace_call_bpf(call, &param);
}
    572
/*
 * sys_enter tracepoint handler for perf: build a raw syscall-entry
 * sample and hand it to the attached BPF programs and/or perf events.
 */
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	struct hlist_head *head;
	unsigned long args[6];
	bool valid_prog_array;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* Bail early when neither BPF programs nor perf events are attached. */
	head = this_cpu_ptr(sys_data->enter_event->perf_events);
	valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
	if (!valid_prog_array && hlist_empty(head))
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, args);
	memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);

	/* If BPF filtered the event out, or nobody listens, drop the buffer. */
	if ((valid_prog_array &&
	     !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx,
			      sys_data->enter_event->event.type, 1, regs,
			      head, NULL);
}
    623
    624static int perf_sysenter_enable(struct trace_event_call *call)
    625{
    626	int ret = 0;
    627	int num;
    628
    629	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    630
    631	mutex_lock(&syscall_trace_lock);
    632	if (!sys_perf_refcount_enter)
    633		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
    634	if (ret) {
    635		pr_info("event trace: Could not activate syscall entry trace point");
    636	} else {
    637		set_bit(num, enabled_perf_enter_syscalls);
    638		sys_perf_refcount_enter++;
    639	}
    640	mutex_unlock(&syscall_trace_lock);
    641	return ret;
    642}
    643
    644static void perf_sysenter_disable(struct trace_event_call *call)
    645{
    646	int num;
    647
    648	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    649
    650	mutex_lock(&syscall_trace_lock);
    651	sys_perf_refcount_enter--;
    652	clear_bit(num, enabled_perf_enter_syscalls);
    653	if (!sys_perf_refcount_enter)
    654		unregister_trace_sys_enter(perf_syscall_enter, NULL);
    655	mutex_unlock(&syscall_trace_lock);
    656}
    657
/*
 * Run the BPF programs attached to a syscall-exit perf event.
 * @param must mirror the layout BPF programs expect: regs pointer,
 * syscall number, return value.
 */
static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
			      struct syscall_trace_exit *rec)
{
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
		unsigned long ret;
	} param;

	/* Store the pt_regs pointer in the first (integer-typed) field. */
	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	param.ret = rec->ret;
	return trace_call_bpf(call, &param);
}
    672
/*
 * sys_exit tracepoint handler for perf: build a raw syscall-exit
 * sample and hand it to the attached BPF programs and/or perf events.
 */
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	struct hlist_head *head;
	bool valid_prog_array;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* Bail early when neither BPF programs nor perf events are attached. */
	head = this_cpu_ptr(sys_data->exit_event->perf_events);
	valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
	if (!valid_prog_array && hlist_empty(head))
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	/* If BPF filtered the event out, or nobody listens, drop the buffer. */
	if ((valid_prog_array &&
	     !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
			      1, regs, head, NULL);
}
    719
    720static int perf_sysexit_enable(struct trace_event_call *call)
    721{
    722	int ret = 0;
    723	int num;
    724
    725	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    726
    727	mutex_lock(&syscall_trace_lock);
    728	if (!sys_perf_refcount_exit)
    729		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
    730	if (ret) {
    731		pr_info("event trace: Could not activate syscall exit trace point");
    732	} else {
    733		set_bit(num, enabled_perf_exit_syscalls);
    734		sys_perf_refcount_exit++;
    735	}
    736	mutex_unlock(&syscall_trace_lock);
    737	return ret;
    738}
    739
    740static void perf_sysexit_disable(struct trace_event_call *call)
    741{
    742	int num;
    743
    744	num = ((struct syscall_metadata *)call->data)->syscall_nr;
    745
    746	mutex_lock(&syscall_trace_lock);
    747	sys_perf_refcount_exit--;
    748	clear_bit(num, enabled_perf_exit_syscalls);
    749	if (!sys_perf_refcount_exit)
    750		unregister_trace_sys_exit(perf_syscall_exit, NULL);
    751	mutex_unlock(&syscall_trace_lock);
    752}
    753
    754#endif /* CONFIG_PERF_EVENTS */
    755
    756static int syscall_enter_register(struct trace_event_call *event,
    757				 enum trace_reg type, void *data)
    758{
    759	struct trace_event_file *file = data;
    760
    761	switch (type) {
    762	case TRACE_REG_REGISTER:
    763		return reg_event_syscall_enter(file, event);
    764	case TRACE_REG_UNREGISTER:
    765		unreg_event_syscall_enter(file, event);
    766		return 0;
    767
    768#ifdef CONFIG_PERF_EVENTS
    769	case TRACE_REG_PERF_REGISTER:
    770		return perf_sysenter_enable(event);
    771	case TRACE_REG_PERF_UNREGISTER:
    772		perf_sysenter_disable(event);
    773		return 0;
    774	case TRACE_REG_PERF_OPEN:
    775	case TRACE_REG_PERF_CLOSE:
    776	case TRACE_REG_PERF_ADD:
    777	case TRACE_REG_PERF_DEL:
    778		return 0;
    779#endif
    780	}
    781	return 0;
    782}
    783
    784static int syscall_exit_register(struct trace_event_call *event,
    785				 enum trace_reg type, void *data)
    786{
    787	struct trace_event_file *file = data;
    788
    789	switch (type) {
    790	case TRACE_REG_REGISTER:
    791		return reg_event_syscall_exit(file, event);
    792	case TRACE_REG_UNREGISTER:
    793		unreg_event_syscall_exit(file, event);
    794		return 0;
    795
    796#ifdef CONFIG_PERF_EVENTS
    797	case TRACE_REG_PERF_REGISTER:
    798		return perf_sysexit_enable(event);
    799	case TRACE_REG_PERF_UNREGISTER:
    800		perf_sysexit_disable(event);
    801		return 0;
    802	case TRACE_REG_PERF_OPEN:
    803	case TRACE_REG_PERF_CLOSE:
    804	case TRACE_REG_PERF_ADD:
    805	case TRACE_REG_PERF_DEL:
    806		return 0;
    807#endif
    808	}
    809	return 0;
    810}