cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

trace_events.c (96489B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * event tracer
      4 *
      5 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
      6 *
      7 *  - Added format output of fields of the trace point.
      8 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
      9 *
     10 */
     11
     12#define pr_fmt(fmt) fmt
     13
     14#include <linux/workqueue.h>
     15#include <linux/security.h>
     16#include <linux/spinlock.h>
     17#include <linux/kthread.h>
     18#include <linux/tracefs.h>
     19#include <linux/uaccess.h>
     20#include <linux/module.h>
     21#include <linux/ctype.h>
     22#include <linux/sort.h>
     23#include <linux/slab.h>
     24#include <linux/delay.h>
     25
     26#include <trace/events/sched.h>
     27#include <trace/syscall.h>
     28
     29#include <asm/setup.h>
     30
     31#include "trace_output.h"
     32
     33#undef TRACE_SYSTEM
     34#define TRACE_SYSTEM "TRACE_SYSTEM"
     35
     36DEFINE_MUTEX(event_mutex);
     37
     38LIST_HEAD(ftrace_events);
     39static LIST_HEAD(ftrace_generic_fields);
     40static LIST_HEAD(ftrace_common_fields);
     41static bool eventdir_initialized;
     42
     43static LIST_HEAD(module_strings);
     44
     45struct module_string {
     46	struct list_head	next;
     47	struct module		*module;
     48	char			*str;
     49};
     50
     51#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
     52
     53static struct kmem_cache *field_cachep;
     54static struct kmem_cache *file_cachep;
     55
     56static inline int system_refcount(struct event_subsystem *system)
     57{
     58	return system->ref_count;
     59}
     60
     61static int system_refcount_inc(struct event_subsystem *system)
     62{
     63	return system->ref_count++;
     64}
     65
     66static int system_refcount_dec(struct event_subsystem *system)
     67{
     68	return --system->ref_count;
     69}
     70
     71/* Double loops, do not use break, only goto's work */
     72#define do_for_each_event_file(tr, file)			\
     73	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
     74		list_for_each_entry(file, &tr->events, list)
     75
     76#define do_for_each_event_file_safe(tr, file)			\
     77	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
     78		struct trace_event_file *___n;				\
     79		list_for_each_entry_safe(file, ___n, &tr->events, list)
     80
     81#define while_for_each_event_file()		\
     82	}
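
/*
 * Usage sketch for the two helpers above: they expand to nested
 * list_for_each_entry() loops over every trace_array and each of its
 * event files, so the body must be closed with
 * while_for_each_event_file(), and an early exit needs a goto rather
 * than a break (see trace_event_enable_cmd_record() below):
 *
 *	do_for_each_event_file(tr, file) {
 *		if (!(file->flags & EVENT_FILE_FL_ENABLED))
 *			continue;
 *		...
 *	} while_for_each_event_file();
 */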
     83
     84static struct ftrace_event_field *
     85__find_event_field(struct list_head *head, char *name)
     86{
     87	struct ftrace_event_field *field;
     88
     89	list_for_each_entry(field, head, link) {
     90		if (!strcmp(field->name, name))
     91			return field;
     92	}
     93
     94	return NULL;
     95}
     96
     97struct ftrace_event_field *
     98trace_find_event_field(struct trace_event_call *call, char *name)
     99{
    100	struct ftrace_event_field *field;
    101	struct list_head *head;
    102
    103	head = trace_get_fields(call);
    104	field = __find_event_field(head, name);
    105	if (field)
    106		return field;
    107
    108	field = __find_event_field(&ftrace_generic_fields, name);
    109	if (field)
    110		return field;
    111
    112	return __find_event_field(&ftrace_common_fields, name);
    113}
    114
    115static int __trace_define_field(struct list_head *head, const char *type,
    116				const char *name, int offset, int size,
    117				int is_signed, int filter_type)
    118{
    119	struct ftrace_event_field *field;
    120
    121	field = kmem_cache_alloc(field_cachep, GFP_TRACE);
    122	if (!field)
    123		return -ENOMEM;
    124
    125	field->name = name;
    126	field->type = type;
    127
    128	if (filter_type == FILTER_OTHER)
    129		field->filter_type = filter_assign_type(type);
    130	else
    131		field->filter_type = filter_type;
    132
    133	field->offset = offset;
    134	field->size = size;
    135	field->is_signed = is_signed;
    136
    137	list_add(&field->link, head);
    138
    139	return 0;
    140}
    141
    142int trace_define_field(struct trace_event_call *call, const char *type,
    143		       const char *name, int offset, int size, int is_signed,
    144		       int filter_type)
    145{
    146	struct list_head *head;
    147
    148	if (WARN_ON(!call->class))
    149		return 0;
    150
    151	head = trace_get_fields(call);
    152	return __trace_define_field(head, type, name, offset, size,
    153				    is_signed, filter_type);
    154}
    155EXPORT_SYMBOL_GPL(trace_define_field);
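
/*
 * Sketch of how a dynamic event class might describe one of its fields
 * with the helper above ("struct my_entry" and the field name are only
 * placeholders):
 *
 *	ret = trace_define_field(call, "u64", "addr",
 *				 offsetof(struct my_entry, addr),
 *				 sizeof(u64), 0, FILTER_OTHER);
 */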
    156
    157#define __generic_field(type, item, filter_type)			\
    158	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
    159				   #item, 0, 0, is_signed_type(type),	\
    160				   filter_type);			\
    161	if (ret)							\
    162		return ret;
    163
    164#define __common_field(type, item)					\
    165	ret = __trace_define_field(&ftrace_common_fields, #type,	\
    166				   "common_" #item,			\
    167				   offsetof(typeof(ent), item),		\
    168				   sizeof(ent.item),			\
    169				   is_signed_type(type), FILTER_OTHER);	\
    170	if (ret)							\
    171		return ret;
    172
    173static int trace_define_generic_fields(void)
    174{
    175	int ret;
    176
    177	__generic_field(int, CPU, FILTER_CPU);
    178	__generic_field(int, cpu, FILTER_CPU);
    179	__generic_field(char *, COMM, FILTER_COMM);
    180	__generic_field(char *, comm, FILTER_COMM);
    181
    182	return ret;
    183}
    184
    185static int trace_define_common_fields(void)
    186{
    187	int ret;
    188	struct trace_entry ent;
    189
    190	__common_field(unsigned short, type);
    191	__common_field(unsigned char, flags);
    192	/* Holds both preempt_count and migrate_disable */
    193	__common_field(unsigned char, preempt_count);
    194	__common_field(int, pid);
    195
    196	return ret;
    197}
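
/*
 * The fields defined above are what every event's tracefs "format" file
 * lists first, as common_type, common_flags, common_preempt_count and
 * common_pid; the "common_" prefix is added by __common_field().
 */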
    198
    199static void trace_destroy_fields(struct trace_event_call *call)
    200{
    201	struct ftrace_event_field *field, *next;
    202	struct list_head *head;
    203
    204	head = trace_get_fields(call);
    205	list_for_each_entry_safe(field, next, head, link) {
    206		list_del(&field->link);
    207		kmem_cache_free(field_cachep, field);
    208	}
    209}
    210
    211/*
    212 * run-time version of trace_event_get_offsets_<call>() that returns the last
    213 * accessible offset of trace fields excluding __dynamic_array bytes
    214 */
    215int trace_event_get_offsets(struct trace_event_call *call)
    216{
    217	struct ftrace_event_field *tail;
    218	struct list_head *head;
    219
    220	head = trace_get_fields(call);
    221	/*
    222	 * head->next points to the last field with the largest offset,
    223	 * since it was added last by trace_define_field()
    224	 */
    225	tail = list_first_entry(head, struct ftrace_event_field, link);
    226	return tail->offset + tail->size;
    227}
    228
    229/*
    230 * Check if the referenced field is an array and return true,
    231 * as arrays are OK to dereference.
    232 */
    233static bool test_field(const char *fmt, struct trace_event_call *call)
    234{
    235	struct trace_event_fields *field = call->class->fields_array;
    236	const char *array_descriptor;
    237	const char *p = fmt;
    238	int len;
    239
    240	if (!(len = str_has_prefix(fmt, "REC->")))
    241		return false;
    242	fmt += len;
    243	for (p = fmt; *p; p++) {
    244		if (!isalnum(*p) && *p != '_')
    245			break;
    246	}
    247	len = p - fmt;
    248
    249	for (; field->type; field++) {
    250		if (strncmp(field->name, fmt, len) ||
    251		    field->name[len])
    252			continue;
    253		array_descriptor = strchr(field->type, '[');
    254		/* This is an array and is OK to dereference. */
    255		return array_descriptor != NULL;
    256	}
    257	return false;
    258}
    259
    260/*
    261 * Examine the print fmt of the event looking for unsafe dereference
    262 * pointers using %p* that could be recorded in the trace event and
    263 * much later referenced after the pointer was freed. Dereferencing
    264 * pointers are OK, if it is dereferenced into the event itself.
    265 */
    266static void test_event_printk(struct trace_event_call *call)
    267{
    268	u64 dereference_flags = 0;
    269	bool first = true;
    270	const char *fmt, *c, *r, *a;
    271	int parens = 0;
    272	char in_quote = 0;
    273	int start_arg = 0;
    274	int arg = 0;
    275	int i;
    276
    277	fmt = call->print_fmt;
    278
    279	if (!fmt)
    280		return;
    281
    282	for (i = 0; fmt[i]; i++) {
    283		switch (fmt[i]) {
    284		case '\\':
    285			i++;
    286			if (!fmt[i])
    287				return;
    288			continue;
    289		case '"':
    290		case '\'':
    291			/*
    292			 * The print fmt starts with a string that
    293			 * is processed first to find %p* usage,
    294			 * then after the first string, the print fmt
    295			 * contains arguments that are used to check
    296			 * if the dereferenced %p* usage is safe.
    297			 */
    298			if (first) {
    299				if (fmt[i] == '\'')
    300					continue;
    301				if (in_quote) {
    302					arg = 0;
    303					first = false;
    304					/*
     305					 * If there were no %p* uses,
     306					 * the fmt is OK.
    307					 */
    308					if (!dereference_flags)
    309						return;
    310				}
    311			}
    312			if (in_quote) {
    313				if (in_quote == fmt[i])
    314					in_quote = 0;
    315			} else {
    316				in_quote = fmt[i];
    317			}
    318			continue;
    319		case '%':
    320			if (!first || !in_quote)
    321				continue;
    322			i++;
    323			if (!fmt[i])
    324				return;
    325			switch (fmt[i]) {
    326			case '%':
    327				continue;
    328			case 'p':
    329				/* Find dereferencing fields */
    330				switch (fmt[i + 1]) {
    331				case 'B': case 'R': case 'r':
    332				case 'b': case 'M': case 'm':
    333				case 'I': case 'i': case 'E':
    334				case 'U': case 'V': case 'N':
    335				case 'a': case 'd': case 'D':
    336				case 'g': case 't': case 'C':
    337				case 'O': case 'f':
    338					if (WARN_ONCE(arg == 63,
    339						      "Too many args for event: %s",
    340						      trace_event_name(call)))
    341						return;
    342					dereference_flags |= 1ULL << arg;
    343				}
    344				break;
    345			default:
    346			{
    347				bool star = false;
    348				int j;
    349
    350				/* Increment arg if %*s exists. */
    351				for (j = 0; fmt[i + j]; j++) {
    352					if (isdigit(fmt[i + j]) ||
    353					    fmt[i + j] == '.')
    354						continue;
    355					if (fmt[i + j] == '*') {
    356						star = true;
    357						continue;
    358					}
    359					if ((fmt[i + j] == 's') && star)
    360						arg++;
    361					break;
    362				}
    363				break;
    364			} /* default */
    365
    366			} /* switch */
    367			arg++;
    368			continue;
    369		case '(':
    370			if (in_quote)
    371				continue;
    372			parens++;
    373			continue;
    374		case ')':
    375			if (in_quote)
    376				continue;
    377			parens--;
    378			if (WARN_ONCE(parens < 0,
    379				      "Paren mismatch for event: %s\narg='%s'\n%*s",
    380				      trace_event_name(call),
    381				      fmt + start_arg,
    382				      (i - start_arg) + 5, "^"))
    383				return;
    384			continue;
    385		case ',':
    386			if (in_quote || parens)
    387				continue;
    388			i++;
    389			while (isspace(fmt[i]))
    390				i++;
    391			start_arg = i;
    392			if (!(dereference_flags & (1ULL << arg)))
    393				goto next_arg;
    394
    395			/* Find the REC-> in the argument */
    396			c = strchr(fmt + i, ',');
    397			r = strstr(fmt + i, "REC->");
    398			if (r && (!c || r < c)) {
    399				/*
    400				 * Addresses of events on the buffer,
    401				 * or an array on the buffer is
    402				 * OK to dereference.
     403				 * There are ways to fool this, but
    404				 * this is to catch common mistakes,
    405				 * not malicious code.
    406				 */
    407				a = strchr(fmt + i, '&');
    408				if ((a && (a < r)) || test_field(r, call))
    409					dereference_flags &= ~(1ULL << arg);
    410			} else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
    411				   (!c || r < c)) {
    412				dereference_flags &= ~(1ULL << arg);
    413			} else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
    414				   (!c || r < c)) {
    415				dereference_flags &= ~(1ULL << arg);
    416			}
    417
    418		next_arg:
    419			i--;
    420			arg++;
    421		}
    422	}
    423
    424	/*
    425	 * If you triggered the below warning, the trace event reported
    426	 * uses an unsafe dereference pointer %p*. As the data stored
    427	 * at the trace event time may no longer exist when the trace
    428	 * event is printed, dereferencing to the original source is
    429	 * unsafe. The source of the dereference must be copied into the
    430	 * event itself, and the dereference must access the copy instead.
    431	 */
    432	if (WARN_ON_ONCE(dereference_flags)) {
    433		arg = 1;
    434		while (!(dereference_flags & 1)) {
    435			dereference_flags >>= 1;
    436			arg++;
    437		}
    438		pr_warn("event %s has unsafe dereference of argument %d\n",
    439			trace_event_name(call), arg);
    440		pr_warn("print_fmt: %s\n", fmt);
    441	}
    442}
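
/*
 * A sketch of what the check above catches (hypothetical event fields,
 * not ones defined in this kernel): a print_fmt along the lines of
 *
 *	"src=%pI4", REC->saddr_ptr
 *
 * only records the pointer in the ring buffer and dereferences it when
 * the event is printed, so it trips the warning, whereas
 *
 *	"src=%pI4", REC->saddr
 *
 * with saddr declared through __array() copies the bytes into the event
 * itself and is accepted by test_field().
 */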
    443
    444int trace_event_raw_init(struct trace_event_call *call)
    445{
    446	int id;
    447
    448	id = register_trace_event(&call->event);
    449	if (!id)
    450		return -ENODEV;
    451
    452	test_event_printk(call);
    453
    454	return 0;
    455}
    456EXPORT_SYMBOL_GPL(trace_event_raw_init);
    457
    458bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
    459{
    460	struct trace_array *tr = trace_file->tr;
    461	struct trace_array_cpu *data;
    462	struct trace_pid_list *no_pid_list;
    463	struct trace_pid_list *pid_list;
    464
    465	pid_list = rcu_dereference_raw(tr->filtered_pids);
    466	no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
    467
    468	if (!pid_list && !no_pid_list)
    469		return false;
    470
    471	data = this_cpu_ptr(tr->array_buffer.data);
    472
    473	return data->ignore_pid;
    474}
    475EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
    476
    477void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
    478				 struct trace_event_file *trace_file,
    479				 unsigned long len)
    480{
    481	struct trace_event_call *event_call = trace_file->event_call;
    482
    483	if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
    484	    trace_event_ignore_this_pid(trace_file))
    485		return NULL;
    486
    487	/*
    488	 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
    489	 * preemption (adding one to the preempt_count). Since we are
    490	 * interested in the preempt_count at the time the tracepoint was
    491	 * hit, we need to subtract one to offset the increment.
    492	 */
    493	fbuffer->trace_ctx = tracing_gen_ctx_dec();
    494	fbuffer->trace_file = trace_file;
    495
    496	fbuffer->event =
    497		trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
    498						event_call->event.type, len,
    499						fbuffer->trace_ctx);
    500	if (!fbuffer->event)
    501		return NULL;
    502
    503	fbuffer->regs = NULL;
    504	fbuffer->entry = ring_buffer_event_data(fbuffer->event);
    505	return fbuffer->entry;
    506}
    507EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
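
/*
 * Condensed sketch of the probe-side flow around the helper above,
 * mirroring what TRACE_EVENT() generated code does ("struct my_entry"
 * and "field" are placeholders):
 *
 *	struct trace_event_buffer fbuffer;
 *	struct my_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->field = value;
 *	trace_event_buffer_commit(&fbuffer);
 */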
    508
    509int trace_event_reg(struct trace_event_call *call,
    510		    enum trace_reg type, void *data)
    511{
    512	struct trace_event_file *file = data;
    513
    514	WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
    515	switch (type) {
    516	case TRACE_REG_REGISTER:
    517		return tracepoint_probe_register(call->tp,
    518						 call->class->probe,
    519						 file);
    520	case TRACE_REG_UNREGISTER:
    521		tracepoint_probe_unregister(call->tp,
    522					    call->class->probe,
    523					    file);
    524		return 0;
    525
    526#ifdef CONFIG_PERF_EVENTS
    527	case TRACE_REG_PERF_REGISTER:
    528		return tracepoint_probe_register(call->tp,
    529						 call->class->perf_probe,
    530						 call);
    531	case TRACE_REG_PERF_UNREGISTER:
    532		tracepoint_probe_unregister(call->tp,
    533					    call->class->perf_probe,
    534					    call);
    535		return 0;
    536	case TRACE_REG_PERF_OPEN:
    537	case TRACE_REG_PERF_CLOSE:
    538	case TRACE_REG_PERF_ADD:
    539	case TRACE_REG_PERF_DEL:
    540		return 0;
    541#endif
    542	}
    543	return 0;
    544}
    545EXPORT_SYMBOL_GPL(trace_event_reg);
    546
    547void trace_event_enable_cmd_record(bool enable)
    548{
    549	struct trace_event_file *file;
    550	struct trace_array *tr;
    551
    552	lockdep_assert_held(&event_mutex);
    553
    554	do_for_each_event_file(tr, file) {
    555
    556		if (!(file->flags & EVENT_FILE_FL_ENABLED))
    557			continue;
    558
    559		if (enable) {
    560			tracing_start_cmdline_record();
    561			set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
    562		} else {
    563			tracing_stop_cmdline_record();
    564			clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
    565		}
    566	} while_for_each_event_file();
    567}
    568
    569void trace_event_enable_tgid_record(bool enable)
    570{
    571	struct trace_event_file *file;
    572	struct trace_array *tr;
    573
    574	lockdep_assert_held(&event_mutex);
    575
    576	do_for_each_event_file(tr, file) {
    577		if (!(file->flags & EVENT_FILE_FL_ENABLED))
    578			continue;
    579
    580		if (enable) {
    581			tracing_start_tgid_record();
    582			set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
    583		} else {
    584			tracing_stop_tgid_record();
    585			clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
    586				  &file->flags);
    587		}
    588	} while_for_each_event_file();
    589}
    590
    591static int __ftrace_event_enable_disable(struct trace_event_file *file,
    592					 int enable, int soft_disable)
    593{
    594	struct trace_event_call *call = file->event_call;
    595	struct trace_array *tr = file->tr;
    596	unsigned long file_flags = file->flags;
    597	int ret = 0;
    598	int disable;
    599
    600	switch (enable) {
    601	case 0:
    602		/*
    603		 * When soft_disable is set and enable is cleared, the sm_ref
    604		 * reference counter is decremented. If it reaches 0, we want
    605		 * to clear the SOFT_DISABLED flag but leave the event in the
    606		 * state that it was. That is, if the event was enabled and
    607		 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
    608		 * is set we do not want the event to be enabled before we
    609		 * clear the bit.
    610		 *
    611		 * When soft_disable is not set but the SOFT_MODE flag is,
    612		 * we do nothing. Do not disable the tracepoint, otherwise
     613		 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
    614		 */
    615		if (soft_disable) {
    616			if (atomic_dec_return(&file->sm_ref) > 0)
    617				break;
    618			disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
    619			clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
    620		} else
    621			disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
    622
    623		if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
    624			clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
    625			if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
    626				tracing_stop_cmdline_record();
    627				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
    628			}
    629
    630			if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
    631				tracing_stop_tgid_record();
    632				clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
    633			}
    634
    635			call->class->reg(call, TRACE_REG_UNREGISTER, file);
    636		}
    637		/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
    638		if (file->flags & EVENT_FILE_FL_SOFT_MODE)
    639			set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
    640		else
    641			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
    642		break;
    643	case 1:
    644		/*
    645		 * When soft_disable is set and enable is set, we want to
    646		 * register the tracepoint for the event, but leave the event
    647		 * as is. That means, if the event was already enabled, we do
    648		 * nothing (but set SOFT_MODE). If the event is disabled, we
    649		 * set SOFT_DISABLED before enabling the event tracepoint, so
    650		 * it still seems to be disabled.
    651		 */
    652		if (!soft_disable)
    653			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
    654		else {
    655			if (atomic_inc_return(&file->sm_ref) > 1)
    656				break;
    657			set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
    658		}
    659
    660		if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
    661			bool cmd = false, tgid = false;
    662
    663			/* Keep the event disabled, when going to SOFT_MODE. */
    664			if (soft_disable)
    665				set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
    666
    667			if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
    668				cmd = true;
    669				tracing_start_cmdline_record();
    670				set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
    671			}
    672
    673			if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
    674				tgid = true;
    675				tracing_start_tgid_record();
    676				set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
    677			}
    678
    679			ret = call->class->reg(call, TRACE_REG_REGISTER, file);
    680			if (ret) {
    681				if (cmd)
    682					tracing_stop_cmdline_record();
    683				if (tgid)
    684					tracing_stop_tgid_record();
    685				pr_info("event trace: Could not enable event "
    686					"%s\n", trace_event_name(call));
    687				break;
    688			}
    689			set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
    690
    691			/* WAS_ENABLED gets set but never cleared. */
    692			set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
    693		}
    694		break;
    695	}
    696
    697	/* Enable or disable use of trace_buffered_event */
    698	if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
    699	    (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
    700		if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
    701			trace_buffered_event_enable();
    702		else
    703			trace_buffered_event_disable();
    704	}
    705
    706	return ret;
    707}
    708
    709int trace_event_enable_disable(struct trace_event_file *file,
    710			       int enable, int soft_disable)
    711{
    712	return __ftrace_event_enable_disable(file, enable, soft_disable);
    713}
    714
    715static int ftrace_event_enable_disable(struct trace_event_file *file,
    716				       int enable)
    717{
    718	return __ftrace_event_enable_disable(file, enable, 0);
    719}
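
/*
 * Reading aid for the soft-disable logic above: when soft_disable is
 * set, the tracepoint stays registered but SOFT_DISABLED suppresses its
 * output, which lets event triggers keep running their callbacks on an
 * otherwise "disabled" event; sm_ref counts how many such users are
 * holding the event in SOFT_MODE.
 */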
    720
    721static void ftrace_clear_events(struct trace_array *tr)
    722{
    723	struct trace_event_file *file;
    724
    725	mutex_lock(&event_mutex);
    726	list_for_each_entry(file, &tr->events, list) {
    727		ftrace_event_enable_disable(file, 0);
    728	}
    729	mutex_unlock(&event_mutex);
    730}
    731
    732static void
    733event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
    734{
    735	struct trace_pid_list *pid_list;
    736	struct trace_array *tr = data;
    737
    738	pid_list = rcu_dereference_raw(tr->filtered_pids);
    739	trace_filter_add_remove_task(pid_list, NULL, task);
    740
    741	pid_list = rcu_dereference_raw(tr->filtered_no_pids);
    742	trace_filter_add_remove_task(pid_list, NULL, task);
    743}
    744
    745static void
    746event_filter_pid_sched_process_fork(void *data,
    747				    struct task_struct *self,
    748				    struct task_struct *task)
    749{
    750	struct trace_pid_list *pid_list;
    751	struct trace_array *tr = data;
    752
    753	pid_list = rcu_dereference_sched(tr->filtered_pids);
    754	trace_filter_add_remove_task(pid_list, self, task);
    755
    756	pid_list = rcu_dereference_sched(tr->filtered_no_pids);
    757	trace_filter_add_remove_task(pid_list, self, task);
    758}
    759
    760void trace_event_follow_fork(struct trace_array *tr, bool enable)
    761{
    762	if (enable) {
    763		register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
    764						       tr, INT_MIN);
    765		register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
    766						       tr, INT_MAX);
    767	} else {
    768		unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
    769						    tr);
    770		unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
    771						    tr);
    772	}
    773}
    774
    775static void
    776event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
    777					struct task_struct *prev,
    778					struct task_struct *next,
    779					unsigned int prev_state)
    780{
    781	struct trace_array *tr = data;
    782	struct trace_pid_list *no_pid_list;
    783	struct trace_pid_list *pid_list;
    784	bool ret;
    785
    786	pid_list = rcu_dereference_sched(tr->filtered_pids);
    787	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
    788
    789	/*
    790	 * Sched switch is funny, as we only want to ignore it
    791	 * in the notrace case if both prev and next should be ignored.
    792	 */
    793	ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
    794		trace_ignore_this_task(NULL, no_pid_list, next);
    795
    796	this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
    797		       (trace_ignore_this_task(pid_list, NULL, prev) &&
    798			trace_ignore_this_task(pid_list, NULL, next)));
    799}
    800
    801static void
    802event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
    803					 struct task_struct *prev,
    804					 struct task_struct *next,
    805					 unsigned int prev_state)
    806{
    807	struct trace_array *tr = data;
    808	struct trace_pid_list *no_pid_list;
    809	struct trace_pid_list *pid_list;
    810
    811	pid_list = rcu_dereference_sched(tr->filtered_pids);
    812	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
    813
    814	this_cpu_write(tr->array_buffer.data->ignore_pid,
    815		       trace_ignore_this_task(pid_list, no_pid_list, next));
    816}
    817
    818static void
    819event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
    820{
    821	struct trace_array *tr = data;
    822	struct trace_pid_list *no_pid_list;
    823	struct trace_pid_list *pid_list;
    824
    825	/* Nothing to do if we are already tracing */
    826	if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
    827		return;
    828
    829	pid_list = rcu_dereference_sched(tr->filtered_pids);
    830	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
    831
    832	this_cpu_write(tr->array_buffer.data->ignore_pid,
    833		       trace_ignore_this_task(pid_list, no_pid_list, task));
    834}
    835
    836static void
    837event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
    838{
    839	struct trace_array *tr = data;
    840	struct trace_pid_list *no_pid_list;
    841	struct trace_pid_list *pid_list;
    842
    843	/* Nothing to do if we are not tracing */
    844	if (this_cpu_read(tr->array_buffer.data->ignore_pid))
    845		return;
    846
    847	pid_list = rcu_dereference_sched(tr->filtered_pids);
    848	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
    849
    850	/* Set tracing if current is enabled */
    851	this_cpu_write(tr->array_buffer.data->ignore_pid,
    852		       trace_ignore_this_task(pid_list, no_pid_list, current));
    853}
    854
    855static void unregister_pid_events(struct trace_array *tr)
    856{
    857	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
    858	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
    859
    860	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
    861	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
    862
    863	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
    864	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
    865
    866	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
    867	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
    868}
    869
    870static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
    871{
    872	struct trace_pid_list *pid_list;
    873	struct trace_pid_list *no_pid_list;
    874	struct trace_event_file *file;
    875	int cpu;
    876
    877	pid_list = rcu_dereference_protected(tr->filtered_pids,
    878					     lockdep_is_held(&event_mutex));
    879	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
    880					     lockdep_is_held(&event_mutex));
    881
    882	/* Make sure there's something to do */
    883	if (!pid_type_enabled(type, pid_list, no_pid_list))
    884		return;
    885
    886	if (!still_need_pid_events(type, pid_list, no_pid_list)) {
    887		unregister_pid_events(tr);
    888
    889		list_for_each_entry(file, &tr->events, list) {
    890			clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
    891		}
    892
    893		for_each_possible_cpu(cpu)
    894			per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
    895	}
    896
    897	if (type & TRACE_PIDS)
    898		rcu_assign_pointer(tr->filtered_pids, NULL);
    899
    900	if (type & TRACE_NO_PIDS)
    901		rcu_assign_pointer(tr->filtered_no_pids, NULL);
    902
    903	/* Wait till all users are no longer using pid filtering */
    904	tracepoint_synchronize_unregister();
    905
    906	if ((type & TRACE_PIDS) && pid_list)
    907		trace_pid_list_free(pid_list);
    908
    909	if ((type & TRACE_NO_PIDS) && no_pid_list)
    910		trace_pid_list_free(no_pid_list);
    911}
    912
    913static void ftrace_clear_event_pids(struct trace_array *tr, int type)
    914{
    915	mutex_lock(&event_mutex);
    916	__ftrace_clear_event_pids(tr, type);
    917	mutex_unlock(&event_mutex);
    918}
    919
    920static void __put_system(struct event_subsystem *system)
    921{
    922	struct event_filter *filter = system->filter;
    923
    924	WARN_ON_ONCE(system_refcount(system) == 0);
    925	if (system_refcount_dec(system))
    926		return;
    927
    928	list_del(&system->list);
    929
    930	if (filter) {
    931		kfree(filter->filter_string);
    932		kfree(filter);
    933	}
    934	kfree_const(system->name);
    935	kfree(system);
    936}
    937
    938static void __get_system(struct event_subsystem *system)
    939{
    940	WARN_ON_ONCE(system_refcount(system) == 0);
    941	system_refcount_inc(system);
    942}
    943
    944static void __get_system_dir(struct trace_subsystem_dir *dir)
    945{
    946	WARN_ON_ONCE(dir->ref_count == 0);
    947	dir->ref_count++;
    948	__get_system(dir->subsystem);
    949}
    950
    951static void __put_system_dir(struct trace_subsystem_dir *dir)
    952{
    953	WARN_ON_ONCE(dir->ref_count == 0);
    954	/* If the subsystem is about to be freed, the dir must be too */
    955	WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
    956
    957	__put_system(dir->subsystem);
    958	if (!--dir->ref_count)
    959		kfree(dir);
    960}
    961
    962static void put_system(struct trace_subsystem_dir *dir)
    963{
    964	mutex_lock(&event_mutex);
    965	__put_system_dir(dir);
    966	mutex_unlock(&event_mutex);
    967}
    968
    969static void remove_subsystem(struct trace_subsystem_dir *dir)
    970{
    971	if (!dir)
    972		return;
    973
    974	if (!--dir->nr_events) {
    975		tracefs_remove(dir->entry);
    976		list_del(&dir->list);
    977		__put_system_dir(dir);
    978	}
    979}
    980
    981static void remove_event_file_dir(struct trace_event_file *file)
    982{
    983	struct dentry *dir = file->dir;
    984	struct dentry *child;
    985
    986	if (dir) {
    987		spin_lock(&dir->d_lock);	/* probably unneeded */
    988		list_for_each_entry(child, &dir->d_subdirs, d_child) {
    989			if (d_really_is_positive(child))	/* probably unneeded */
    990				d_inode(child)->i_private = NULL;
    991		}
    992		spin_unlock(&dir->d_lock);
    993
    994		tracefs_remove(dir);
    995	}
    996
    997	list_del(&file->list);
    998	remove_subsystem(file->system);
    999	free_event_filter(file->filter);
   1000	kmem_cache_free(file_cachep, file);
   1001}
   1002
   1003/*
   1004 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
   1005 */
   1006static int
   1007__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
   1008			      const char *sub, const char *event, int set)
   1009{
   1010	struct trace_event_file *file;
   1011	struct trace_event_call *call;
   1012	const char *name;
   1013	int ret = -EINVAL;
   1014	int eret = 0;
   1015
   1016	list_for_each_entry(file, &tr->events, list) {
   1017
   1018		call = file->event_call;
   1019		name = trace_event_name(call);
   1020
   1021		if (!name || !call->class || !call->class->reg)
   1022			continue;
   1023
   1024		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
   1025			continue;
   1026
   1027		if (match &&
   1028		    strcmp(match, name) != 0 &&
   1029		    strcmp(match, call->class->system) != 0)
   1030			continue;
   1031
   1032		if (sub && strcmp(sub, call->class->system) != 0)
   1033			continue;
   1034
   1035		if (event && strcmp(event, name) != 0)
   1036			continue;
   1037
   1038		ret = ftrace_event_enable_disable(file, set);
   1039
   1040		/*
   1041		 * Save the first error and return that. Some events
   1042		 * may still have been enabled, but let the user
   1043		 * know that something went wrong.
   1044		 */
   1045		if (ret && !eret)
   1046			eret = ret;
   1047
   1048		ret = eret;
   1049	}
   1050
   1051	return ret;
   1052}
   1053
   1054static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
   1055				  const char *sub, const char *event, int set)
   1056{
   1057	int ret;
   1058
   1059	mutex_lock(&event_mutex);
   1060	ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
   1061	mutex_unlock(&event_mutex);
   1062
   1063	return ret;
   1064}
   1065
   1066int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
   1067{
   1068	char *event = NULL, *sub = NULL, *match;
   1069	int ret;
   1070
   1071	if (!tr)
   1072		return -ENOENT;
   1073	/*
   1074	 * The buf format can be <subsystem>:<event-name>
   1075	 *  *:<event-name> means any event by that name.
   1076	 *  :<event-name> is the same.
   1077	 *
   1078	 *  <subsystem>:* means all events in that subsystem
   1079	 *  <subsystem>: means the same.
   1080	 *
   1081	 *  <name> (no ':') means all events in a subsystem with
   1082	 *  the name <name> or any event that matches <name>
   1083	 */
   1084
   1085	match = strsep(&buf, ":");
   1086	if (buf) {
   1087		sub = match;
   1088		event = buf;
   1089		match = NULL;
   1090
   1091		if (!strlen(sub) || strcmp(sub, "*") == 0)
   1092			sub = NULL;
   1093		if (!strlen(event) || strcmp(event, "*") == 0)
   1094			event = NULL;
   1095	}
   1096
   1097	ret = __ftrace_set_clr_event(tr, match, sub, event, set);
   1098
   1099	/* Put back the colon to allow this to be called again */
   1100	if (buf)
   1101		*(buf - 1) = ':';
   1102
   1103	return ret;
   1104}
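
/*
 * Illustration of the accepted formats, as typically echoed into the
 * tracefs set_event file (which reaches this function through
 * ftrace_event_write() below):
 *
 *	sched:sched_switch	one event in the sched subsystem
 *	sched:			every event in the sched subsystem
 *	sched_switch		match by event (or subsystem) name alone
 *	!sched:sched_switch	a leading '!' (stripped by the caller)
 *				turns the operation into a disable
 */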
   1105
   1106/**
   1107 * trace_set_clr_event - enable or disable an event
   1108 * @system: system name to match (NULL for any system)
   1109 * @event: event name to match (NULL for all events, within system)
   1110 * @set: 1 to enable, 0 to disable
   1111 *
   1112 * This is a way for other parts of the kernel to enable or disable
   1113 * event recording.
   1114 *
   1115 * Returns 0 on success, -EINVAL if the parameters do not match any
   1116 * registered events.
   1117 */
   1118int trace_set_clr_event(const char *system, const char *event, int set)
   1119{
   1120	struct trace_array *tr = top_trace_array();
   1121
   1122	if (!tr)
   1123		return -ENODEV;
   1124
   1125	return __ftrace_set_clr_event(tr, NULL, system, event, set);
   1126}
   1127EXPORT_SYMBOL_GPL(trace_set_clr_event);
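
/*
 * Minimal in-kernel usage sketch (the sched:sched_switch event is just
 * an example):
 *
 *	if (trace_set_clr_event("sched", "sched_switch", 1))
 *		pr_warn("could not enable sched_switch events\n");
 *
 * Passing NULL for @system matches any subsystem and NULL for @event
 * matches all events within it, as described in the kernel-doc above.
 */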
   1128
   1129/**
   1130 * trace_array_set_clr_event - enable or disable an event for a trace array.
   1131 * @tr: concerned trace array.
   1132 * @system: system name to match (NULL for any system)
   1133 * @event: event name to match (NULL for all events, within system)
   1134 * @enable: true to enable, false to disable
   1135 *
   1136 * This is a way for other parts of the kernel to enable or disable
   1137 * event recording.
   1138 *
   1139 * Returns 0 on success, -EINVAL if the parameters do not match any
   1140 * registered events.
   1141 */
   1142int trace_array_set_clr_event(struct trace_array *tr, const char *system,
   1143		const char *event, bool enable)
   1144{
   1145	int set;
   1146
   1147	if (!tr)
   1148		return -ENOENT;
   1149
   1150	set = (enable == true) ? 1 : 0;
   1151	return __ftrace_set_clr_event(tr, NULL, system, event, set);
   1152}
   1153EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
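
/*
 * Sketch for a separate trace instance; trace_array_get_by_name() is
 * the usual way callers obtain @tr, and "my_instance" is only a
 * placeholder:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr)
 *		trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 */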
   1154
   1155/* 128 should be much more than enough */
   1156#define EVENT_BUF_SIZE		127
   1157
   1158static ssize_t
   1159ftrace_event_write(struct file *file, const char __user *ubuf,
   1160		   size_t cnt, loff_t *ppos)
   1161{
   1162	struct trace_parser parser;
   1163	struct seq_file *m = file->private_data;
   1164	struct trace_array *tr = m->private;
   1165	ssize_t read, ret;
   1166
   1167	if (!cnt)
   1168		return 0;
   1169
   1170	ret = tracing_update_buffers();
   1171	if (ret < 0)
   1172		return ret;
   1173
   1174	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
   1175		return -ENOMEM;
   1176
   1177	read = trace_get_user(&parser, ubuf, cnt, ppos);
   1178
   1179	if (read >= 0 && trace_parser_loaded((&parser))) {
   1180		int set = 1;
   1181
   1182		if (*parser.buffer == '!')
   1183			set = 0;
   1184
   1185		ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
   1186		if (ret)
   1187			goto out_put;
   1188	}
   1189
   1190	ret = read;
   1191
   1192 out_put:
   1193	trace_parser_put(&parser);
   1194
   1195	return ret;
   1196}
   1197
   1198static void *
   1199t_next(struct seq_file *m, void *v, loff_t *pos)
   1200{
   1201	struct trace_event_file *file = v;
   1202	struct trace_event_call *call;
   1203	struct trace_array *tr = m->private;
   1204
   1205	(*pos)++;
   1206
   1207	list_for_each_entry_continue(file, &tr->events, list) {
   1208		call = file->event_call;
   1209		/*
   1210		 * The ftrace subsystem is for showing formats only.
    1211		 * Its events cannot be enabled or disabled via the event files.
   1212		 */
   1213		if (call->class && call->class->reg &&
   1214		    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
   1215			return file;
   1216	}
   1217
   1218	return NULL;
   1219}
   1220
   1221static void *t_start(struct seq_file *m, loff_t *pos)
   1222{
   1223	struct trace_event_file *file;
   1224	struct trace_array *tr = m->private;
   1225	loff_t l;
   1226
   1227	mutex_lock(&event_mutex);
   1228
   1229	file = list_entry(&tr->events, struct trace_event_file, list);
   1230	for (l = 0; l <= *pos; ) {
   1231		file = t_next(m, file, &l);
   1232		if (!file)
   1233			break;
   1234	}
   1235	return file;
   1236}
   1237
   1238static void *
   1239s_next(struct seq_file *m, void *v, loff_t *pos)
   1240{
   1241	struct trace_event_file *file = v;
   1242	struct trace_array *tr = m->private;
   1243
   1244	(*pos)++;
   1245
   1246	list_for_each_entry_continue(file, &tr->events, list) {
   1247		if (file->flags & EVENT_FILE_FL_ENABLED)
   1248			return file;
   1249	}
   1250
   1251	return NULL;
   1252}
   1253
   1254static void *s_start(struct seq_file *m, loff_t *pos)
   1255{
   1256	struct trace_event_file *file;
   1257	struct trace_array *tr = m->private;
   1258	loff_t l;
   1259
   1260	mutex_lock(&event_mutex);
   1261
   1262	file = list_entry(&tr->events, struct trace_event_file, list);
   1263	for (l = 0; l <= *pos; ) {
   1264		file = s_next(m, file, &l);
   1265		if (!file)
   1266			break;
   1267	}
   1268	return file;
   1269}
   1270
   1271static int t_show(struct seq_file *m, void *v)
   1272{
   1273	struct trace_event_file *file = v;
   1274	struct trace_event_call *call = file->event_call;
   1275
   1276	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
   1277		seq_printf(m, "%s:", call->class->system);
   1278	seq_printf(m, "%s\n", trace_event_name(call));
   1279
   1280	return 0;
   1281}
   1282
   1283static void t_stop(struct seq_file *m, void *p)
   1284{
   1285	mutex_unlock(&event_mutex);
   1286}
   1287
   1288static void *
   1289__next(struct seq_file *m, void *v, loff_t *pos, int type)
   1290{
   1291	struct trace_array *tr = m->private;
   1292	struct trace_pid_list *pid_list;
   1293
   1294	if (type == TRACE_PIDS)
   1295		pid_list = rcu_dereference_sched(tr->filtered_pids);
   1296	else
   1297		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
   1298
   1299	return trace_pid_next(pid_list, v, pos);
   1300}
   1301
   1302static void *
   1303p_next(struct seq_file *m, void *v, loff_t *pos)
   1304{
   1305	return __next(m, v, pos, TRACE_PIDS);
   1306}
   1307
   1308static void *
   1309np_next(struct seq_file *m, void *v, loff_t *pos)
   1310{
   1311	return __next(m, v, pos, TRACE_NO_PIDS);
   1312}
   1313
   1314static void *__start(struct seq_file *m, loff_t *pos, int type)
   1315	__acquires(RCU)
   1316{
   1317	struct trace_pid_list *pid_list;
   1318	struct trace_array *tr = m->private;
   1319
   1320	/*
    1321	 * Grab the mutex so that calls to p_next() see the same
    1322	 * tr->filtered_pids that p_start() saw.
   1323	 * If we just passed the tr->filtered_pids around, then RCU would
   1324	 * have been enough, but doing that makes things more complex.
   1325	 */
   1326	mutex_lock(&event_mutex);
   1327	rcu_read_lock_sched();
   1328
   1329	if (type == TRACE_PIDS)
   1330		pid_list = rcu_dereference_sched(tr->filtered_pids);
   1331	else
   1332		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
   1333
   1334	if (!pid_list)
   1335		return NULL;
   1336
   1337	return trace_pid_start(pid_list, pos);
   1338}
   1339
   1340static void *p_start(struct seq_file *m, loff_t *pos)
   1341	__acquires(RCU)
   1342{
   1343	return __start(m, pos, TRACE_PIDS);
   1344}
   1345
   1346static void *np_start(struct seq_file *m, loff_t *pos)
   1347	__acquires(RCU)
   1348{
   1349	return __start(m, pos, TRACE_NO_PIDS);
   1350}
   1351
   1352static void p_stop(struct seq_file *m, void *p)
   1353	__releases(RCU)
   1354{
   1355	rcu_read_unlock_sched();
   1356	mutex_unlock(&event_mutex);
   1357}
   1358
   1359static ssize_t
   1360event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
   1361		  loff_t *ppos)
   1362{
   1363	struct trace_event_file *file;
   1364	unsigned long flags;
   1365	char buf[4] = "0";
   1366
   1367	mutex_lock(&event_mutex);
   1368	file = event_file_data(filp);
   1369	if (likely(file))
   1370		flags = file->flags;
   1371	mutex_unlock(&event_mutex);
   1372
   1373	if (!file)
   1374		return -ENODEV;
   1375
   1376	if (flags & EVENT_FILE_FL_ENABLED &&
   1377	    !(flags & EVENT_FILE_FL_SOFT_DISABLED))
   1378		strcpy(buf, "1");
   1379
   1380	if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
   1381	    flags & EVENT_FILE_FL_SOFT_MODE)
   1382		strcat(buf, "*");
   1383
   1384	strcat(buf, "\n");
   1385
   1386	return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
   1387}
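
/*
 * So a read of a per-event "enable" file returns "0", "1", "0*" or
 * "1*"; the trailing '*' means the event is in soft mode (for instance
 * because a trigger references it) in addition to its plain on/off
 * state.
 */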
   1388
   1389static ssize_t
   1390event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
   1391		   loff_t *ppos)
   1392{
   1393	struct trace_event_file *file;
   1394	unsigned long val;
   1395	int ret;
   1396
   1397	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   1398	if (ret)
   1399		return ret;
   1400
   1401	ret = tracing_update_buffers();
   1402	if (ret < 0)
   1403		return ret;
   1404
   1405	switch (val) {
   1406	case 0:
   1407	case 1:
   1408		ret = -ENODEV;
   1409		mutex_lock(&event_mutex);
   1410		file = event_file_data(filp);
   1411		if (likely(file))
   1412			ret = ftrace_event_enable_disable(file, val);
   1413		mutex_unlock(&event_mutex);
   1414		break;
   1415
   1416	default:
   1417		return -EINVAL;
   1418	}
   1419
   1420	*ppos += cnt;
   1421
   1422	return ret ? ret : cnt;
   1423}
   1424
   1425static ssize_t
   1426system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
   1427		   loff_t *ppos)
   1428{
   1429	const char set_to_char[4] = { '?', '0', '1', 'X' };
   1430	struct trace_subsystem_dir *dir = filp->private_data;
   1431	struct event_subsystem *system = dir->subsystem;
   1432	struct trace_event_call *call;
   1433	struct trace_event_file *file;
   1434	struct trace_array *tr = dir->tr;
   1435	char buf[2];
   1436	int set = 0;
   1437	int ret;
   1438
   1439	mutex_lock(&event_mutex);
   1440	list_for_each_entry(file, &tr->events, list) {
   1441		call = file->event_call;
   1442		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
   1443		    !trace_event_name(call) || !call->class || !call->class->reg)
   1444			continue;
   1445
   1446		if (system && strcmp(call->class->system, system->name) != 0)
   1447			continue;
   1448
   1449		/*
   1450		 * We need to find out if all the events are set
    1451		 * or if all events are cleared, or if we have
   1452		 * a mixture.
   1453		 */
   1454		set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
   1455
   1456		/*
   1457		 * If we have a mixture, no need to look further.
   1458		 */
   1459		if (set == 3)
   1460			break;
   1461	}
   1462	mutex_unlock(&event_mutex);
   1463
   1464	buf[0] = set_to_char[set];
   1465	buf[1] = '\n';
   1466
   1467	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
   1468
   1469	return ret;
   1470}
   1471
   1472static ssize_t
   1473system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
   1474		    loff_t *ppos)
   1475{
   1476	struct trace_subsystem_dir *dir = filp->private_data;
   1477	struct event_subsystem *system = dir->subsystem;
   1478	const char *name = NULL;
   1479	unsigned long val;
   1480	ssize_t ret;
   1481
   1482	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   1483	if (ret)
   1484		return ret;
   1485
   1486	ret = tracing_update_buffers();
   1487	if (ret < 0)
   1488		return ret;
   1489
   1490	if (val != 0 && val != 1)
   1491		return -EINVAL;
   1492
   1493	/*
   1494	 * Opening of "enable" adds a ref count to system,
   1495	 * so the name is safe to use.
   1496	 */
   1497	if (system)
   1498		name = system->name;
   1499
   1500	ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
   1501	if (ret)
   1502		goto out;
   1503
   1504	ret = cnt;
   1505
   1506out:
   1507	*ppos += cnt;
   1508
   1509	return ret;
   1510}
   1511
   1512enum {
   1513	FORMAT_HEADER		= 1,
   1514	FORMAT_FIELD_SEPERATOR	= 2,
   1515	FORMAT_PRINTFMT		= 3,
   1516};
   1517
   1518static void *f_next(struct seq_file *m, void *v, loff_t *pos)
   1519{
   1520	struct trace_event_call *call = event_file_data(m->private);
   1521	struct list_head *common_head = &ftrace_common_fields;
   1522	struct list_head *head = trace_get_fields(call);
   1523	struct list_head *node = v;
   1524
   1525	(*pos)++;
   1526
   1527	switch ((unsigned long)v) {
   1528	case FORMAT_HEADER:
   1529		node = common_head;
   1530		break;
   1531
   1532	case FORMAT_FIELD_SEPERATOR:
   1533		node = head;
   1534		break;
   1535
   1536	case FORMAT_PRINTFMT:
   1537		/* all done */
   1538		return NULL;
   1539	}
   1540
   1541	node = node->prev;
   1542	if (node == common_head)
   1543		return (void *)FORMAT_FIELD_SEPERATOR;
   1544	else if (node == head)
   1545		return (void *)FORMAT_PRINTFMT;
   1546	else
   1547		return node;
   1548}
   1549
   1550static int f_show(struct seq_file *m, void *v)
   1551{
   1552	struct trace_event_call *call = event_file_data(m->private);
   1553	struct ftrace_event_field *field;
   1554	const char *array_descriptor;
   1555
   1556	switch ((unsigned long)v) {
   1557	case FORMAT_HEADER:
   1558		seq_printf(m, "name: %s\n", trace_event_name(call));
   1559		seq_printf(m, "ID: %d\n", call->event.type);
   1560		seq_puts(m, "format:\n");
   1561		return 0;
   1562
   1563	case FORMAT_FIELD_SEPERATOR:
   1564		seq_putc(m, '\n');
   1565		return 0;
   1566
   1567	case FORMAT_PRINTFMT:
   1568		seq_printf(m, "\nprint fmt: %s\n",
   1569			   call->print_fmt);
   1570		return 0;
   1571	}
   1572
   1573	field = list_entry(v, struct ftrace_event_field, link);
   1574	/*
    1575	 * Smartly shows the array type (except dynamic arrays).
   1576	 * Normal:
   1577	 *	field:TYPE VAR
   1578	 * If TYPE := TYPE[LEN], it is shown:
   1579	 *	field:TYPE VAR[LEN]
   1580	 */
   1581	array_descriptor = strchr(field->type, '[');
   1582
   1583	if (str_has_prefix(field->type, "__data_loc"))
   1584		array_descriptor = NULL;
   1585
   1586	if (!array_descriptor)
   1587		seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
   1588			   field->type, field->name, field->offset,
   1589			   field->size, !!field->is_signed);
   1590	else
   1591		seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
   1592			   (int)(array_descriptor - field->type),
   1593			   field->type, field->name,
   1594			   array_descriptor, field->offset,
   1595			   field->size, !!field->is_signed);
   1596
   1597	return 0;
   1598}
   1599
   1600static void *f_start(struct seq_file *m, loff_t *pos)
   1601{
   1602	void *p = (void *)FORMAT_HEADER;
   1603	loff_t l = 0;
   1604
   1605	/* ->stop() is called even if ->start() fails */
   1606	mutex_lock(&event_mutex);
   1607	if (!event_file_data(m->private))
   1608		return ERR_PTR(-ENODEV);
   1609
   1610	while (l < *pos && p)
   1611		p = f_next(m, p, &l);
   1612
   1613	return p;
   1614}
   1615
   1616static void f_stop(struct seq_file *m, void *p)
   1617{
   1618	mutex_unlock(&event_mutex);
   1619}
   1620
   1621static const struct seq_operations trace_format_seq_ops = {
   1622	.start		= f_start,
   1623	.next		= f_next,
   1624	.stop		= f_stop,
   1625	.show		= f_show,
   1626};
   1627
   1628static int trace_format_open(struct inode *inode, struct file *file)
   1629{
   1630	struct seq_file *m;
   1631	int ret;
   1632
   1633	/* Do we want to hide event format files on tracefs lockdown? */
   1634
   1635	ret = seq_open(file, &trace_format_seq_ops);
   1636	if (ret < 0)
   1637		return ret;
   1638
   1639	m = file->private_data;
   1640	m->private = file;
   1641
   1642	return 0;
   1643}
   1644
   1645static ssize_t
   1646event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
   1647{
   1648	int id = (long)event_file_data(filp);
   1649	char buf[32];
   1650	int len;
   1651
   1652	if (unlikely(!id))
   1653		return -ENODEV;
   1654
   1655	len = sprintf(buf, "%d\n", id);
   1656
   1657	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
   1658}
   1659
   1660static ssize_t
   1661event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
   1662		  loff_t *ppos)
   1663{
   1664	struct trace_event_file *file;
   1665	struct trace_seq *s;
   1666	int r = -ENODEV;
   1667
   1668	if (*ppos)
   1669		return 0;
   1670
   1671	s = kmalloc(sizeof(*s), GFP_KERNEL);
   1672
   1673	if (!s)
   1674		return -ENOMEM;
   1675
   1676	trace_seq_init(s);
   1677
   1678	mutex_lock(&event_mutex);
   1679	file = event_file_data(filp);
   1680	if (file)
   1681		print_event_filter(file, s);
   1682	mutex_unlock(&event_mutex);
   1683
   1684	if (file)
   1685		r = simple_read_from_buffer(ubuf, cnt, ppos,
   1686					    s->buffer, trace_seq_used(s));
   1687
   1688	kfree(s);
   1689
   1690	return r;
   1691}
   1692
   1693static ssize_t
   1694event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
   1695		   loff_t *ppos)
   1696{
   1697	struct trace_event_file *file;
   1698	char *buf;
   1699	int err = -ENODEV;
   1700
   1701	if (cnt >= PAGE_SIZE)
   1702		return -EINVAL;
   1703
   1704	buf = memdup_user_nul(ubuf, cnt);
   1705	if (IS_ERR(buf))
   1706		return PTR_ERR(buf);
   1707
   1708	mutex_lock(&event_mutex);
   1709	file = event_file_data(filp);
   1710	if (file)
   1711		err = apply_event_filter(file, buf);
   1712	mutex_unlock(&event_mutex);
   1713
   1714	kfree(buf);
   1715	if (err < 0)
   1716		return err;
   1717
   1718	*ppos += cnt;
   1719
   1720	return cnt;
   1721}
   1722
   1723static LIST_HEAD(event_subsystems);
   1724
   1725static int subsystem_open(struct inode *inode, struct file *filp)
   1726{
   1727	struct trace_subsystem_dir *dir = NULL, *iter_dir;
   1728	struct trace_array *tr = NULL, *iter_tr;
   1729	struct event_subsystem *system = NULL;
   1730	int ret;
   1731
   1732	if (tracing_is_disabled())
   1733		return -ENODEV;
   1734
   1735	/* Make sure the system still exists */
   1736	mutex_lock(&event_mutex);
   1737	mutex_lock(&trace_types_lock);
   1738	list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
   1739		list_for_each_entry(iter_dir, &iter_tr->systems, list) {
   1740			if (iter_dir == inode->i_private) {
   1741				/* Don't open systems with no events */
   1742				tr = iter_tr;
   1743				dir = iter_dir;
   1744				if (dir->nr_events) {
   1745					__get_system_dir(dir);
   1746					system = dir->subsystem;
   1747				}
   1748				goto exit_loop;
   1749			}
   1750		}
   1751	}
   1752 exit_loop:
   1753	mutex_unlock(&trace_types_lock);
   1754	mutex_unlock(&event_mutex);
   1755
   1756	if (!system)
   1757		return -ENODEV;
   1758
   1759	/* Still need to increment the ref count of the system */
   1760	if (trace_array_get(tr) < 0) {
   1761		put_system(dir);
   1762		return -ENODEV;
   1763	}
   1764
   1765	ret = tracing_open_generic(inode, filp);
   1766	if (ret < 0) {
   1767		trace_array_put(tr);
   1768		put_system(dir);
   1769	}
   1770
   1771	return ret;
   1772}
   1773
   1774static int system_tr_open(struct inode *inode, struct file *filp)
   1775{
   1776	struct trace_subsystem_dir *dir;
   1777	struct trace_array *tr = inode->i_private;
   1778	int ret;
   1779
   1780	/* Make a temporary dir that has no system but points to tr */
   1781	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
   1782	if (!dir)
   1783		return -ENOMEM;
   1784
   1785	ret = tracing_open_generic_tr(inode, filp);
   1786	if (ret < 0) {
   1787		kfree(dir);
   1788		return ret;
   1789	}
   1790	dir->tr = tr;
   1791	filp->private_data = dir;
   1792
   1793	return 0;
   1794}
   1795
   1796static int subsystem_release(struct inode *inode, struct file *file)
   1797{
   1798	struct trace_subsystem_dir *dir = file->private_data;
   1799
   1800	trace_array_put(dir->tr);
   1801
   1802	/*
   1803	 * If dir->subsystem is NULL, then this is a temporary
   1804	 * descriptor that was made for a trace_array to enable
   1805	 * all subsystems.
   1806	 */
   1807	if (dir->subsystem)
   1808		put_system(dir);
   1809	else
   1810		kfree(dir);
   1811
   1812	return 0;
   1813}
   1814
   1815static ssize_t
   1816subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
   1817		      loff_t *ppos)
   1818{
   1819	struct trace_subsystem_dir *dir = filp->private_data;
   1820	struct event_subsystem *system = dir->subsystem;
   1821	struct trace_seq *s;
   1822	int r;
   1823
   1824	if (*ppos)
   1825		return 0;
   1826
   1827	s = kmalloc(sizeof(*s), GFP_KERNEL);
   1828	if (!s)
   1829		return -ENOMEM;
   1830
   1831	trace_seq_init(s);
   1832
   1833	print_subsystem_event_filter(system, s);
   1834	r = simple_read_from_buffer(ubuf, cnt, ppos,
   1835				    s->buffer, trace_seq_used(s));
   1836
   1837	kfree(s);
   1838
   1839	return r;
   1840}
   1841
   1842static ssize_t
   1843subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
   1844		       loff_t *ppos)
   1845{
   1846	struct trace_subsystem_dir *dir = filp->private_data;
   1847	char *buf;
   1848	int err;
   1849
   1850	if (cnt >= PAGE_SIZE)
   1851		return -EINVAL;
   1852
   1853	buf = memdup_user_nul(ubuf, cnt);
   1854	if (IS_ERR(buf))
   1855		return PTR_ERR(buf);
   1856
   1857	err = apply_subsystem_event_filter(dir, buf);
   1858	kfree(buf);
   1859	if (err < 0)
   1860		return err;
   1861
   1862	*ppos += cnt;
   1863
   1864	return cnt;
   1865}
   1866
   1867static ssize_t
   1868show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
   1869{
   1870	int (*func)(struct trace_seq *s) = filp->private_data;
   1871	struct trace_seq *s;
   1872	int r;
   1873
   1874	if (*ppos)
   1875		return 0;
   1876
   1877	s = kmalloc(sizeof(*s), GFP_KERNEL);
   1878	if (!s)
   1879		return -ENOMEM;
   1880
   1881	trace_seq_init(s);
   1882
   1883	func(s);
   1884	r = simple_read_from_buffer(ubuf, cnt, ppos,
   1885				    s->buffer, trace_seq_used(s));
   1886
   1887	kfree(s);
   1888
   1889	return r;
   1890}
   1891
   1892static void ignore_task_cpu(void *data)
   1893{
   1894	struct trace_array *tr = data;
   1895	struct trace_pid_list *pid_list;
   1896	struct trace_pid_list *no_pid_list;
   1897
   1898	/*
   1899	 * This function is called by on_each_cpu() while the
   1900	 * event_mutex is held.
   1901	 */
   1902	pid_list = rcu_dereference_protected(tr->filtered_pids,
   1903					     mutex_is_locked(&event_mutex));
   1904	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
   1905					     mutex_is_locked(&event_mutex));
   1906
   1907	this_cpu_write(tr->array_buffer.data->ignore_pid,
   1908		       trace_ignore_this_task(pid_list, no_pid_list, current));
   1909}
   1910
   1911static void register_pid_events(struct trace_array *tr)
   1912{
   1913	/*
   1914	 * Register a probe that is called before all other probes
   1915	 * to set ignore_pid if next or prev do not match.
    1916	 * Register a probe that is called after all other probes
   1917	 * to only keep ignore_pid set if next pid matches.
   1918	 */
   1919	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
   1920					 tr, INT_MAX);
   1921	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
   1922					 tr, 0);
   1923
   1924	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
   1925					 tr, INT_MAX);
   1926	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
   1927					 tr, 0);
   1928
   1929	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
   1930					     tr, INT_MAX);
   1931	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
   1932					     tr, 0);
   1933
   1934	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
   1935					 tr, INT_MAX);
   1936	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
   1937					 tr, 0);
   1938}
   1939
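        /*
         * event_pid_write() handles writes to the "set_event_pid" and
         * "set_event_notrace_pid" tracefs files. A usage sketch (the PIDs
         * are only examples; the path assumes the usual tracefs mount):
         *
         *	echo 1234 > /sys/kernel/tracing/set_event_pid	# trace only PID 1234
         *	echo 5678 >> /sys/kernel/tracing/set_event_pid	# also trace PID 5678
         *
         * The "notrace" variant excludes the listed PIDs instead.
         */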
   1940static ssize_t
   1941event_pid_write(struct file *filp, const char __user *ubuf,
   1942		size_t cnt, loff_t *ppos, int type)
   1943{
   1944	struct seq_file *m = filp->private_data;
   1945	struct trace_array *tr = m->private;
   1946	struct trace_pid_list *filtered_pids = NULL;
   1947	struct trace_pid_list *other_pids = NULL;
   1948	struct trace_pid_list *pid_list;
   1949	struct trace_event_file *file;
   1950	ssize_t ret;
   1951
   1952	if (!cnt)
   1953		return 0;
   1954
   1955	ret = tracing_update_buffers();
   1956	if (ret < 0)
   1957		return ret;
   1958
   1959	mutex_lock(&event_mutex);
   1960
   1961	if (type == TRACE_PIDS) {
   1962		filtered_pids = rcu_dereference_protected(tr->filtered_pids,
   1963							  lockdep_is_held(&event_mutex));
   1964		other_pids = rcu_dereference_protected(tr->filtered_no_pids,
   1965							  lockdep_is_held(&event_mutex));
   1966	} else {
   1967		filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
   1968							  lockdep_is_held(&event_mutex));
   1969		other_pids = rcu_dereference_protected(tr->filtered_pids,
   1970							  lockdep_is_held(&event_mutex));
   1971	}
   1972
   1973	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
   1974	if (ret < 0)
   1975		goto out;
   1976
   1977	if (type == TRACE_PIDS)
   1978		rcu_assign_pointer(tr->filtered_pids, pid_list);
   1979	else
   1980		rcu_assign_pointer(tr->filtered_no_pids, pid_list);
   1981
   1982	list_for_each_entry(file, &tr->events, list) {
   1983		set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
   1984	}
   1985
   1986	if (filtered_pids) {
   1987		tracepoint_synchronize_unregister();
   1988		trace_pid_list_free(filtered_pids);
   1989	} else if (pid_list && !other_pids) {
   1990		register_pid_events(tr);
   1991	}
   1992
   1993	/*
   1994	 * Ignoring of pids is done at task switch. But we have to
   1995	 * check for those tasks that are currently running.
   1996	 * Always do this in case a pid was appended or removed.
   1997	 */
   1998	on_each_cpu(ignore_task_cpu, tr, 1);
   1999
   2000 out:
   2001	mutex_unlock(&event_mutex);
   2002
   2003	if (ret > 0)
   2004		*ppos += ret;
   2005
   2006	return ret;
   2007}
   2008
   2009static ssize_t
   2010ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
   2011		       size_t cnt, loff_t *ppos)
   2012{
   2013	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
   2014}
   2015
   2016static ssize_t
   2017ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
   2018			size_t cnt, loff_t *ppos)
   2019{
   2020	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
   2021}
   2022
   2023static int ftrace_event_avail_open(struct inode *inode, struct file *file);
   2024static int ftrace_event_set_open(struct inode *inode, struct file *file);
   2025static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
   2026static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
   2027static int ftrace_event_release(struct inode *inode, struct file *file);
   2028
   2029static const struct seq_operations show_event_seq_ops = {
   2030	.start = t_start,
   2031	.next = t_next,
   2032	.show = t_show,
   2033	.stop = t_stop,
   2034};
   2035
   2036static const struct seq_operations show_set_event_seq_ops = {
   2037	.start = s_start,
   2038	.next = s_next,
   2039	.show = t_show,
   2040	.stop = t_stop,
   2041};
   2042
   2043static const struct seq_operations show_set_pid_seq_ops = {
   2044	.start = p_start,
   2045	.next = p_next,
   2046	.show = trace_pid_show,
   2047	.stop = p_stop,
   2048};
   2049
   2050static const struct seq_operations show_set_no_pid_seq_ops = {
   2051	.start = np_start,
   2052	.next = np_next,
   2053	.show = trace_pid_show,
   2054	.stop = p_stop,
   2055};
   2056
   2057static const struct file_operations ftrace_avail_fops = {
   2058	.open = ftrace_event_avail_open,
   2059	.read = seq_read,
   2060	.llseek = seq_lseek,
   2061	.release = seq_release,
   2062};
   2063
   2064static const struct file_operations ftrace_set_event_fops = {
   2065	.open = ftrace_event_set_open,
   2066	.read = seq_read,
   2067	.write = ftrace_event_write,
   2068	.llseek = seq_lseek,
   2069	.release = ftrace_event_release,
   2070};
   2071
   2072static const struct file_operations ftrace_set_event_pid_fops = {
   2073	.open = ftrace_event_set_pid_open,
   2074	.read = seq_read,
   2075	.write = ftrace_event_pid_write,
   2076	.llseek = seq_lseek,
   2077	.release = ftrace_event_release,
   2078};
   2079
   2080static const struct file_operations ftrace_set_event_notrace_pid_fops = {
   2081	.open = ftrace_event_set_npid_open,
   2082	.read = seq_read,
   2083	.write = ftrace_event_npid_write,
   2084	.llseek = seq_lseek,
   2085	.release = ftrace_event_release,
   2086};
   2087
   2088static const struct file_operations ftrace_enable_fops = {
   2089	.open = tracing_open_generic,
   2090	.read = event_enable_read,
   2091	.write = event_enable_write,
   2092	.llseek = default_llseek,
   2093};
   2094
   2095static const struct file_operations ftrace_event_format_fops = {
   2096	.open = trace_format_open,
   2097	.read = seq_read,
   2098	.llseek = seq_lseek,
   2099	.release = seq_release,
   2100};
   2101
   2102static const struct file_operations ftrace_event_id_fops = {
   2103	.read = event_id_read,
   2104	.llseek = default_llseek,
   2105};
   2106
   2107static const struct file_operations ftrace_event_filter_fops = {
   2108	.open = tracing_open_generic,
   2109	.read = event_filter_read,
   2110	.write = event_filter_write,
   2111	.llseek = default_llseek,
   2112};
   2113
   2114static const struct file_operations ftrace_subsystem_filter_fops = {
   2115	.open = subsystem_open,
   2116	.read = subsystem_filter_read,
   2117	.write = subsystem_filter_write,
   2118	.llseek = default_llseek,
   2119	.release = subsystem_release,
   2120};
   2121
   2122static const struct file_operations ftrace_system_enable_fops = {
   2123	.open = subsystem_open,
   2124	.read = system_enable_read,
   2125	.write = system_enable_write,
   2126	.llseek = default_llseek,
   2127	.release = subsystem_release,
   2128};
   2129
   2130static const struct file_operations ftrace_tr_enable_fops = {
   2131	.open = system_tr_open,
   2132	.read = system_enable_read,
   2133	.write = system_enable_write,
   2134	.llseek = default_llseek,
   2135	.release = subsystem_release,
   2136};
   2137
   2138static const struct file_operations ftrace_show_header_fops = {
   2139	.open = tracing_open_generic,
   2140	.read = show_header,
   2141	.llseek = default_llseek,
   2142};
   2143
   2144static int
   2145ftrace_event_open(struct inode *inode, struct file *file,
   2146		  const struct seq_operations *seq_ops)
   2147{
   2148	struct seq_file *m;
   2149	int ret;
   2150
   2151	ret = security_locked_down(LOCKDOWN_TRACEFS);
   2152	if (ret)
   2153		return ret;
   2154
   2155	ret = seq_open(file, seq_ops);
   2156	if (ret < 0)
   2157		return ret;
   2158	m = file->private_data;
   2159	/* copy tr over to seq ops */
   2160	m->private = inode->i_private;
   2161
   2162	return ret;
   2163}
   2164
   2165static int ftrace_event_release(struct inode *inode, struct file *file)
   2166{
   2167	struct trace_array *tr = inode->i_private;
   2168
   2169	trace_array_put(tr);
   2170
   2171	return seq_release(inode, file);
   2172}
   2173
   2174static int
   2175ftrace_event_avail_open(struct inode *inode, struct file *file)
   2176{
   2177	const struct seq_operations *seq_ops = &show_event_seq_ops;
   2178
   2179	/* Checks for tracefs lockdown */
   2180	return ftrace_event_open(inode, file, seq_ops);
   2181}
   2182
   2183static int
   2184ftrace_event_set_open(struct inode *inode, struct file *file)
   2185{
   2186	const struct seq_operations *seq_ops = &show_set_event_seq_ops;
   2187	struct trace_array *tr = inode->i_private;
   2188	int ret;
   2189
   2190	ret = tracing_check_open_get_tr(tr);
   2191	if (ret)
   2192		return ret;
   2193
   2194	if ((file->f_mode & FMODE_WRITE) &&
   2195	    (file->f_flags & O_TRUNC))
   2196		ftrace_clear_events(tr);
   2197
   2198	ret = ftrace_event_open(inode, file, seq_ops);
   2199	if (ret < 0)
   2200		trace_array_put(tr);
   2201	return ret;
   2202}
   2203
   2204static int
   2205ftrace_event_set_pid_open(struct inode *inode, struct file *file)
   2206{
   2207	const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
   2208	struct trace_array *tr = inode->i_private;
   2209	int ret;
   2210
   2211	ret = tracing_check_open_get_tr(tr);
   2212	if (ret)
   2213		return ret;
   2214
   2215	if ((file->f_mode & FMODE_WRITE) &&
   2216	    (file->f_flags & O_TRUNC))
   2217		ftrace_clear_event_pids(tr, TRACE_PIDS);
   2218
   2219	ret = ftrace_event_open(inode, file, seq_ops);
   2220	if (ret < 0)
   2221		trace_array_put(tr);
   2222	return ret;
   2223}
   2224
   2225static int
   2226ftrace_event_set_npid_open(struct inode *inode, struct file *file)
   2227{
   2228	const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
   2229	struct trace_array *tr = inode->i_private;
   2230	int ret;
   2231
   2232	ret = tracing_check_open_get_tr(tr);
   2233	if (ret)
   2234		return ret;
   2235
   2236	if ((file->f_mode & FMODE_WRITE) &&
   2237	    (file->f_flags & O_TRUNC))
   2238		ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
   2239
   2240	ret = ftrace_event_open(inode, file, seq_ops);
   2241	if (ret < 0)
   2242		trace_array_put(tr);
   2243	return ret;
   2244}
   2245
   2246static struct event_subsystem *
   2247create_new_subsystem(const char *name)
   2248{
   2249	struct event_subsystem *system;
   2250
   2251	/* need to create new entry */
   2252	system = kmalloc(sizeof(*system), GFP_KERNEL);
   2253	if (!system)
   2254		return NULL;
   2255
   2256	system->ref_count = 1;
   2257
   2258	/* Only allocate if dynamic (kprobes and modules) */
   2259	system->name = kstrdup_const(name, GFP_KERNEL);
   2260	if (!system->name)
   2261		goto out_free;
   2262
   2263	system->filter = NULL;
   2264
   2265	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
   2266	if (!system->filter)
   2267		goto out_free;
   2268
   2269	list_add(&system->list, &event_subsystems);
   2270
   2271	return system;
   2272
   2273 out_free:
   2274	kfree_const(system->name);
   2275	kfree(system);
   2276	return NULL;
   2277}
   2278
   2279static struct dentry *
   2280event_subsystem_dir(struct trace_array *tr, const char *name,
   2281		    struct trace_event_file *file, struct dentry *parent)
   2282{
   2283	struct event_subsystem *system, *iter;
   2284	struct trace_subsystem_dir *dir;
   2285	struct dentry *entry;
   2286
    2287	/* First see if we have already created this dir */
   2288	list_for_each_entry(dir, &tr->systems, list) {
   2289		system = dir->subsystem;
   2290		if (strcmp(system->name, name) == 0) {
   2291			dir->nr_events++;
   2292			file->system = dir;
   2293			return dir->entry;
   2294		}
   2295	}
   2296
   2297	/* Now see if the system itself exists. */
   2298	system = NULL;
   2299	list_for_each_entry(iter, &event_subsystems, list) {
   2300		if (strcmp(iter->name, name) == 0) {
   2301			system = iter;
   2302			break;
   2303		}
   2304	}
   2305
   2306	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
   2307	if (!dir)
   2308		goto out_fail;
   2309
   2310	if (!system) {
   2311		system = create_new_subsystem(name);
   2312		if (!system)
   2313			goto out_free;
   2314	} else
   2315		__get_system(system);
   2316
   2317	dir->entry = tracefs_create_dir(name, parent);
   2318	if (!dir->entry) {
   2319		pr_warn("Failed to create system directory %s\n", name);
   2320		__put_system(system);
   2321		goto out_free;
   2322	}
   2323
   2324	dir->tr = tr;
   2325	dir->ref_count = 1;
   2326	dir->nr_events = 1;
   2327	dir->subsystem = system;
   2328	file->system = dir;
   2329
   2330	/* the ftrace system is special, do not create enable or filter files */
   2331	if (strcmp(name, "ftrace") != 0) {
   2332
   2333		entry = tracefs_create_file("filter", TRACE_MODE_WRITE,
   2334					    dir->entry, dir,
   2335					    &ftrace_subsystem_filter_fops);
   2336		if (!entry) {
   2337			kfree(system->filter);
   2338			system->filter = NULL;
   2339			pr_warn("Could not create tracefs '%s/filter' entry\n", name);
   2340		}
   2341
   2342		trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir,
   2343				  &ftrace_system_enable_fops);
   2344	}
   2345
   2346	list_add(&dir->list, &tr->systems);
   2347
   2348	return dir->entry;
   2349
   2350 out_free:
   2351	kfree(dir);
   2352 out_fail:
   2353	/* Only print this message if failed on memory allocation */
   2354	if (!dir || !system)
   2355		pr_warn("No memory to create event subsystem %s\n", name);
   2356	return NULL;
   2357}
   2358
   2359static int
   2360event_define_fields(struct trace_event_call *call)
   2361{
   2362	struct list_head *head;
   2363	int ret = 0;
   2364
   2365	/*
   2366	 * Other events may have the same class. Only update
   2367	 * the fields if they are not already defined.
   2368	 */
   2369	head = trace_get_fields(call);
   2370	if (list_empty(head)) {
   2371		struct trace_event_fields *field = call->class->fields_array;
   2372		unsigned int offset = sizeof(struct trace_entry);
   2373
   2374		for (; field->type; field++) {
   2375			if (field->type == TRACE_FUNCTION_TYPE) {
   2376				field->define_fields(call);
   2377				break;
   2378			}
   2379
   2380			offset = ALIGN(offset, field->align);
   2381			ret = trace_define_field(call, field->type, field->name,
   2382						 offset, field->size,
   2383						 field->is_signed, field->filter_type);
   2384			if (WARN_ON_ONCE(ret)) {
   2385				pr_err("error code is %d\n", ret);
   2386				break;
   2387			}
   2388
   2389			offset += field->size;
   2390		}
   2391	}
   2392
   2393	return ret;
   2394}
   2395
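        /*
         * event_create_dir() below (together with event_subsystem_dir()
         * above) builds the per-event tracefs hierarchy, roughly:
         *
         *	events/<system>/{enable,filter}
         *	events/<system>/<event>/{enable,id,filter,trigger,format,...}
         *
         * Several of the files are conditional on config options and on
         * the event's flags, as can be seen below.
         */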
   2396static int
   2397event_create_dir(struct dentry *parent, struct trace_event_file *file)
   2398{
   2399	struct trace_event_call *call = file->event_call;
   2400	struct trace_array *tr = file->tr;
   2401	struct dentry *d_events;
   2402	const char *name;
   2403	int ret;
   2404
   2405	/*
   2406	 * If the trace point header did not define TRACE_SYSTEM
   2407	 * then the system would be called "TRACE_SYSTEM".
   2408	 */
   2409	if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
   2410		d_events = event_subsystem_dir(tr, call->class->system, file, parent);
   2411		if (!d_events)
   2412			return -ENOMEM;
   2413	} else
   2414		d_events = parent;
   2415
   2416	name = trace_event_name(call);
   2417	file->dir = tracefs_create_dir(name, d_events);
   2418	if (!file->dir) {
   2419		pr_warn("Could not create tracefs '%s' directory\n", name);
   2420		return -1;
   2421	}
   2422
   2423	if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
   2424		trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file,
   2425				  &ftrace_enable_fops);
   2426
   2427#ifdef CONFIG_PERF_EVENTS
   2428	if (call->event.type && call->class->reg)
   2429		trace_create_file("id", TRACE_MODE_READ, file->dir,
   2430				  (void *)(long)call->event.type,
   2431				  &ftrace_event_id_fops);
   2432#endif
   2433
   2434	ret = event_define_fields(call);
   2435	if (ret < 0) {
   2436		pr_warn("Could not initialize trace point events/%s\n", name);
   2437		return ret;
   2438	}
   2439
   2440	/*
   2441	 * Only event directories that can be enabled should have
   2442	 * triggers or filters.
   2443	 */
   2444	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
   2445		trace_create_file("filter", TRACE_MODE_WRITE, file->dir,
   2446				  file, &ftrace_event_filter_fops);
   2447
   2448		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
   2449				  file, &event_trigger_fops);
   2450	}
   2451
   2452#ifdef CONFIG_HIST_TRIGGERS
   2453	trace_create_file("hist", TRACE_MODE_READ, file->dir, file,
   2454			  &event_hist_fops);
   2455#endif
   2456#ifdef CONFIG_HIST_TRIGGERS_DEBUG
   2457	trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file,
   2458			  &event_hist_debug_fops);
   2459#endif
   2460	trace_create_file("format", TRACE_MODE_READ, file->dir, call,
   2461			  &ftrace_event_format_fops);
   2462
   2463#ifdef CONFIG_TRACE_EVENT_INJECT
   2464	if (call->event.type && call->class->reg)
   2465		trace_create_file("inject", 0200, file->dir, file,
   2466				  &event_inject_fops);
   2467#endif
   2468
   2469	return 0;
   2470}
   2471
   2472static void remove_event_from_tracers(struct trace_event_call *call)
   2473{
   2474	struct trace_event_file *file;
   2475	struct trace_array *tr;
   2476
   2477	do_for_each_event_file_safe(tr, file) {
   2478		if (file->event_call != call)
   2479			continue;
   2480
   2481		remove_event_file_dir(file);
   2482		/*
   2483		 * The do_for_each_event_file_safe() is
   2484		 * a double loop. After finding the call for this
   2485		 * trace_array, we use break to jump to the next
   2486		 * trace_array.
   2487		 */
   2488		break;
   2489	} while_for_each_event_file();
   2490}
   2491
   2492static void event_remove(struct trace_event_call *call)
   2493{
   2494	struct trace_array *tr;
   2495	struct trace_event_file *file;
   2496
   2497	do_for_each_event_file(tr, file) {
   2498		if (file->event_call != call)
   2499			continue;
   2500
   2501		if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
   2502			tr->clear_trace = true;
   2503
   2504		ftrace_event_enable_disable(file, 0);
   2505		/*
   2506		 * The do_for_each_event_file() is
   2507		 * a double loop. After finding the call for this
   2508		 * trace_array, we use break to jump to the next
   2509		 * trace_array.
   2510		 */
   2511		break;
   2512	} while_for_each_event_file();
   2513
   2514	if (call->event.funcs)
   2515		__unregister_trace_event(&call->event);
   2516	remove_event_from_tracers(call);
   2517	list_del(&call->list);
   2518}
   2519
   2520static int event_init(struct trace_event_call *call)
   2521{
   2522	int ret = 0;
   2523	const char *name;
   2524
   2525	name = trace_event_name(call);
   2526	if (WARN_ON(!name))
   2527		return -EINVAL;
   2528
   2529	if (call->class->raw_init) {
   2530		ret = call->class->raw_init(call);
   2531		if (ret < 0 && ret != -ENOSYS)
   2532			pr_warn("Could not initialize trace events/%s\n", name);
   2533	}
   2534
   2535	return ret;
   2536}
   2537
   2538static int
   2539__register_event(struct trace_event_call *call, struct module *mod)
   2540{
   2541	int ret;
   2542
   2543	ret = event_init(call);
   2544	if (ret < 0)
   2545		return ret;
   2546
   2547	list_add(&call->list, &ftrace_events);
   2548	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
   2549		atomic_set(&call->refcnt, 0);
   2550	else
   2551		call->module = mod;
   2552
   2553	return 0;
   2554}
   2555
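        /*
         * Replace the eval (enum/sizeof) name at @ptr with its numeric
         * value, in place, and shift the rest of the string down. As an
         * illustration (values made up): with eval_string "MY_ENUM" and
         * eval_value 4, the text "MY_ENUM << 2" becomes "4 << 2" and the
         * returned pointer references the character just after the "4".
         * Returns NULL if the printed value is longer than the name it
         * replaces.
         */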
   2556static char *eval_replace(char *ptr, struct trace_eval_map *map, int len)
   2557{
   2558	int rlen;
   2559	int elen;
   2560
   2561	/* Find the length of the eval value as a string */
   2562	elen = snprintf(ptr, 0, "%ld", map->eval_value);
   2563	/* Make sure there's enough room to replace the string with the value */
   2564	if (len < elen)
   2565		return NULL;
   2566
   2567	snprintf(ptr, elen + 1, "%ld", map->eval_value);
   2568
   2569	/* Get the rest of the string of ptr */
   2570	rlen = strlen(ptr + len);
   2571	memmove(ptr + elen, ptr + len, rlen);
   2572	/* Make sure we end the new string */
   2573	ptr[elen + rlen] = 0;
   2574
   2575	return ptr + elen;
   2576}
   2577
   2578static void update_event_printk(struct trace_event_call *call,
   2579				struct trace_eval_map *map)
   2580{
   2581	char *ptr;
   2582	int quote = 0;
   2583	int len = strlen(map->eval_string);
   2584
   2585	for (ptr = call->print_fmt; *ptr; ptr++) {
   2586		if (*ptr == '\\') {
   2587			ptr++;
   2588			/* paranoid */
   2589			if (!*ptr)
   2590				break;
   2591			continue;
   2592		}
   2593		if (*ptr == '"') {
   2594			quote ^= 1;
   2595			continue;
   2596		}
   2597		if (quote)
   2598			continue;
   2599		if (isdigit(*ptr)) {
   2600			/* skip numbers */
   2601			do {
   2602				ptr++;
   2603				/* Check for alpha chars like ULL */
   2604			} while (isalnum(*ptr));
   2605			if (!*ptr)
   2606				break;
   2607			/*
   2608			 * A number must have some kind of delimiter after
   2609			 * it, and we can ignore that too.
   2610			 */
   2611			continue;
   2612		}
   2613		if (isalpha(*ptr) || *ptr == '_') {
   2614			if (strncmp(map->eval_string, ptr, len) == 0 &&
   2615			    !isalnum(ptr[len]) && ptr[len] != '_') {
   2616				ptr = eval_replace(ptr, map, len);
   2617				/* enum/sizeof string smaller than value */
   2618				if (WARN_ON_ONCE(!ptr))
   2619					return;
   2620				/*
   2621				 * No need to decrement here, as eval_replace()
    2622				 * returns the pointer to the character just past
    2623				 * the eval, and two evals cannot be placed
   2624				 * back to back without something in between.
   2625				 * We can skip that something in between.
   2626				 */
   2627				continue;
   2628			}
   2629		skip_more:
   2630			do {
   2631				ptr++;
   2632			} while (isalnum(*ptr) || *ptr == '_');
   2633			if (!*ptr)
   2634				break;
   2635			/*
   2636			 * If what comes after this variable is a '.' or
   2637			 * '->' then we can continue to ignore that string.
   2638			 */
   2639			if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
   2640				ptr += *ptr == '.' ? 1 : 2;
   2641				if (!*ptr)
   2642					break;
   2643				goto skip_more;
   2644			}
   2645			/*
   2646			 * Once again, we can skip the delimiter that came
   2647			 * after the string.
   2648			 */
   2649			continue;
   2650		}
   2651	}
   2652}
   2653
   2654static void add_str_to_module(struct module *module, char *str)
   2655{
   2656	struct module_string *modstr;
   2657
   2658	modstr = kmalloc(sizeof(*modstr), GFP_KERNEL);
   2659
   2660	/*
   2661	 * If we failed to allocate memory here, then we'll just
   2662	 * let the str memory leak when the module is removed.
    2663	 * If this fails to allocate, there are worse problems than
   2664	 * a leaked string on module removal.
   2665	 */
   2666	if (WARN_ON_ONCE(!modstr))
   2667		return;
   2668
   2669	modstr->module = module;
   2670	modstr->str = str;
   2671
   2672	list_add(&modstr->next, &module_strings);
   2673}
   2674
   2675static void update_event_fields(struct trace_event_call *call,
   2676				struct trace_eval_map *map)
   2677{
   2678	struct ftrace_event_field *field;
   2679	struct list_head *head;
   2680	char *ptr;
   2681	char *str;
   2682	int len = strlen(map->eval_string);
   2683
   2684	/* Dynamic events should never have field maps */
   2685	if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
   2686		return;
   2687
   2688	head = trace_get_fields(call);
   2689	list_for_each_entry(field, head, link) {
   2690		ptr = strchr(field->type, '[');
   2691		if (!ptr)
   2692			continue;
   2693		ptr++;
   2694
   2695		if (!isalpha(*ptr) && *ptr != '_')
   2696			continue;
   2697
   2698		if (strncmp(map->eval_string, ptr, len) != 0)
   2699			continue;
   2700
   2701		str = kstrdup(field->type, GFP_KERNEL);
   2702		if (WARN_ON_ONCE(!str))
   2703			return;
   2704		ptr = str + (ptr - field->type);
   2705		ptr = eval_replace(ptr, map, len);
   2706		/* enum/sizeof string smaller than value */
   2707		if (WARN_ON_ONCE(!ptr)) {
   2708			kfree(str);
   2709			continue;
   2710		}
   2711
   2712		/*
   2713		 * If the event is part of a module, then we need to free the string
   2714		 * when the module is removed. Otherwise, it will stay allocated
   2715		 * until a reboot.
   2716		 */
   2717		if (call->module)
   2718			add_str_to_module(call->module, str);
   2719
   2720		field->type = str;
   2721	}
   2722}
   2723
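        /*
         * Walk all registered events and replace any enum/sizeof names
         * (registered via TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF()) found
         * in their print_fmt and field type strings with their numeric
         * values, so user space parsers of the "format" files do not need
         * to know kernel symbols. As an illustration (names and values
         * made up), a field type of "__u32 pads[PAD_COUNT]" would become
         * "__u32 pads[3]" if PAD_COUNT is mapped to 3.
         */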
   2724void trace_event_eval_update(struct trace_eval_map **map, int len)
   2725{
   2726	struct trace_event_call *call, *p;
   2727	const char *last_system = NULL;
   2728	bool first = false;
   2729	int last_i;
   2730	int i;
   2731
   2732	down_write(&trace_event_sem);
   2733	list_for_each_entry_safe(call, p, &ftrace_events, list) {
   2734		/* events are usually grouped together with systems */
   2735		if (!last_system || call->class->system != last_system) {
   2736			first = true;
   2737			last_i = 0;
   2738			last_system = call->class->system;
   2739		}
   2740
   2741		/*
   2742		 * Since calls are grouped by systems, the likelihood that the
   2743		 * next call in the iteration belongs to the same system as the
   2744		 * previous call is high. As an optimization, we skip searching
   2745		 * for a map[] that matches the call's system if the last call
   2746		 * was from the same system. That's what last_i is for. If the
   2747		 * call has the same system as the previous call, then last_i
   2748		 * will be the index of the first map[] that has a matching
   2749		 * system.
   2750		 */
   2751		for (i = last_i; i < len; i++) {
   2752			if (call->class->system == map[i]->system) {
   2753				/* Save the first system if need be */
   2754				if (first) {
   2755					last_i = i;
   2756					first = false;
   2757				}
   2758				update_event_printk(call, map[i]);
   2759				update_event_fields(call, map[i]);
   2760			}
   2761		}
   2762	}
   2763	up_write(&trace_event_sem);
   2764}
   2765
   2766static struct trace_event_file *
   2767trace_create_new_event(struct trace_event_call *call,
   2768		       struct trace_array *tr)
   2769{
   2770	struct trace_pid_list *no_pid_list;
   2771	struct trace_pid_list *pid_list;
   2772	struct trace_event_file *file;
   2773	unsigned int first;
   2774
   2775	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
   2776	if (!file)
   2777		return NULL;
   2778
   2779	pid_list = rcu_dereference_protected(tr->filtered_pids,
   2780					     lockdep_is_held(&event_mutex));
   2781	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
   2782					     lockdep_is_held(&event_mutex));
   2783
   2784	if (!trace_pid_list_first(pid_list, &first) ||
   2785	    !trace_pid_list_first(no_pid_list, &first))
   2786		file->flags |= EVENT_FILE_FL_PID_FILTER;
   2787
   2788	file->event_call = call;
   2789	file->tr = tr;
   2790	atomic_set(&file->sm_ref, 0);
   2791	atomic_set(&file->tm_ref, 0);
   2792	INIT_LIST_HEAD(&file->triggers);
   2793	list_add(&file->list, &tr->events);
   2794
   2795	return file;
   2796}
   2797
   2798/* Add an event to a trace directory */
   2799static int
   2800__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
   2801{
   2802	struct trace_event_file *file;
   2803
   2804	file = trace_create_new_event(call, tr);
   2805	if (!file)
   2806		return -ENOMEM;
   2807
   2808	if (eventdir_initialized)
   2809		return event_create_dir(tr->event_dir, file);
   2810	else
   2811		return event_define_fields(call);
   2812}
   2813
   2814/*
   2815 * Just create a descriptor for early init. A descriptor is required
   2816 * for enabling events at boot. We want to enable events before
   2817 * the filesystem is initialized.
   2818 */
   2819static int
   2820__trace_early_add_new_event(struct trace_event_call *call,
   2821			    struct trace_array *tr)
   2822{
   2823	struct trace_event_file *file;
   2824
   2825	file = trace_create_new_event(call, tr);
   2826	if (!file)
   2827		return -ENOMEM;
   2828
   2829	return event_define_fields(call);
   2830}
   2831
   2832struct ftrace_module_file_ops;
   2833static void __add_event_to_tracers(struct trace_event_call *call);
   2834
   2835/* Add an additional event_call dynamically */
   2836int trace_add_event_call(struct trace_event_call *call)
   2837{
   2838	int ret;
   2839	lockdep_assert_held(&event_mutex);
   2840
   2841	mutex_lock(&trace_types_lock);
   2842
   2843	ret = __register_event(call, NULL);
   2844	if (ret >= 0)
   2845		__add_event_to_tracers(call);
   2846
   2847	mutex_unlock(&trace_types_lock);
   2848	return ret;
   2849}
   2850EXPORT_SYMBOL_GPL(trace_add_event_call);
   2851
   2852/*
   2853 * Must be called under locking of trace_types_lock, event_mutex and
   2854 * trace_event_sem.
   2855 */
   2856static void __trace_remove_event_call(struct trace_event_call *call)
   2857{
   2858	event_remove(call);
   2859	trace_destroy_fields(call);
   2860	free_event_filter(call->filter);
   2861	call->filter = NULL;
   2862}
   2863
   2864static int probe_remove_event_call(struct trace_event_call *call)
   2865{
   2866	struct trace_array *tr;
   2867	struct trace_event_file *file;
   2868
   2869#ifdef CONFIG_PERF_EVENTS
   2870	if (call->perf_refcount)
   2871		return -EBUSY;
   2872#endif
   2873	do_for_each_event_file(tr, file) {
   2874		if (file->event_call != call)
   2875			continue;
   2876		/*
    2877		 * We can't rely on the ftrace_event_enable_disable(enable => 0)
    2878		 * that we are going to do, as EVENT_FILE_FL_SOFT_MODE can
    2879		 * suppress TRACE_REG_UNREGISTER.
   2880		 */
   2881		if (file->flags & EVENT_FILE_FL_ENABLED)
   2882			return -EBUSY;
   2883		/*
    2884		 * The do_for_each_event_file() is
   2885		 * a double loop. After finding the call for this
   2886		 * trace_array, we use break to jump to the next
   2887		 * trace_array.
   2888		 */
   2889		break;
   2890	} while_for_each_event_file();
   2891
   2892	__trace_remove_event_call(call);
   2893
   2894	return 0;
   2895}
   2896
   2897/* Remove an event_call */
   2898int trace_remove_event_call(struct trace_event_call *call)
   2899{
   2900	int ret;
   2901
   2902	lockdep_assert_held(&event_mutex);
   2903
   2904	mutex_lock(&trace_types_lock);
   2905	down_write(&trace_event_sem);
   2906	ret = probe_remove_event_call(call);
   2907	up_write(&trace_event_sem);
   2908	mutex_unlock(&trace_types_lock);
   2909
   2910	return ret;
   2911}
   2912EXPORT_SYMBOL_GPL(trace_remove_event_call);
   2913
   2914#define for_each_event(event, start, end)			\
   2915	for (event = start;					\
   2916	     (unsigned long)event < (unsigned long)end;		\
   2917	     event++)
   2918
   2919#ifdef CONFIG_MODULES
   2920
   2921static void trace_module_add_events(struct module *mod)
   2922{
   2923	struct trace_event_call **call, **start, **end;
   2924
   2925	if (!mod->num_trace_events)
   2926		return;
   2927
   2928	/* Don't add infrastructure for mods without tracepoints */
   2929	if (trace_module_has_bad_taint(mod)) {
   2930		pr_err("%s: module has bad taint, not creating trace events\n",
   2931		       mod->name);
   2932		return;
   2933	}
   2934
   2935	start = mod->trace_events;
   2936	end = mod->trace_events + mod->num_trace_events;
   2937
   2938	for_each_event(call, start, end) {
   2939		__register_event(*call, mod);
   2940		__add_event_to_tracers(*call);
   2941	}
   2942}
   2943
   2944static void trace_module_remove_events(struct module *mod)
   2945{
   2946	struct trace_event_call *call, *p;
   2947	struct module_string *modstr, *m;
   2948
   2949	down_write(&trace_event_sem);
   2950	list_for_each_entry_safe(call, p, &ftrace_events, list) {
   2951		if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
   2952			continue;
   2953		if (call->module == mod)
   2954			__trace_remove_event_call(call);
   2955	}
    2956	/* Check for any strings allocated for this module */
   2957	list_for_each_entry_safe(modstr, m, &module_strings, next) {
   2958		if (modstr->module != mod)
   2959			continue;
   2960		list_del(&modstr->next);
   2961		kfree(modstr->str);
   2962		kfree(modstr);
   2963	}
   2964	up_write(&trace_event_sem);
   2965
   2966	/*
   2967	 * It is safest to reset the ring buffer if the module being unloaded
   2968	 * registered any events that were used. The only worry is if
   2969	 * a new module gets loaded, and takes on the same id as the events
   2970	 * of this module. When printing out the buffer, traced events left
   2971	 * over from this module may be passed to the new module events and
   2972	 * unexpected results may occur.
   2973	 */
   2974	tracing_reset_all_online_cpus();
   2975}
   2976
   2977static int trace_module_notify(struct notifier_block *self,
   2978			       unsigned long val, void *data)
   2979{
   2980	struct module *mod = data;
   2981
   2982	mutex_lock(&event_mutex);
   2983	mutex_lock(&trace_types_lock);
   2984	switch (val) {
   2985	case MODULE_STATE_COMING:
   2986		trace_module_add_events(mod);
   2987		break;
   2988	case MODULE_STATE_GOING:
   2989		trace_module_remove_events(mod);
   2990		break;
   2991	}
   2992	mutex_unlock(&trace_types_lock);
   2993	mutex_unlock(&event_mutex);
   2994
   2995	return NOTIFY_OK;
   2996}
   2997
   2998static struct notifier_block trace_module_nb = {
   2999	.notifier_call = trace_module_notify,
   3000	.priority = 1, /* higher than trace.c module notify */
   3001};
   3002#endif /* CONFIG_MODULES */
   3003
   3004/* Create a new event directory structure for a trace directory. */
   3005static void
   3006__trace_add_event_dirs(struct trace_array *tr)
   3007{
   3008	struct trace_event_call *call;
   3009	int ret;
   3010
   3011	list_for_each_entry(call, &ftrace_events, list) {
   3012		ret = __trace_add_new_event(call, tr);
   3013		if (ret < 0)
   3014			pr_warn("Could not create directory for event %s\n",
   3015				trace_event_name(call));
   3016	}
   3017}
   3018
   3019/* Returns any file that matches the system and event */
   3020struct trace_event_file *
   3021__find_event_file(struct trace_array *tr, const char *system, const char *event)
   3022{
   3023	struct trace_event_file *file;
   3024	struct trace_event_call *call;
   3025	const char *name;
   3026
   3027	list_for_each_entry(file, &tr->events, list) {
   3028
   3029		call = file->event_call;
   3030		name = trace_event_name(call);
   3031
   3032		if (!name || !call->class)
   3033			continue;
   3034
   3035		if (strcmp(event, name) == 0 &&
   3036		    strcmp(system, call->class->system) == 0)
   3037			return file;
   3038	}
   3039	return NULL;
   3040}
   3041
   3042/* Returns valid trace event files that match system and event */
   3043struct trace_event_file *
   3044find_event_file(struct trace_array *tr, const char *system, const char *event)
   3045{
   3046	struct trace_event_file *file;
   3047
   3048	file = __find_event_file(tr, system, event);
   3049	if (!file || !file->event_call->class->reg ||
   3050	    file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
   3051		return NULL;
   3052
   3053	return file;
   3054}
   3055
   3056/**
   3057 * trace_get_event_file - Find and return a trace event file
   3058 * @instance: The name of the trace instance containing the event
   3059 * @system: The name of the system containing the event
   3060 * @event: The name of the event
   3061 *
   3062 * Return a trace event file given the trace instance name, trace
   3063 * system, and trace event name.  If the instance name is NULL, it
   3064 * refers to the top-level trace array.
   3065 *
   3066 * This function will look it up and return it if found, after calling
   3067 * trace_array_get() to prevent the instance from going away, and
   3068 * increment the event's module refcount to prevent it from being
   3069 * removed.
   3070 *
   3071 * To release the file, call trace_put_event_file(), which will call
   3072 * trace_array_put() and decrement the event's module refcount.
   3073 *
    3074 * Return: The trace event file on success, ERR_PTR otherwise.
   3075 */
   3076struct trace_event_file *trace_get_event_file(const char *instance,
   3077					      const char *system,
   3078					      const char *event)
   3079{
   3080	struct trace_array *tr = top_trace_array();
   3081	struct trace_event_file *file = NULL;
   3082	int ret = -EINVAL;
   3083
   3084	if (instance) {
   3085		tr = trace_array_find_get(instance);
   3086		if (!tr)
   3087			return ERR_PTR(-ENOENT);
   3088	} else {
   3089		ret = trace_array_get(tr);
   3090		if (ret)
   3091			return ERR_PTR(ret);
   3092	}
   3093
   3094	mutex_lock(&event_mutex);
   3095
   3096	file = find_event_file(tr, system, event);
   3097	if (!file) {
   3098		trace_array_put(tr);
   3099		ret = -EINVAL;
   3100		goto out;
   3101	}
   3102
   3103	/* Don't let event modules unload while in use */
   3104	ret = trace_event_try_get_ref(file->event_call);
   3105	if (!ret) {
   3106		trace_array_put(tr);
   3107		ret = -EBUSY;
   3108		goto out;
   3109	}
   3110
   3111	ret = 0;
   3112 out:
   3113	mutex_unlock(&event_mutex);
   3114
   3115	if (ret)
   3116		file = ERR_PTR(ret);
   3117
   3118	return file;
   3119}
   3120EXPORT_SYMBOL_GPL(trace_get_event_file);
   3121
   3122/**
   3123 * trace_put_event_file - Release a file from trace_get_event_file()
   3124 * @file: The trace event file
   3125 *
   3126 * If a file was retrieved using trace_get_event_file(), this should
   3127 * be called when it's no longer needed.  It will cancel the previous
   3128 * trace_array_get() called by that function, and decrement the
   3129 * event's module refcount.
   3130 */
   3131void trace_put_event_file(struct trace_event_file *file)
   3132{
   3133	mutex_lock(&event_mutex);
   3134	trace_event_put_ref(file->event_call);
   3135	mutex_unlock(&event_mutex);
   3136
   3137	trace_array_put(file->tr);
   3138}
   3139EXPORT_SYMBOL_GPL(trace_put_event_file);
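
        /*
         * A usage sketch for the pair above (a NULL instance means the
         * top-level trace array; "sched" and "sched_switch" are only
         * example names):
         *
         *	file = trace_get_event_file(NULL, "sched", "sched_switch");
         *	if (IS_ERR(file))
         *		return PTR_ERR(file);
         *	...
         *	trace_put_event_file(file);
         */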
   3140
   3141#ifdef CONFIG_DYNAMIC_FTRACE
   3142
   3143/* Avoid typos */
   3144#define ENABLE_EVENT_STR	"enable_event"
   3145#define DISABLE_EVENT_STR	"disable_event"
   3146
   3147struct event_probe_data {
   3148	struct trace_event_file	*file;
   3149	unsigned long			count;
   3150	int				ref;
   3151	bool				enable;
   3152};
   3153
   3154static void update_event_probe(struct event_probe_data *data)
   3155{
   3156	if (data->enable)
   3157		clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
   3158	else
   3159		set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
   3160}
   3161
   3162static void
   3163event_enable_probe(unsigned long ip, unsigned long parent_ip,
   3164		   struct trace_array *tr, struct ftrace_probe_ops *ops,
   3165		   void *data)
   3166{
   3167	struct ftrace_func_mapper *mapper = data;
   3168	struct event_probe_data *edata;
   3169	void **pdata;
   3170
   3171	pdata = ftrace_func_mapper_find_ip(mapper, ip);
   3172	if (!pdata || !*pdata)
   3173		return;
   3174
   3175	edata = *pdata;
   3176	update_event_probe(edata);
   3177}
   3178
   3179static void
   3180event_enable_count_probe(unsigned long ip, unsigned long parent_ip,
   3181			 struct trace_array *tr, struct ftrace_probe_ops *ops,
   3182			 void *data)
   3183{
   3184	struct ftrace_func_mapper *mapper = data;
   3185	struct event_probe_data *edata;
   3186	void **pdata;
   3187
   3188	pdata = ftrace_func_mapper_find_ip(mapper, ip);
   3189	if (!pdata || !*pdata)
   3190		return;
   3191
   3192	edata = *pdata;
   3193
   3194	if (!edata->count)
   3195		return;
   3196
   3197	/* Skip if the event is in a state we want to switch to */
   3198	if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
   3199		return;
   3200
   3201	if (edata->count != -1)
   3202		(edata->count)--;
   3203
   3204	update_event_probe(edata);
   3205}
   3206
   3207static int
   3208event_enable_print(struct seq_file *m, unsigned long ip,
   3209		   struct ftrace_probe_ops *ops, void *data)
   3210{
   3211	struct ftrace_func_mapper *mapper = data;
   3212	struct event_probe_data *edata;
   3213	void **pdata;
   3214
   3215	pdata = ftrace_func_mapper_find_ip(mapper, ip);
   3216
   3217	if (WARN_ON_ONCE(!pdata || !*pdata))
   3218		return 0;
   3219
   3220	edata = *pdata;
   3221
   3222	seq_printf(m, "%ps:", (void *)ip);
   3223
   3224	seq_printf(m, "%s:%s:%s",
   3225		   edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
   3226		   edata->file->event_call->class->system,
   3227		   trace_event_name(edata->file->event_call));
   3228
   3229	if (edata->count == -1)
   3230		seq_puts(m, ":unlimited\n");
   3231	else
   3232		seq_printf(m, ":count=%ld\n", edata->count);
   3233
   3234	return 0;
   3235}
   3236
   3237static int
   3238event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
   3239		  unsigned long ip, void *init_data, void **data)
   3240{
   3241	struct ftrace_func_mapper *mapper = *data;
   3242	struct event_probe_data *edata = init_data;
   3243	int ret;
   3244
   3245	if (!mapper) {
   3246		mapper = allocate_ftrace_func_mapper();
   3247		if (!mapper)
   3248			return -ENODEV;
   3249		*data = mapper;
   3250	}
   3251
   3252	ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
   3253	if (ret < 0)
   3254		return ret;
   3255
   3256	edata->ref++;
   3257
   3258	return 0;
   3259}
   3260
   3261static int free_probe_data(void *data)
   3262{
   3263	struct event_probe_data *edata = data;
   3264
   3265	edata->ref--;
   3266	if (!edata->ref) {
   3267		/* Remove the SOFT_MODE flag */
   3268		__ftrace_event_enable_disable(edata->file, 0, 1);
   3269		trace_event_put_ref(edata->file->event_call);
   3270		kfree(edata);
   3271	}
   3272	return 0;
   3273}
   3274
   3275static void
   3276event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
   3277		  unsigned long ip, void *data)
   3278{
   3279	struct ftrace_func_mapper *mapper = data;
   3280	struct event_probe_data *edata;
   3281
   3282	if (!ip) {
   3283		if (!mapper)
   3284			return;
   3285		free_ftrace_func_mapper(mapper, free_probe_data);
   3286		return;
   3287	}
   3288
   3289	edata = ftrace_func_mapper_remove_ip(mapper, ip);
   3290
   3291	if (WARN_ON_ONCE(!edata))
   3292		return;
   3293
   3294	if (WARN_ON_ONCE(edata->ref <= 0))
   3295		return;
   3296
   3297	free_probe_data(edata);
   3298}
   3299
   3300static struct ftrace_probe_ops event_enable_probe_ops = {
   3301	.func			= event_enable_probe,
   3302	.print			= event_enable_print,
   3303	.init			= event_enable_init,
   3304	.free			= event_enable_free,
   3305};
   3306
   3307static struct ftrace_probe_ops event_enable_count_probe_ops = {
   3308	.func			= event_enable_count_probe,
   3309	.print			= event_enable_print,
   3310	.init			= event_enable_init,
   3311	.free			= event_enable_free,
   3312};
   3313
   3314static struct ftrace_probe_ops event_disable_probe_ops = {
   3315	.func			= event_enable_probe,
   3316	.print			= event_enable_print,
   3317	.init			= event_enable_init,
   3318	.free			= event_enable_free,
   3319};
   3320
   3321static struct ftrace_probe_ops event_disable_count_probe_ops = {
   3322	.func			= event_enable_count_probe,
   3323	.print			= event_enable_print,
   3324	.init			= event_enable_init,
   3325	.free			= event_enable_free,
   3326};
   3327
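        /*
         * event_enable_func() implements the "enable_event" and
         * "disable_event" commands of set_ftrace_filter. A usage sketch
         * (the function and event names are only examples):
         *
         *	echo 'schedule:enable_event:sched:sched_wakeup:5' > set_ftrace_filter
         *
         * enables the sched:sched_wakeup event when schedule() is hit,
         * at most 5 times; leave off ":5" for no limit.
         */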
   3328static int
   3329event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
   3330		  char *glob, char *cmd, char *param, int enabled)
   3331{
   3332	struct trace_event_file *file;
   3333	struct ftrace_probe_ops *ops;
   3334	struct event_probe_data *data;
   3335	const char *system;
   3336	const char *event;
   3337	char *number;
   3338	bool enable;
   3339	int ret;
   3340
   3341	if (!tr)
   3342		return -ENODEV;
   3343
   3344	/* hash funcs only work with set_ftrace_filter */
   3345	if (!enabled || !param)
   3346		return -EINVAL;
   3347
   3348	system = strsep(&param, ":");
   3349	if (!param)
   3350		return -EINVAL;
   3351
   3352	event = strsep(&param, ":");
   3353
   3354	mutex_lock(&event_mutex);
   3355
   3356	ret = -EINVAL;
   3357	file = find_event_file(tr, system, event);
   3358	if (!file)
   3359		goto out;
   3360
   3361	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
   3362
   3363	if (enable)
   3364		ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
   3365	else
   3366		ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
   3367
   3368	if (glob[0] == '!') {
   3369		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
   3370		goto out;
   3371	}
   3372
   3373	ret = -ENOMEM;
   3374
   3375	data = kzalloc(sizeof(*data), GFP_KERNEL);
   3376	if (!data)
   3377		goto out;
   3378
   3379	data->enable = enable;
   3380	data->count = -1;
   3381	data->file = file;
   3382
   3383	if (!param)
   3384		goto out_reg;
   3385
   3386	number = strsep(&param, ":");
   3387
   3388	ret = -EINVAL;
   3389	if (!strlen(number))
   3390		goto out_free;
   3391
   3392	/*
   3393	 * We use the callback data field (which is a pointer)
   3394	 * as our counter.
   3395	 */
   3396	ret = kstrtoul(number, 0, &data->count);
   3397	if (ret)
   3398		goto out_free;
   3399
   3400 out_reg:
   3401	/* Don't let event modules unload while probe registered */
   3402	ret = trace_event_try_get_ref(file->event_call);
   3403	if (!ret) {
   3404		ret = -EBUSY;
   3405		goto out_free;
   3406	}
   3407
   3408	ret = __ftrace_event_enable_disable(file, 1, 1);
   3409	if (ret < 0)
   3410		goto out_put;
   3411
   3412	ret = register_ftrace_function_probe(glob, tr, ops, data);
   3413	/*
    3414	 * On success, the above returns the number of functions enabled,
    3415	 * but if it didn't find any functions it returns zero.
    3416	 * Consider finding no functions a failure too.
   3417	 */
   3418	if (!ret) {
   3419		ret = -ENOENT;
   3420		goto out_disable;
   3421	} else if (ret < 0)
   3422		goto out_disable;
   3423	/* Just return zero, not the number of enabled functions */
   3424	ret = 0;
   3425 out:
   3426	mutex_unlock(&event_mutex);
   3427	return ret;
   3428
   3429 out_disable:
   3430	__ftrace_event_enable_disable(file, 0, 1);
   3431 out_put:
   3432	trace_event_put_ref(file->event_call);
   3433 out_free:
   3434	kfree(data);
   3435	goto out;
   3436}
   3437
   3438static struct ftrace_func_command event_enable_cmd = {
   3439	.name			= ENABLE_EVENT_STR,
   3440	.func			= event_enable_func,
   3441};
   3442
   3443static struct ftrace_func_command event_disable_cmd = {
   3444	.name			= DISABLE_EVENT_STR,
   3445	.func			= event_enable_func,
   3446};
   3447
   3448static __init int register_event_cmds(void)
   3449{
   3450	int ret;
   3451
   3452	ret = register_ftrace_command(&event_enable_cmd);
   3453	if (WARN_ON(ret < 0))
   3454		return ret;
   3455	ret = register_ftrace_command(&event_disable_cmd);
   3456	if (WARN_ON(ret < 0))
   3457		unregister_ftrace_command(&event_enable_cmd);
   3458	return ret;
   3459}
   3460#else
   3461static inline int register_event_cmds(void) { return 0; }
   3462#endif /* CONFIG_DYNAMIC_FTRACE */
   3463
   3464/*
   3465 * The top level array and trace arrays created by boot-time tracing
    3466 * have already had their trace_event_file descriptors created in order
   3467 * to allow for early events to be recorded.
    3468 * This function is called after tracefs has been initialized,
    3469 * and we now have to create the files associated with the events.
   3470 */
   3471static void __trace_early_add_event_dirs(struct trace_array *tr)
   3472{
   3473	struct trace_event_file *file;
   3474	int ret;
   3475
   3476
   3477	list_for_each_entry(file, &tr->events, list) {
   3478		ret = event_create_dir(tr->event_dir, file);
   3479		if (ret < 0)
   3480			pr_warn("Could not create directory for event %s\n",
   3481				trace_event_name(file->event_call));
   3482	}
   3483}
   3484
   3485/*
   3486 * For early boot up, the top trace array and the trace arrays created
    3487 * by boot-time tracing need to have a list of events that can be
   3488 * enabled. This must be done before the filesystem is set up in order
   3489 * to allow events to be traced early.
   3490 */
   3491void __trace_early_add_events(struct trace_array *tr)
   3492{
   3493	struct trace_event_call *call;
   3494	int ret;
   3495
   3496	list_for_each_entry(call, &ftrace_events, list) {
   3497		/* Early boot up should not have any modules loaded */
   3498		if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
   3499		    WARN_ON_ONCE(call->module))
   3500			continue;
   3501
   3502		ret = __trace_early_add_new_event(call, tr);
   3503		if (ret < 0)
   3504			pr_warn("Could not create early event %s\n",
   3505				trace_event_name(call));
   3506	}
   3507}
   3508
   3509/* Remove the event directory structure for a trace directory. */
   3510static void
   3511__trace_remove_event_dirs(struct trace_array *tr)
   3512{
   3513	struct trace_event_file *file, *next;
   3514
   3515	list_for_each_entry_safe(file, next, &tr->events, list)
   3516		remove_event_file_dir(file);
   3517}
   3518
   3519static void __add_event_to_tracers(struct trace_event_call *call)
   3520{
   3521	struct trace_array *tr;
   3522
   3523	list_for_each_entry(tr, &ftrace_trace_arrays, list)
   3524		__trace_add_new_event(call, tr);
   3525}
   3526
   3527extern struct trace_event_call *__start_ftrace_events[];
   3528extern struct trace_event_call *__stop_ftrace_events[];
   3529
   3530static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
   3531
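        /*
         * Handle the "trace_event=" boot parameter. Its value is a comma
         * separated list of events to enable as early as possible, e.g.
         * (the event names are only examples):
         *
         *	trace_event=sched:sched_switch,irq_handler_entry
         *
         * Each entry may be "<system>:<event>", "<system>:" (the whole
         * system) or just an event name.
         */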
   3532static __init int setup_trace_event(char *str)
   3533{
   3534	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
   3535	ring_buffer_expanded = true;
   3536	disable_tracing_selftest("running event tracing");
   3537
   3538	return 1;
   3539}
   3540__setup("trace_event=", setup_trace_event);
   3541
   3542/* Expects to have event_mutex held when called */
   3543static int
   3544create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
   3545{
   3546	struct dentry *d_events;
   3547	struct dentry *entry;
   3548
   3549	entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
   3550				  tr, &ftrace_set_event_fops);
   3551	if (!entry)
   3552		return -ENOMEM;
   3553
   3554	d_events = tracefs_create_dir("events", parent);
   3555	if (!d_events) {
   3556		pr_warn("Could not create tracefs 'events' directory\n");
   3557		return -ENOMEM;
   3558	}
   3559
   3560	entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
   3561				  tr, &ftrace_tr_enable_fops);
   3562	if (!entry)
   3563		return -ENOMEM;
   3564
    3565	/* These are not as crucial; just warn if they are not created */
   3566
   3567	trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
   3568			  tr, &ftrace_set_event_pid_fops);
   3569
   3570	trace_create_file("set_event_notrace_pid",
   3571			  TRACE_MODE_WRITE, parent, tr,
   3572			  &ftrace_set_event_notrace_pid_fops);
   3573
   3574	/* ring buffer internal formats */
   3575	trace_create_file("header_page", TRACE_MODE_READ, d_events,
   3576				  ring_buffer_print_page_header,
   3577				  &ftrace_show_header_fops);
   3578
   3579	trace_create_file("header_event", TRACE_MODE_READ, d_events,
   3580				  ring_buffer_print_entry_header,
   3581				  &ftrace_show_header_fops);
   3582
   3583	tr->event_dir = d_events;
   3584
   3585	return 0;
   3586}
   3587
   3588/**
    3589 * event_trace_add_tracer - add an instance of a trace_array to events
   3590 * @parent: The parent dentry to place the files/directories for events in
   3591 * @tr: The trace array associated with these events
   3592 *
   3593 * When a new instance is created, it needs to set up its events
   3594 * directory, as well as other files associated with events. It also
   3595 * creates the event hierarchy in the @parent/events directory.
   3596 *
   3597 * Returns 0 on success.
   3598 *
   3599 * Must be called with event_mutex held.
   3600 */
   3601int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
   3602{
   3603	int ret;
   3604
   3605	lockdep_assert_held(&event_mutex);
   3606
   3607	ret = create_event_toplevel_files(parent, tr);
   3608	if (ret)
   3609		goto out;
   3610
   3611	down_write(&trace_event_sem);
   3612	/* If tr already has the event list, it is initialized in early boot. */
   3613	if (unlikely(!list_empty(&tr->events)))
   3614		__trace_early_add_event_dirs(tr);
   3615	else
   3616		__trace_add_event_dirs(tr);
   3617	up_write(&trace_event_sem);
   3618
   3619 out:
   3620	return ret;
   3621}
   3622
   3623/*
   3624 * The top trace array already had its file descriptors created.
   3625 * Now the files themselves need to be created.
   3626 */
   3627static __init int
   3628early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
   3629{
   3630	int ret;
   3631
   3632	mutex_lock(&event_mutex);
   3633
   3634	ret = create_event_toplevel_files(parent, tr);
   3635	if (ret)
   3636		goto out_unlock;
   3637
   3638	down_write(&trace_event_sem);
   3639	__trace_early_add_event_dirs(tr);
   3640	up_write(&trace_event_sem);
   3641
   3642 out_unlock:
   3643	mutex_unlock(&event_mutex);
   3644
   3645	return ret;
   3646}
   3647
   3648/* Must be called with event_mutex held */
   3649int event_trace_del_tracer(struct trace_array *tr)
   3650{
   3651	lockdep_assert_held(&event_mutex);
   3652
   3653	/* Disable any event triggers and associated soft-disabled events */
   3654	clear_event_triggers(tr);
   3655
   3656	/* Clear the pid list */
   3657	__ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
   3658
   3659	/* Disable any running events */
   3660	__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
   3661
   3662	/* Make sure no more events are being executed */
   3663	tracepoint_synchronize_unregister();
   3664
   3665	down_write(&trace_event_sem);
   3666	__trace_remove_event_dirs(tr);
   3667	tracefs_remove(tr->event_dir);
   3668	up_write(&trace_event_sem);
   3669
   3670	tr->event_dir = NULL;
   3671
   3672	return 0;
   3673}
   3674
   3675static __init int event_trace_memsetup(void)
   3676{
   3677	field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
   3678	file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
   3679	return 0;
   3680}
   3681
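        /*
         * Enable the events listed in bootup_event_buf, the comma-separated
         * list passed on the kernel command line (trace_event=). When
         * disable_first is set, each event is disabled before being
         * re-enabled, which is needed to restart syscall events.
         */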
   3682static __init void
   3683early_enable_events(struct trace_array *tr, bool disable_first)
   3684{
   3685	char *buf = bootup_event_buf;
   3686	char *token;
   3687	int ret;
   3688
   3689	while (true) {
   3690		token = strsep(&buf, ",");
   3691
   3692		if (!token)
   3693			break;
   3694
   3695		if (*token) {
   3696			/* Restarting syscalls requires that we stop them first */
   3697			if (disable_first)
   3698				ftrace_set_clr_event(tr, token, 0);
   3699
   3700			ret = ftrace_set_clr_event(tr, token, 1);
   3701			if (ret)
   3702				pr_warn("Failed to enable trace event: %s\n", token);
   3703		}
   3704
   3705		/* Put back the comma to allow this to be called again */
   3706		if (buf)
   3707			*(buf - 1) = ',';
   3708	}
   3709}
   3710
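        /*
         * Early init: register every builtin event found between
         * __start_ftrace_events and __stop_ftrace_events, create the event
         * file entries on the top trace array, and enable any events
         * requested on the command line.
         */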
   3711static __init int event_trace_enable(void)
   3712{
   3713	struct trace_array *tr = top_trace_array();
   3714	struct trace_event_call **iter, *call;
   3715	int ret;
   3716
   3717	if (!tr)
   3718		return -ENODEV;
   3719
   3720	for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
   3721
   3722		call = *iter;
   3723		ret = event_init(call);
   3724		if (!ret)
   3725			list_add(&call->list, &ftrace_events);
   3726	}
   3727
   3728	/*
   3729	 * We need the top trace array to have a working set of trace
   3730	 * points at early init, before the debug files and directories
   3731	 * are created. Create the file entries now, and attach them
   3732	 * to the actual file dentries later.
   3733	 */
   3734	__trace_early_add_events(tr);
   3735
   3736	early_enable_events(tr, false);
   3737
   3738	trace_printk_start_comm();
   3739
   3740	register_event_cmds();
   3741
   3742	register_trigger_cmds();
   3743
   3744	return 0;
   3745}
   3746
   3747/*
   3748 * event_trace_enable() is called from trace_event_init() first to
   3749 * initialize events and perhaps start any events that are on the
   3750 * command line. Unfortunately, there are some events that will not
   3751 * start this early, like the system call tracepoints that need
   3752 * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
    3753 * event_trace_enable() is called before pid 1 starts, so that flag is
    3754 * never set and the syscall tracepoints are never reached, even though
    3755 * the event is enabled (and does nothing); hence this second pass.
   3756 */
   3757static __init int event_trace_enable_again(void)
   3758{
   3759	struct trace_array *tr;
   3760
   3761	tr = top_trace_array();
   3762	if (!tr)
   3763		return -ENODEV;
   3764
   3765	early_enable_events(tr, true);
   3766
   3767	return 0;
   3768}
   3769
   3770early_initcall(event_trace_enable_again);
   3771
    3772/* Init the fields that are not related to tracefs */
   3773static __init int event_trace_init_fields(void)
   3774{
   3775	if (trace_define_generic_fields())
    3776		pr_warn("tracing: Failed to allocate generic fields\n");
   3777
   3778	if (trace_define_common_fields())
    3779		pr_warn("tracing: Failed to allocate common fields\n");
   3780
   3781	return 0;
   3782}
   3783
   3784__init int event_trace_init(void)
   3785{
   3786	struct trace_array *tr;
   3787	int ret;
   3788
   3789	tr = top_trace_array();
   3790	if (!tr)
   3791		return -ENODEV;
   3792
   3793	trace_create_file("available_events", TRACE_MODE_READ,
   3794			  NULL, tr, &ftrace_avail_fops);
   3795
   3796	ret = early_event_add_tracer(NULL, tr);
   3797	if (ret)
   3798		return ret;
   3799
   3800#ifdef CONFIG_MODULES
   3801	ret = register_module_notifier(&trace_module_nb);
   3802	if (ret)
   3803		pr_warn("Failed to register trace events module notifier\n");
   3804#endif
   3805
   3806	eventdir_initialized = true;
   3807
   3808	return 0;
   3809}
   3810
   3811void __init trace_event_init(void)
   3812{
   3813	event_trace_memsetup();
   3814	init_ftrace_syscalls();
   3815	event_trace_enable();
   3816	event_trace_init_fields();
   3817}
   3818
   3819#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
   3820
   3821static DEFINE_SPINLOCK(test_spinlock);
   3822static DEFINE_SPINLOCK(test_spinlock_irq);
   3823static DEFINE_MUTEX(test_mutex);
   3824
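        /*
         * Work item run on each CPU by the self test: take a couple of
         * spinlocks (one with interrupts disabled), delay, then take a mutex
         * and sleep. This should exercise a variety of locking and scheduler
         * tracepoints.
         */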
   3825static __init void test_work(struct work_struct *dummy)
   3826{
   3827	spin_lock(&test_spinlock);
   3828	spin_lock_irq(&test_spinlock_irq);
   3829	udelay(1);
   3830	spin_unlock_irq(&test_spinlock_irq);
   3831	spin_unlock(&test_spinlock);
   3832
   3833	mutex_lock(&test_mutex);
   3834	msleep(1);
   3835	mutex_unlock(&test_mutex);
   3836}
   3837
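        /*
         * Self-test thread: do a kmalloc/kfree, run test_work() on every CPU,
         * then wait until kthread_stop() is called, generating more events
         * along the way.
         */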
   3838static __init int event_test_thread(void *unused)
   3839{
   3840	void *test_malloc;
   3841
   3842	test_malloc = kmalloc(1234, GFP_KERNEL);
   3843	if (!test_malloc)
   3844		pr_info("failed to kmalloc\n");
   3845
   3846	schedule_on_each_cpu(test_work);
   3847
   3848	kfree(test_malloc);
   3849
   3850	set_current_state(TASK_INTERRUPTIBLE);
   3851	while (!kthread_should_stop()) {
   3852		schedule();
   3853		set_current_state(TASK_INTERRUPTIBLE);
   3854	}
   3855	__set_current_state(TASK_RUNNING);
   3856
   3857	return 0;
   3858}
   3859
   3860/*
   3861 * Do various things that may trigger events.
   3862 */
   3863static __init void event_test_stuff(void)
   3864{
   3865	struct task_struct *test_thread;
   3866
   3867	test_thread = kthread_run(event_test_thread, NULL, "test-events");
   3868	msleep(1);
   3869	kthread_stop(test_thread);
   3870}
   3871
   3872/*
   3873 * For every trace event defined, we will test each trace point separately,
   3874 * and then by groups, and finally all trace points.
   3875 */
   3876static __init void event_trace_self_tests(void)
   3877{
   3878	struct trace_subsystem_dir *dir;
   3879	struct trace_event_file *file;
   3880	struct trace_event_call *call;
   3881	struct event_subsystem *system;
   3882	struct trace_array *tr;
   3883	int ret;
   3884
   3885	tr = top_trace_array();
   3886	if (!tr)
   3887		return;
   3888
   3889	pr_info("Running tests on trace events:\n");
   3890
   3891	list_for_each_entry(file, &tr->events, list) {
   3892
   3893		call = file->event_call;
   3894
   3895		/* Only test those that have a probe */
   3896		if (!call->class || !call->class->probe)
   3897			continue;
   3898
   3899/*
    3900 * Testing syscall events here is pretty useless, but
    3901 * we still do it if configured. It is time consuming, though.
   3902 * What we really need is a user thread to perform the
   3903 * syscalls as we test.
   3904 */
   3905#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
   3906		if (call->class->system &&
   3907		    strcmp(call->class->system, "syscalls") == 0)
   3908			continue;
   3909#endif
   3910
   3911		pr_info("Testing event %s: ", trace_event_name(call));
   3912
   3913		/*
   3914		 * If an event is already enabled, someone is using
   3915		 * it and the self test should not be on.
   3916		 */
   3917		if (file->flags & EVENT_FILE_FL_ENABLED) {
   3918			pr_warn("Enabled event during self test!\n");
   3919			WARN_ON_ONCE(1);
   3920			continue;
   3921		}
   3922
   3923		ftrace_event_enable_disable(file, 1);
   3924		event_test_stuff();
   3925		ftrace_event_enable_disable(file, 0);
   3926
   3927		pr_cont("OK\n");
   3928	}
   3929
   3930	/* Now test at the sub system level */
   3931
   3932	pr_info("Running tests on trace event systems:\n");
   3933
   3934	list_for_each_entry(dir, &tr->systems, list) {
   3935
   3936		system = dir->subsystem;
   3937
   3938		/* the ftrace system is special, skip it */
   3939		if (strcmp(system->name, "ftrace") == 0)
   3940			continue;
   3941
   3942		pr_info("Testing event system %s: ", system->name);
   3943
   3944		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
   3945		if (WARN_ON_ONCE(ret)) {
   3946			pr_warn("error enabling system %s\n",
   3947				system->name);
   3948			continue;
   3949		}
   3950
   3951		event_test_stuff();
   3952
   3953		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
   3954		if (WARN_ON_ONCE(ret)) {
   3955			pr_warn("error disabling system %s\n",
   3956				system->name);
   3957			continue;
   3958		}
   3959
   3960		pr_cont("OK\n");
   3961	}
   3962
   3963	/* Test with all events enabled */
   3964
   3965	pr_info("Running tests on all trace events:\n");
   3966	pr_info("Testing all events: ");
   3967
   3968	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
   3969	if (WARN_ON_ONCE(ret)) {
   3970		pr_warn("error enabling all events\n");
   3971		return;
   3972	}
   3973
   3974	event_test_stuff();
   3975
   3976	/* reset sysname */
   3977	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
   3978	if (WARN_ON_ONCE(ret)) {
   3979		pr_warn("error disabling all events\n");
   3980		return;
   3981	}
   3982
   3983	pr_cont("OK\n");
   3984}
   3985
   3986#ifdef CONFIG_FUNCTION_TRACER
   3987
   3988static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
   3989
   3990static struct trace_event_file event_trace_file __initdata;
   3991
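        /*
         * ftrace callback used when the self tests are re-run with the
         * function tracer enabled: record a TRACE_FN entry for the traced
         * function. The per-CPU ftrace_test_event_disable counter guards
         * against recursion.
         */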
   3992static void __init
   3993function_test_events_call(unsigned long ip, unsigned long parent_ip,
   3994			  struct ftrace_ops *op, struct ftrace_regs *regs)
   3995{
   3996	struct trace_buffer *buffer;
   3997	struct ring_buffer_event *event;
   3998	struct ftrace_entry *entry;
   3999	unsigned int trace_ctx;
   4000	long disabled;
   4001	int cpu;
   4002
   4003	trace_ctx = tracing_gen_ctx();
   4004	preempt_disable_notrace();
   4005	cpu = raw_smp_processor_id();
   4006	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
   4007
   4008	if (disabled != 1)
   4009		goto out;
   4010
   4011	event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
   4012						TRACE_FN, sizeof(*entry),
   4013						trace_ctx);
   4014	if (!event)
   4015		goto out;
   4016	entry	= ring_buffer_event_data(event);
   4017	entry->ip			= ip;
   4018	entry->parent_ip		= parent_ip;
   4019
   4020	event_trigger_unlock_commit(&event_trace_file, buffer, event,
   4021				    entry, trace_ctx);
   4022 out:
   4023	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
   4024	preempt_enable_notrace();
   4025}
   4026
   4027static struct ftrace_ops trace_ops __initdata  =
   4028{
   4029	.func = function_test_events_call,
   4030};
   4031
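        /*
         * Re-run the event self tests with the function tracer registered, so
         * the events are also exercised while every function call is being
         * traced.
         */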
   4032static __init void event_trace_self_test_with_function(void)
   4033{
   4034	int ret;
   4035
   4036	event_trace_file.tr = top_trace_array();
   4037	if (WARN_ON(!event_trace_file.tr))
   4038		return;
   4039
   4040	ret = register_ftrace_function(&trace_ops);
   4041	if (WARN_ON(ret < 0)) {
   4042		pr_info("Failed to enable function tracer for event tests\n");
   4043		return;
   4044	}
   4045	pr_info("Running tests again, along with the function tracer\n");
   4046	event_trace_self_tests();
   4047	unregister_ftrace_function(&trace_ops);
   4048}
   4049#else
   4050static __init void event_trace_self_test_with_function(void)
   4051{
   4052}
   4053#endif
   4054
   4055static __init int event_trace_self_tests_init(void)
   4056{
   4057	if (!tracing_selftest_disabled) {
   4058		event_trace_self_tests();
   4059		event_trace_self_test_with_function();
   4060	}
   4061
   4062	return 0;
   4063}
   4064
   4065late_initcall(event_trace_self_tests_init);
   4066
   4067#endif