cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

trace.c (255357B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * ring buffer based function tracer
      4 *
      5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
      6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
      7 *
      8 * Originally taken from the RT patch by:
      9 *    Arnaldo Carvalho de Melo <acme@redhat.com>
     10 *
     11 * Based on code from the latency_tracer, that is:
     12 *  Copyright (C) 2004-2006 Ingo Molnar
     13 *  Copyright (C) 2004 Nadia Yvette Chambers
     14 */
     15#include <linux/ring_buffer.h>
     16#include <generated/utsrelease.h>
     17#include <linux/stacktrace.h>
     18#include <linux/writeback.h>
     19#include <linux/kallsyms.h>
     20#include <linux/security.h>
     21#include <linux/seq_file.h>
     22#include <linux/notifier.h>
     23#include <linux/irqflags.h>
     24#include <linux/debugfs.h>
     25#include <linux/tracefs.h>
     26#include <linux/pagemap.h>
     27#include <linux/hardirq.h>
     28#include <linux/linkage.h>
     29#include <linux/uaccess.h>
     30#include <linux/vmalloc.h>
     31#include <linux/ftrace.h>
     32#include <linux/module.h>
     33#include <linux/percpu.h>
     34#include <linux/splice.h>
     35#include <linux/kdebug.h>
     36#include <linux/string.h>
     37#include <linux/mount.h>
     38#include <linux/rwsem.h>
     39#include <linux/slab.h>
     40#include <linux/ctype.h>
     41#include <linux/init.h>
     42#include <linux/panic_notifier.h>
     43#include <linux/poll.h>
     44#include <linux/nmi.h>
     45#include <linux/fs.h>
     46#include <linux/trace.h>
     47#include <linux/sched/clock.h>
     48#include <linux/sched/rt.h>
     49#include <linux/fsnotify.h>
     50#include <linux/irq_work.h>
     51#include <linux/workqueue.h>
     52
     53#include "trace.h"
     54#include "trace_output.h"
     55
     56/*
     57 * On boot up, the ring buffer is set to the minimum size, so that
     58 * we do not waste memory on systems that are not using tracing.
     59 */
     60bool ring_buffer_expanded;
     61
     62/*
     63 * We need to change this state when a selftest is running.
      64 * A selftest will look into the ring-buffer to count the
      65 * entries inserted during the selftest, although concurrent
      66 * insertions into the ring-buffer, such as trace_printk(), could
      67 * occur at the same time, giving false positive or negative results.
     68 */
     69static bool __read_mostly tracing_selftest_running;
     70
     71/*
     72 * If boot-time tracing including tracers/events via kernel cmdline
     73 * is running, we do not want to run SELFTEST.
     74 */
     75bool __read_mostly tracing_selftest_disabled;
     76
     77#ifdef CONFIG_FTRACE_STARTUP_TEST
     78void __init disable_tracing_selftest(const char *reason)
     79{
     80	if (!tracing_selftest_disabled) {
     81		tracing_selftest_disabled = true;
     82		pr_info("Ftrace startup test is disabled due to %s\n", reason);
     83	}
     84}
     85#endif
     86
     87/* Pipe tracepoints to printk */
     88struct trace_iterator *tracepoint_print_iter;
     89int tracepoint_printk;
     90static bool tracepoint_printk_stop_on_boot __initdata;
     91static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
     92
     93/* For tracers that don't implement custom flags */
     94static struct tracer_opt dummy_tracer_opt[] = {
     95	{ }
     96};
     97
     98static int
     99dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
    100{
    101	return 0;
    102}
    103
    104/*
    105 * To prevent the comm cache from being overwritten when no
    106 * tracing is active, only save the comm when a trace event
    107 * occurred.
    108 */
    109static DEFINE_PER_CPU(bool, trace_taskinfo_save);
    110
    111/*
    112 * Kill all tracing for good (never come back).
     113 * It is initialized to 1 and is set back to zero only when the
     114 * tracer initialization succeeds; that is the only place that
     115 * clears it.
    116 */
    117static int tracing_disabled = 1;
    118
    119cpumask_var_t __read_mostly	tracing_buffer_mask;
    120
    121/*
    122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
    123 *
    124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
    125 * is set, then ftrace_dump is called. This will output the contents
    126 * of the ftrace buffers to the console.  This is very useful for
     127 * capturing traces that lead to crashes and outputting them to a
    128 * serial console.
    129 *
     130 * It is off by default, but you can enable it either by specifying
     131 * "ftrace_dump_on_oops" on the kernel command line or by setting
     132 * /proc/sys/kernel/ftrace_dump_on_oops:
    133 * Set 1 if you want to dump buffers of all CPUs
    134 * Set 2 if you want to dump the buffer of the CPU that triggered oops
    135 */
    136
    137enum ftrace_dump_mode ftrace_dump_on_oops;
    138
    139/* When set, tracing will stop when a WARN*() is hit */
    140int __disable_trace_on_warning;
    141
    142#ifdef CONFIG_TRACE_EVAL_MAP_FILE
    143/* Map of enums to their values, for "eval_map" file */
    144struct trace_eval_map_head {
    145	struct module			*mod;
    146	unsigned long			length;
    147};
    148
    149union trace_eval_map_item;
    150
    151struct trace_eval_map_tail {
    152	/*
    153	 * "end" is first and points to NULL as it must be different
    154	 * than "mod" or "eval_string"
    155	 */
    156	union trace_eval_map_item	*next;
    157	const char			*end;	/* points to NULL */
    158};
    159
    160static DEFINE_MUTEX(trace_eval_mutex);
    161
    162/*
    163 * The trace_eval_maps are saved in an array with two extra elements,
    164 * one at the beginning, and one at the end. The beginning item contains
    165 * the count of the saved maps (head.length), and the module they
    166 * belong to if not built in (head.mod). The ending item contains a
    167 * pointer to the next array of saved eval_map items.
    168 */
    169union trace_eval_map_item {
    170	struct trace_eval_map		map;
    171	struct trace_eval_map_head	head;
    172	struct trace_eval_map_tail	tail;
    173};
    174
    175static union trace_eval_map_item *trace_eval_maps;
    176#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
    177
    178int tracing_set_tracer(struct trace_array *tr, const char *buf);
    179static void ftrace_trace_userstack(struct trace_array *tr,
    180				   struct trace_buffer *buffer,
    181				   unsigned int trace_ctx);
    182
    183#define MAX_TRACER_SIZE		100
    184static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
    185static char *default_bootup_tracer;
    186
    187static bool allocate_snapshot;
    188static bool snapshot_at_boot;
    189
    190static int __init set_cmdline_ftrace(char *str)
    191{
    192	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
    193	default_bootup_tracer = bootup_tracer_buf;
    194	/* We are using ftrace early, expand it */
    195	ring_buffer_expanded = true;
    196	return 1;
    197}
    198__setup("ftrace=", set_cmdline_ftrace);
    199
    200static int __init set_ftrace_dump_on_oops(char *str)
    201{
    202	if (*str++ != '=' || !*str || !strcmp("1", str)) {
    203		ftrace_dump_on_oops = DUMP_ALL;
    204		return 1;
    205	}
    206
    207	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
    208		ftrace_dump_on_oops = DUMP_ORIG;
     209		return 1;
     210	}
     211
     212	return 0;
    213}
    214__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
    215
    216static int __init stop_trace_on_warning(char *str)
    217{
    218	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
    219		__disable_trace_on_warning = 1;
    220	return 1;
    221}
    222__setup("traceoff_on_warning", stop_trace_on_warning);
    223
    224static int __init boot_alloc_snapshot(char *str)
    225{
    226	allocate_snapshot = true;
    227	/* We also need the main ring buffer expanded */
    228	ring_buffer_expanded = true;
    229	return 1;
    230}
    231__setup("alloc_snapshot", boot_alloc_snapshot);
    232
    233
    234static int __init boot_snapshot(char *str)
    235{
    236	snapshot_at_boot = true;
    237	boot_alloc_snapshot(str);
    238	return 1;
    239}
    240__setup("ftrace_boot_snapshot", boot_snapshot);
    241
    242
    243static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
    244
    245static int __init set_trace_boot_options(char *str)
    246{
    247	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
    248	return 1;
    249}
    250__setup("trace_options=", set_trace_boot_options);
    251
    252static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
    253static char *trace_boot_clock __initdata;
    254
    255static int __init set_trace_boot_clock(char *str)
    256{
    257	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
    258	trace_boot_clock = trace_boot_clock_buf;
    259	return 1;
    260}
    261__setup("trace_clock=", set_trace_boot_clock);
    262
    263static int __init set_tracepoint_printk(char *str)
    264{
    265	/* Ignore the "tp_printk_stop_on_boot" param */
    266	if (*str == '_')
    267		return 0;
    268
    269	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
    270		tracepoint_printk = 1;
    271	return 1;
    272}
    273__setup("tp_printk", set_tracepoint_printk);
    274
    275static int __init set_tracepoint_printk_stop(char *str)
    276{
    277	tracepoint_printk_stop_on_boot = true;
    278	return 1;
    279}
    280__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
    281
    282unsigned long long ns2usecs(u64 nsec)
    283{
    284	nsec += 500;
    285	do_div(nsec, 1000);
    286	return nsec;
    287}
    288
    289static void
    290trace_process_export(struct trace_export *export,
    291	       struct ring_buffer_event *event, int flag)
    292{
    293	struct trace_entry *entry;
    294	unsigned int size = 0;
    295
    296	if (export->flags & flag) {
    297		entry = ring_buffer_event_data(event);
    298		size = ring_buffer_event_length(event);
    299		export->write(export, entry, size);
    300	}
    301}
    302
    303static DEFINE_MUTEX(ftrace_export_lock);
    304
    305static struct trace_export __rcu *ftrace_exports_list __read_mostly;
    306
    307static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
    308static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
    309static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
    310
    311static inline void ftrace_exports_enable(struct trace_export *export)
    312{
    313	if (export->flags & TRACE_EXPORT_FUNCTION)
    314		static_branch_inc(&trace_function_exports_enabled);
    315
    316	if (export->flags & TRACE_EXPORT_EVENT)
    317		static_branch_inc(&trace_event_exports_enabled);
    318
    319	if (export->flags & TRACE_EXPORT_MARKER)
    320		static_branch_inc(&trace_marker_exports_enabled);
    321}
    322
    323static inline void ftrace_exports_disable(struct trace_export *export)
    324{
    325	if (export->flags & TRACE_EXPORT_FUNCTION)
    326		static_branch_dec(&trace_function_exports_enabled);
    327
    328	if (export->flags & TRACE_EXPORT_EVENT)
    329		static_branch_dec(&trace_event_exports_enabled);
    330
    331	if (export->flags & TRACE_EXPORT_MARKER)
    332		static_branch_dec(&trace_marker_exports_enabled);
    333}
    334
    335static void ftrace_exports(struct ring_buffer_event *event, int flag)
    336{
    337	struct trace_export *export;
    338
    339	preempt_disable_notrace();
    340
    341	export = rcu_dereference_raw_check(ftrace_exports_list);
    342	while (export) {
    343		trace_process_export(export, event, flag);
    344		export = rcu_dereference_raw_check(export->next);
    345	}
    346
    347	preempt_enable_notrace();
    348}
    349
    350static inline void
    351add_trace_export(struct trace_export **list, struct trace_export *export)
    352{
    353	rcu_assign_pointer(export->next, *list);
    354	/*
    355	 * We are entering export into the list but another
    356	 * CPU might be walking that list. We need to make sure
    357	 * the export->next pointer is valid before another CPU sees
    358	 * the export pointer included into the list.
    359	 */
    360	rcu_assign_pointer(*list, export);
    361}
    362
    363static inline int
    364rm_trace_export(struct trace_export **list, struct trace_export *export)
    365{
    366	struct trace_export **p;
    367
    368	for (p = list; *p != NULL; p = &(*p)->next)
    369		if (*p == export)
    370			break;
    371
    372	if (*p != export)
    373		return -1;
    374
    375	rcu_assign_pointer(*p, (*p)->next);
    376
    377	return 0;
    378}
    379
    380static inline void
    381add_ftrace_export(struct trace_export **list, struct trace_export *export)
    382{
    383	ftrace_exports_enable(export);
    384
    385	add_trace_export(list, export);
    386}
    387
    388static inline int
    389rm_ftrace_export(struct trace_export **list, struct trace_export *export)
    390{
    391	int ret;
    392
    393	ret = rm_trace_export(list, export);
    394	ftrace_exports_disable(export);
    395
    396	return ret;
    397}
    398
    399int register_ftrace_export(struct trace_export *export)
    400{
    401	if (WARN_ON_ONCE(!export->write))
    402		return -1;
    403
    404	mutex_lock(&ftrace_export_lock);
    405
    406	add_ftrace_export(&ftrace_exports_list, export);
    407
    408	mutex_unlock(&ftrace_export_lock);
    409
    410	return 0;
    411}
    412EXPORT_SYMBOL_GPL(register_ftrace_export);
    413
    414int unregister_ftrace_export(struct trace_export *export)
    415{
    416	int ret;
    417
    418	mutex_lock(&ftrace_export_lock);
    419
    420	ret = rm_ftrace_export(&ftrace_exports_list, export);
    421
    422	mutex_unlock(&ftrace_export_lock);
    423
    424	return ret;
    425}
    426EXPORT_SYMBOL_GPL(unregister_ftrace_export);
    427
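        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * a minimal trace_export consumer built from the API exported above.
         * The names demo_write/demo_export are hypothetical; the callback
         * receives the raw entry and its length, exactly as handed out by
         * trace_process_export().
         */
        #if 0	/* example only */
        static void demo_write(struct trace_export *export, const void *entry,
        		       unsigned int size)
        {
        	/* @entry is the trace entry payload, @size its length in bytes */
        }

        static struct trace_export demo_export = {
        	.write	= demo_write,
        	.flags	= TRACE_EXPORT_EVENT,
        };

        static int __init demo_export_init(void)
        {
        	return register_ftrace_export(&demo_export);
        }

        static void __exit demo_export_exit(void)
        {
        	unregister_ftrace_export(&demo_export);
        }
        #endif
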
    428/* trace_flags holds trace_options default values */
    429#define TRACE_DEFAULT_FLAGS						\
    430	(FUNCTION_DEFAULT_FLAGS |					\
    431	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
    432	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
    433	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
    434	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
    435	 TRACE_ITER_HASH_PTR)
    436
    437/* trace_options that are only supported by global_trace */
    438#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
    439	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
    440
    441/* trace_flags that are default zero for instances */
    442#define ZEROED_TRACE_FLAGS \
    443	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
    444
    445/*
    446 * The global_trace is the descriptor that holds the top-level tracing
    447 * buffers for the live tracing.
    448 */
    449static struct trace_array global_trace = {
    450	.trace_flags = TRACE_DEFAULT_FLAGS,
    451};
    452
    453LIST_HEAD(ftrace_trace_arrays);
    454
    455int trace_array_get(struct trace_array *this_tr)
    456{
    457	struct trace_array *tr;
    458	int ret = -ENODEV;
    459
    460	mutex_lock(&trace_types_lock);
    461	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
    462		if (tr == this_tr) {
    463			tr->ref++;
    464			ret = 0;
    465			break;
    466		}
    467	}
    468	mutex_unlock(&trace_types_lock);
    469
    470	return ret;
    471}
    472
    473static void __trace_array_put(struct trace_array *this_tr)
    474{
    475	WARN_ON(!this_tr->ref);
    476	this_tr->ref--;
    477}
    478
    479/**
    480 * trace_array_put - Decrement the reference counter for this trace array.
    481 * @this_tr : pointer to the trace array
    482 *
    483 * NOTE: Use this when we no longer need the trace array returned by
    484 * trace_array_get_by_name(). This ensures the trace array can be later
    485 * destroyed.
    486 *
    487 */
    488void trace_array_put(struct trace_array *this_tr)
    489{
    490	if (!this_tr)
    491		return;
    492
    493	mutex_lock(&trace_types_lock);
    494	__trace_array_put(this_tr);
    495	mutex_unlock(&trace_types_lock);
    496}
    497EXPORT_SYMBOL_GPL(trace_array_put);
    498
    499int tracing_check_open_get_tr(struct trace_array *tr)
    500{
    501	int ret;
    502
    503	ret = security_locked_down(LOCKDOWN_TRACEFS);
    504	if (ret)
    505		return ret;
    506
    507	if (tracing_disabled)
    508		return -ENODEV;
    509
    510	if (tr && trace_array_get(tr) < 0)
    511		return -ENODEV;
    512
    513	return 0;
    514}
    515
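        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * the usual open/release pairing around the two helpers above for a
         * per-instance tracefs file.  demo_open/demo_release and the use of
         * inode->i_private are hypothetical wiring.
         */
        #if 0	/* example only */
        static int demo_open(struct inode *inode, struct file *filp)
        {
        	struct trace_array *tr = inode->i_private;
        	int ret;

        	/* rejects lockdown, tracing_disabled and vanished instances */
        	ret = tracing_check_open_get_tr(tr);
        	if (ret)
        		return ret;

        	filp->private_data = tr;
        	return 0;
        }

        static int demo_release(struct inode *inode, struct file *filp)
        {
        	struct trace_array *tr = inode->i_private;

        	trace_array_put(tr);	/* balance the reference taken at open */
        	return 0;
        }
        #endif
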
    516int call_filter_check_discard(struct trace_event_call *call, void *rec,
    517			      struct trace_buffer *buffer,
    518			      struct ring_buffer_event *event)
    519{
    520	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
    521	    !filter_match_preds(call->filter, rec)) {
    522		__trace_event_discard_commit(buffer, event);
    523		return 1;
    524	}
    525
    526	return 0;
    527}
    528
    529/**
    530 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
    531 * @filtered_pids: The list of pids to check
    532 * @search_pid: The PID to find in @filtered_pids
    533 *
    534 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
    535 */
    536bool
    537trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
    538{
    539	return trace_pid_list_is_set(filtered_pids, search_pid);
    540}
    541
    542/**
    543 * trace_ignore_this_task - should a task be ignored for tracing
    544 * @filtered_pids: The list of pids to check
    545 * @filtered_no_pids: The list of pids not to be traced
    546 * @task: The task that should be ignored if not filtered
    547 *
    548 * Checks if @task should be traced or not from @filtered_pids.
    549 * Returns true if @task should *NOT* be traced.
    550 * Returns false if @task should be traced.
    551 */
    552bool
    553trace_ignore_this_task(struct trace_pid_list *filtered_pids,
    554		       struct trace_pid_list *filtered_no_pids,
    555		       struct task_struct *task)
    556{
    557	/*
    558	 * If filtered_no_pids is not empty, and the task's pid is listed
    559	 * in filtered_no_pids, then return true.
    560	 * Otherwise, if filtered_pids is empty, that means we can
    561	 * trace all tasks. If it has content, then only trace pids
    562	 * within filtered_pids.
    563	 */
    564
    565	return (filtered_pids &&
    566		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
    567		(filtered_no_pids &&
    568		 trace_find_filtered_pid(filtered_no_pids, task->pid));
    569}
    570
    571/**
    572 * trace_filter_add_remove_task - Add or remove a task from a pid_list
    573 * @pid_list: The list to modify
    574 * @self: The current task for fork or NULL for exit
    575 * @task: The task to add or remove
    576 *
    577 * If adding a task, if @self is defined, the task is only added if @self
    578 * is also included in @pid_list. This happens on fork and tasks should
    579 * only be added when the parent is listed. If @self is NULL, then the
    580 * @task pid will be removed from the list, which would happen on exit
    581 * of a task.
    582 */
    583void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
    584				  struct task_struct *self,
    585				  struct task_struct *task)
    586{
    587	if (!pid_list)
    588		return;
    589
    590	/* For forks, we only add if the forking task is listed */
    591	if (self) {
    592		if (!trace_find_filtered_pid(pid_list, self->pid))
    593			return;
    594	}
    595
    596	/* "self" is set for forks, and NULL for exits */
    597	if (self)
    598		trace_pid_list_set(pid_list, task->pid);
    599	else
    600		trace_pid_list_clear(pid_list, task->pid);
    601}
    602
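        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * how a pid_list is typically kept in sync over the task lifetime,
         * following the rules documented above.  The demo_* helpers are
         * hypothetical; in the kernel this is driven from the
         * sched_process_fork/exit tracepoints.
         */
        #if 0	/* example only */
        static void demo_on_fork(struct trace_pid_list *pid_list,
        			 struct task_struct *parent,
        			 struct task_struct *child)
        {
        	/* the child is added only if the parent is already traced */
        	trace_filter_add_remove_task(pid_list, parent, child);
        }

        static void demo_on_exit(struct trace_pid_list *pid_list,
        			 struct task_struct *task)
        {
        	/* a NULL "self" means: remove @task from the list */
        	trace_filter_add_remove_task(pid_list, NULL, task);
        }
        #endif
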
    603/**
    604 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
    605 * @pid_list: The pid list to show
    606 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
    607 * @pos: The position of the file
    608 *
    609 * This is used by the seq_file "next" operation to iterate the pids
    610 * listed in a trace_pid_list structure.
    611 *
    612 * Returns the pid+1 as we want to display pid of zero, but NULL would
    613 * stop the iteration.
    614 */
    615void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
    616{
    617	long pid = (unsigned long)v;
    618	unsigned int next;
    619
    620	(*pos)++;
    621
    622	/* pid already is +1 of the actual previous bit */
    623	if (trace_pid_list_next(pid_list, pid, &next) < 0)
    624		return NULL;
    625
    626	pid = next;
    627
    628	/* Return pid + 1 to allow zero to be represented */
    629	return (void *)(pid + 1);
    630}
    631
    632/**
    633 * trace_pid_start - Used for seq_file to start reading pid lists
    634 * @pid_list: The pid list to show
    635 * @pos: The position of the file
    636 *
    637 * This is used by seq_file "start" operation to start the iteration
    638 * of listing pids.
    639 *
    640 * Returns the pid+1 as we want to display pid of zero, but NULL would
    641 * stop the iteration.
    642 */
    643void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
    644{
    645	unsigned long pid;
    646	unsigned int first;
    647	loff_t l = 0;
    648
    649	if (trace_pid_list_first(pid_list, &first) < 0)
    650		return NULL;
    651
    652	pid = first;
    653
    654	/* Return pid + 1 so that zero can be the exit value */
    655	for (pid++; pid && l < *pos;
    656	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
    657		;
    658	return (void *)pid;
    659}
    660
    661/**
    662 * trace_pid_show - show the current pid in seq_file processing
    663 * @m: The seq_file structure to write into
    664 * @v: A void pointer of the pid (+1) value to display
    665 *
    666 * Can be directly used by seq_file operations to display the current
    667 * pid value.
    668 */
    669int trace_pid_show(struct seq_file *m, void *v)
    670{
    671	unsigned long pid = (unsigned long)v - 1;
    672
    673	seq_printf(m, "%lu\n", pid);
    674	return 0;
    675}
    676
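        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * wiring the three helpers above into seq_operations.  Pulling the
         * pid_list out of m->private is hypothetical; the real callers
         * resolve it from their trace_array under the proper locking rules.
         */
        #if 0	/* example only */
        static void *demo_pids_start(struct seq_file *m, loff_t *pos)
        {
        	struct trace_pid_list *pid_list = m->private;

        	return trace_pid_start(pid_list, pos);
        }

        static void *demo_pids_next(struct seq_file *m, void *v, loff_t *pos)
        {
        	struct trace_pid_list *pid_list = m->private;

        	return trace_pid_next(pid_list, v, pos);
        }

        static void demo_pids_stop(struct seq_file *m, void *v)
        {
        }

        static const struct seq_operations demo_pids_seq_ops = {
        	.start	= demo_pids_start,
        	.next	= demo_pids_next,
        	.stop	= demo_pids_stop,
        	.show	= trace_pid_show,
        };
        #endif
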
    677/* 128 should be much more than enough */
    678#define PID_BUF_SIZE		127
    679
    680int trace_pid_write(struct trace_pid_list *filtered_pids,
    681		    struct trace_pid_list **new_pid_list,
    682		    const char __user *ubuf, size_t cnt)
    683{
    684	struct trace_pid_list *pid_list;
    685	struct trace_parser parser;
    686	unsigned long val;
    687	int nr_pids = 0;
    688	ssize_t read = 0;
    689	ssize_t ret;
    690	loff_t pos;
    691	pid_t pid;
    692
    693	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
    694		return -ENOMEM;
    695
    696	/*
    697	 * Always recreate a new array. The write is an all or nothing
    698	 * operation. Always create a new array when adding new pids by
    699	 * the user. If the operation fails, then the current list is
    700	 * not modified.
    701	 */
    702	pid_list = trace_pid_list_alloc();
    703	if (!pid_list) {
    704		trace_parser_put(&parser);
    705		return -ENOMEM;
    706	}
    707
    708	if (filtered_pids) {
    709		/* copy the current bits to the new max */
    710		ret = trace_pid_list_first(filtered_pids, &pid);
    711		while (!ret) {
    712			trace_pid_list_set(pid_list, pid);
    713			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
    714			nr_pids++;
    715		}
    716	}
    717
    718	ret = 0;
    719	while (cnt > 0) {
    720
    721		pos = 0;
    722
    723		ret = trace_get_user(&parser, ubuf, cnt, &pos);
    724		if (ret < 0)
    725			break;
    726
    727		read += ret;
    728		ubuf += ret;
    729		cnt -= ret;
    730
    731		if (!trace_parser_loaded(&parser))
    732			break;
    733
    734		ret = -EINVAL;
    735		if (kstrtoul(parser.buffer, 0, &val))
    736			break;
    737
    738		pid = (pid_t)val;
    739
    740		if (trace_pid_list_set(pid_list, pid) < 0) {
    741			ret = -1;
    742			break;
    743		}
    744		nr_pids++;
    745
    746		trace_parser_clear(&parser);
    747		ret = 0;
    748	}
    749	trace_parser_put(&parser);
    750
    751	if (ret < 0) {
    752		trace_pid_list_free(pid_list);
    753		return ret;
    754	}
    755
    756	if (!nr_pids) {
    757		/* Cleared the list of pids */
    758		trace_pid_list_free(pid_list);
    759		pid_list = NULL;
    760	}
    761
    762	*new_pid_list = pid_list;
    763
    764	return read;
    765}
    766
    767static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
    768{
    769	u64 ts;
    770
    771	/* Early boot up does not have a buffer yet */
    772	if (!buf->buffer)
    773		return trace_clock_local();
    774
    775	ts = ring_buffer_time_stamp(buf->buffer);
    776	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
    777
    778	return ts;
    779}
    780
    781u64 ftrace_now(int cpu)
    782{
    783	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
    784}
    785
    786/**
    787 * tracing_is_enabled - Show if global_trace has been enabled
    788 *
    789 * Shows if the global trace has been enabled or not. It uses the
    790 * mirror flag "buffer_disabled" to be used in fast paths such as for
    791 * the irqsoff tracer. But it may be inaccurate due to races. If you
    792 * need to know the accurate state, use tracing_is_on() which is a little
    793 * slower, but accurate.
    794 */
    795int tracing_is_enabled(void)
    796{
    797	/*
    798	 * For quick access (irqsoff uses this in fast path), just
    799	 * return the mirror variable of the state of the ring buffer.
    800	 * It's a little racy, but we don't really care.
    801	 */
    802	smp_rmb();
    803	return !global_trace.buffer_disabled;
    804}
    805
    806/*
    807 * trace_buf_size is the size in bytes that is allocated
    808 * for a buffer. Note, the number of bytes is always rounded
    809 * to page size.
    810 *
    811 * This number is purposely set to a low number of 16384.
    812 * If the dump on oops happens, it will be much appreciated
     813 * to not have to wait for all that output. Anyway, this is
     814 * configurable at both boot time and run time.
    815 */
    816#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
    817
    818static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
    819
    820/* trace_types holds a link list of available tracers. */
    821static struct tracer		*trace_types __read_mostly;
    822
    823/*
    824 * trace_types_lock is used to protect the trace_types list.
    825 */
    826DEFINE_MUTEX(trace_types_lock);
    827
    828/*
    829 * serialize the access of the ring buffer
    830 *
     831 * The ring buffer serializes readers, but that is only low-level
     832 * protection. The validity of events (returned by ring_buffer_peek(),
     833 * etc.) is not protected by the ring buffer.
     834 *
     835 * The content of events may become garbage if we allow other processes
     836 * to consume these events concurrently:
     837 *   A) the page of the consumed events may become a normal page
     838 *      (not reader page) in the ring buffer, and this page will be
     839 *      rewritten by the events producer.
     840 *   B) The page of the consumed events may become a page for splice_read,
     841 *      and this page will be returned to the system.
     842 *
     843 * These primitives allow concurrent access by multiple processes to
     844 * different per-cpu ring buffers.
     845 *
     846 * These primitives don't distinguish read-only and read-consume access.
     847 * Multiple read-only accesses are also serialized.
    848 */
    849
    850#ifdef CONFIG_SMP
    851static DECLARE_RWSEM(all_cpu_access_lock);
    852static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
    853
    854static inline void trace_access_lock(int cpu)
    855{
    856	if (cpu == RING_BUFFER_ALL_CPUS) {
    857		/* gain it for accessing the whole ring buffer. */
    858		down_write(&all_cpu_access_lock);
    859	} else {
    860		/* gain it for accessing a cpu ring buffer. */
    861
    862		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
    863		down_read(&all_cpu_access_lock);
    864
    865		/* Secondly block other access to this @cpu ring buffer. */
    866		mutex_lock(&per_cpu(cpu_access_lock, cpu));
    867	}
    868}
    869
    870static inline void trace_access_unlock(int cpu)
    871{
    872	if (cpu == RING_BUFFER_ALL_CPUS) {
    873		up_write(&all_cpu_access_lock);
    874	} else {
    875		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
    876		up_read(&all_cpu_access_lock);
    877	}
    878}
    879
    880static inline void trace_access_lock_init(void)
    881{
    882	int cpu;
    883
    884	for_each_possible_cpu(cpu)
    885		mutex_init(&per_cpu(cpu_access_lock, cpu));
    886}
    887
    888#else
    889
    890static DEFINE_MUTEX(access_lock);
    891
    892static inline void trace_access_lock(int cpu)
    893{
    894	(void)cpu;
    895	mutex_lock(&access_lock);
    896}
    897
    898static inline void trace_access_unlock(int cpu)
    899{
    900	(void)cpu;
    901	mutex_unlock(&access_lock);
    902}
    903
    904static inline void trace_access_lock_init(void)
    905{
    906}
    907
    908#endif
    909
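        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * the pattern the consuming readers in this file follow.
         * demo_consume is hypothetical; pass RING_BUFFER_ALL_CPUS to
         * serialize against every per-cpu reader at once.
         */
        #if 0	/* example only */
        static void demo_consume(int cpu)
        {
        	trace_access_lock(cpu);
        	/* ... peek at or consume events of @cpu's ring buffer here ... */
        	trace_access_unlock(cpu);
        }
        #endif
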
    910#ifdef CONFIG_STACKTRACE
    911static void __ftrace_trace_stack(struct trace_buffer *buffer,
    912				 unsigned int trace_ctx,
    913				 int skip, struct pt_regs *regs);
    914static inline void ftrace_trace_stack(struct trace_array *tr,
    915				      struct trace_buffer *buffer,
    916				      unsigned int trace_ctx,
    917				      int skip, struct pt_regs *regs);
    918
    919#else
    920static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
    921					unsigned int trace_ctx,
    922					int skip, struct pt_regs *regs)
    923{
    924}
    925static inline void ftrace_trace_stack(struct trace_array *tr,
    926				      struct trace_buffer *buffer,
    927				      unsigned long trace_ctx,
    928				      int skip, struct pt_regs *regs)
    929{
    930}
    931
    932#endif
    933
    934static __always_inline void
    935trace_event_setup(struct ring_buffer_event *event,
    936		  int type, unsigned int trace_ctx)
    937{
    938	struct trace_entry *ent = ring_buffer_event_data(event);
    939
    940	tracing_generic_entry_update(ent, type, trace_ctx);
    941}
    942
    943static __always_inline struct ring_buffer_event *
    944__trace_buffer_lock_reserve(struct trace_buffer *buffer,
    945			  int type,
    946			  unsigned long len,
    947			  unsigned int trace_ctx)
    948{
    949	struct ring_buffer_event *event;
    950
    951	event = ring_buffer_lock_reserve(buffer, len);
    952	if (event != NULL)
    953		trace_event_setup(event, type, trace_ctx);
    954
    955	return event;
    956}
    957
    958void tracer_tracing_on(struct trace_array *tr)
    959{
    960	if (tr->array_buffer.buffer)
    961		ring_buffer_record_on(tr->array_buffer.buffer);
    962	/*
    963	 * This flag is looked at when buffers haven't been allocated
    964	 * yet, or by some tracers (like irqsoff), that just want to
    965	 * know if the ring buffer has been disabled, but it can handle
     966	 * races where it gets disabled while we still do a record.
    967	 * As the check is in the fast path of the tracers, it is more
    968	 * important to be fast than accurate.
    969	 */
    970	tr->buffer_disabled = 0;
    971	/* Make the flag seen by readers */
    972	smp_wmb();
    973}
    974
    975/**
    976 * tracing_on - enable tracing buffers
    977 *
    978 * This function enables tracing buffers that may have been
    979 * disabled with tracing_off.
    980 */
    981void tracing_on(void)
    982{
    983	tracer_tracing_on(&global_trace);
    984}
    985EXPORT_SYMBOL_GPL(tracing_on);
    986
    987
    988static __always_inline void
    989__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
    990{
    991	__this_cpu_write(trace_taskinfo_save, true);
    992
    993	/* If this is the temp buffer, we need to commit fully */
    994	if (this_cpu_read(trace_buffered_event) == event) {
    995		/* Length is in event->array[0] */
    996		ring_buffer_write(buffer, event->array[0], &event->array[1]);
    997		/* Release the temp buffer */
    998		this_cpu_dec(trace_buffered_event_cnt);
    999		/* ring_buffer_unlock_commit() enables preemption */
   1000		preempt_enable_notrace();
   1001	} else
   1002		ring_buffer_unlock_commit(buffer, event);
   1003}
   1004
   1005/**
   1006 * __trace_puts - write a constant string into the trace buffer.
   1007 * @ip:	   The address of the caller
   1008 * @str:   The constant string to write
   1009 * @size:  The size of the string.
   1010 */
   1011int __trace_puts(unsigned long ip, const char *str, int size)
   1012{
   1013	struct ring_buffer_event *event;
   1014	struct trace_buffer *buffer;
   1015	struct print_entry *entry;
   1016	unsigned int trace_ctx;
   1017	int alloc;
   1018
   1019	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
   1020		return 0;
   1021
   1022	if (unlikely(tracing_selftest_running || tracing_disabled))
   1023		return 0;
   1024
   1025	alloc = sizeof(*entry) + size + 2; /* possible \n added */
   1026
   1027	trace_ctx = tracing_gen_ctx();
   1028	buffer = global_trace.array_buffer.buffer;
   1029	ring_buffer_nest_start(buffer);
   1030	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
   1031					    trace_ctx);
   1032	if (!event) {
   1033		size = 0;
   1034		goto out;
   1035	}
   1036
   1037	entry = ring_buffer_event_data(event);
   1038	entry->ip = ip;
   1039
   1040	memcpy(&entry->buf, str, size);
   1041
   1042	/* Add a newline if necessary */
   1043	if (entry->buf[size - 1] != '\n') {
   1044		entry->buf[size] = '\n';
   1045		entry->buf[size + 1] = '\0';
   1046	} else
   1047		entry->buf[size] = '\0';
   1048
   1049	__buffer_unlock_commit(buffer, event);
   1050	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
   1051 out:
   1052	ring_buffer_nest_end(buffer);
   1053	return size;
   1054}
   1055EXPORT_SYMBOL_GPL(__trace_puts);
   1056
   1057/**
   1058 * __trace_bputs - write the pointer to a constant string into trace buffer
   1059 * @ip:	   The address of the caller
   1060 * @str:   The constant string to write to the buffer to
   1061 */
   1062int __trace_bputs(unsigned long ip, const char *str)
   1063{
   1064	struct ring_buffer_event *event;
   1065	struct trace_buffer *buffer;
   1066	struct bputs_entry *entry;
   1067	unsigned int trace_ctx;
   1068	int size = sizeof(struct bputs_entry);
   1069	int ret = 0;
   1070
   1071	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
   1072		return 0;
   1073
   1074	if (unlikely(tracing_selftest_running || tracing_disabled))
   1075		return 0;
   1076
   1077	trace_ctx = tracing_gen_ctx();
   1078	buffer = global_trace.array_buffer.buffer;
   1079
   1080	ring_buffer_nest_start(buffer);
   1081	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
   1082					    trace_ctx);
   1083	if (!event)
   1084		goto out;
   1085
   1086	entry = ring_buffer_event_data(event);
   1087	entry->ip			= ip;
   1088	entry->str			= str;
   1089
   1090	__buffer_unlock_commit(buffer, event);
   1091	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
   1092
   1093	ret = 1;
   1094 out:
   1095	ring_buffer_nest_end(buffer);
   1096	return ret;
   1097}
   1098EXPORT_SYMBOL_GPL(__trace_bputs);
   1099
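        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * the two functions above normally sit behind the trace_puts() macro
         * from <linux/kernel.h>, which uses __trace_bputs() for string
         * literals (only the pointer is recorded) and __trace_puts()
         * otherwise.  demo_puts is hypothetical.
         */
        #if 0	/* example only */
        static void demo_puts(const char *dynamic_str)
        {
        	trace_puts("a string literal lands in __trace_bputs()\n");
        	trace_puts(dynamic_str);	/* copied by __trace_puts() */
        }
        #endif
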
   1100#ifdef CONFIG_TRACER_SNAPSHOT
   1101static void tracing_snapshot_instance_cond(struct trace_array *tr,
   1102					   void *cond_data)
   1103{
   1104	struct tracer *tracer = tr->current_trace;
   1105	unsigned long flags;
   1106
   1107	if (in_nmi()) {
   1108		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
   1109		internal_trace_puts("*** snapshot is being ignored        ***\n");
   1110		return;
   1111	}
   1112
   1113	if (!tr->allocated_snapshot) {
   1114		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
   1115		internal_trace_puts("*** stopping trace here!   ***\n");
   1116		tracing_off();
   1117		return;
   1118	}
   1119
   1120	/* Note, snapshot can not be used when the tracer uses it */
   1121	if (tracer->use_max_tr) {
   1122		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
   1123		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
   1124		return;
   1125	}
   1126
   1127	local_irq_save(flags);
   1128	update_max_tr(tr, current, smp_processor_id(), cond_data);
   1129	local_irq_restore(flags);
   1130}
   1131
   1132void tracing_snapshot_instance(struct trace_array *tr)
   1133{
   1134	tracing_snapshot_instance_cond(tr, NULL);
   1135}
   1136
   1137/**
   1138 * tracing_snapshot - take a snapshot of the current buffer.
   1139 *
   1140 * This causes a swap between the snapshot buffer and the current live
   1141 * tracing buffer. You can use this to take snapshots of the live
   1142 * trace when some condition is triggered, but continue to trace.
   1143 *
    1144 * Note, make sure to allocate the snapshot either with
    1145 * tracing_snapshot_alloc(), or by doing it manually
    1146 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
   1147 *
   1148 * If the snapshot buffer is not allocated, it will stop tracing.
   1149 * Basically making a permanent snapshot.
   1150 */
   1151void tracing_snapshot(void)
   1152{
   1153	struct trace_array *tr = &global_trace;
   1154
   1155	tracing_snapshot_instance(tr);
   1156}
   1157EXPORT_SYMBOL_GPL(tracing_snapshot);
   1158
   1159/**
   1160 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
   1161 * @tr:		The tracing instance to snapshot
   1162 * @cond_data:	The data to be tested conditionally, and possibly saved
   1163 *
   1164 * This is the same as tracing_snapshot() except that the snapshot is
   1165 * conditional - the snapshot will only happen if the
   1166 * cond_snapshot.update() implementation receiving the cond_data
   1167 * returns true, which means that the trace array's cond_snapshot
   1168 * update() operation used the cond_data to determine whether the
   1169 * snapshot should be taken, and if it was, presumably saved it along
   1170 * with the snapshot.
   1171 */
   1172void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
   1173{
   1174	tracing_snapshot_instance_cond(tr, cond_data);
   1175}
   1176EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
   1177
   1178/**
   1179 * tracing_cond_snapshot_data - get the user data associated with a snapshot
   1180 * @tr:		The tracing instance
   1181 *
   1182 * When the user enables a conditional snapshot using
   1183 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
   1184 * with the snapshot.  This accessor is used to retrieve it.
   1185 *
   1186 * Should not be called from cond_snapshot.update(), since it takes
   1187 * the tr->max_lock lock, which the code calling
   1188 * cond_snapshot.update() has already done.
   1189 *
   1190 * Returns the cond_data associated with the trace array's snapshot.
   1191 */
   1192void *tracing_cond_snapshot_data(struct trace_array *tr)
   1193{
   1194	void *cond_data = NULL;
   1195
   1196	arch_spin_lock(&tr->max_lock);
   1197
   1198	if (tr->cond_snapshot)
   1199		cond_data = tr->cond_snapshot->cond_data;
   1200
   1201	arch_spin_unlock(&tr->max_lock);
   1202
   1203	return cond_data;
   1204}
   1205EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
   1206
   1207static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
   1208					struct array_buffer *size_buf, int cpu_id);
   1209static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
   1210
   1211int tracing_alloc_snapshot_instance(struct trace_array *tr)
   1212{
   1213	int ret;
   1214
   1215	if (!tr->allocated_snapshot) {
   1216
   1217		/* allocate spare buffer */
   1218		ret = resize_buffer_duplicate_size(&tr->max_buffer,
   1219				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
   1220		if (ret < 0)
   1221			return ret;
   1222
   1223		tr->allocated_snapshot = true;
   1224	}
   1225
   1226	return 0;
   1227}
   1228
   1229static void free_snapshot(struct trace_array *tr)
   1230{
   1231	/*
    1232	 * We don't free the ring buffer; instead, we resize it, because
    1233	 * the max_tr ring buffer has some state (e.g. ring->clock) that
    1234	 * we want to preserve.
   1235	 */
   1236	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
   1237	set_buffer_entries(&tr->max_buffer, 1);
   1238	tracing_reset_online_cpus(&tr->max_buffer);
   1239	tr->allocated_snapshot = false;
   1240}
   1241
   1242/**
   1243 * tracing_alloc_snapshot - allocate snapshot buffer.
   1244 *
   1245 * This only allocates the snapshot buffer if it isn't already
   1246 * allocated - it doesn't also take a snapshot.
   1247 *
   1248 * This is meant to be used in cases where the snapshot buffer needs
   1249 * to be set up for events that can't sleep but need to be able to
   1250 * trigger a snapshot.
   1251 */
   1252int tracing_alloc_snapshot(void)
   1253{
   1254	struct trace_array *tr = &global_trace;
   1255	int ret;
   1256
   1257	ret = tracing_alloc_snapshot_instance(tr);
   1258	WARN_ON(ret < 0);
   1259
   1260	return ret;
   1261}
   1262EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
   1263
   1264/**
   1265 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
   1266 *
   1267 * This is similar to tracing_snapshot(), but it will allocate the
   1268 * snapshot buffer if it isn't already allocated. Use this only
   1269 * where it is safe to sleep, as the allocation may sleep.
   1270 *
   1271 * This causes a swap between the snapshot buffer and the current live
   1272 * tracing buffer. You can use this to take snapshots of the live
   1273 * trace when some condition is triggered, but continue to trace.
   1274 */
   1275void tracing_snapshot_alloc(void)
   1276{
   1277	int ret;
   1278
   1279	ret = tracing_alloc_snapshot();
   1280	if (ret < 0)
   1281		return;
   1282
   1283	tracing_snapshot();
   1284}
   1285EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
   1286
   1287/**
   1288 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
   1289 * @tr:		The tracing instance
   1290 * @cond_data:	User data to associate with the snapshot
   1291 * @update:	Implementation of the cond_snapshot update function
   1292 *
   1293 * Check whether the conditional snapshot for the given instance has
   1294 * already been enabled, or if the current tracer is already using a
   1295 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
   1296 * save the cond_data and update function inside.
   1297 *
   1298 * Returns 0 if successful, error otherwise.
   1299 */
   1300int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
   1301				 cond_update_fn_t update)
   1302{
   1303	struct cond_snapshot *cond_snapshot;
   1304	int ret = 0;
   1305
   1306	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
   1307	if (!cond_snapshot)
   1308		return -ENOMEM;
   1309
   1310	cond_snapshot->cond_data = cond_data;
   1311	cond_snapshot->update = update;
   1312
   1313	mutex_lock(&trace_types_lock);
   1314
   1315	ret = tracing_alloc_snapshot_instance(tr);
   1316	if (ret)
   1317		goto fail_unlock;
   1318
   1319	if (tr->current_trace->use_max_tr) {
   1320		ret = -EBUSY;
   1321		goto fail_unlock;
   1322	}
   1323
   1324	/*
   1325	 * The cond_snapshot can only change to NULL without the
   1326	 * trace_types_lock. We don't care if we race with it going
   1327	 * to NULL, but we want to make sure that it's not set to
   1328	 * something other than NULL when we get here, which we can
   1329	 * do safely with only holding the trace_types_lock and not
   1330	 * having to take the max_lock.
   1331	 */
   1332	if (tr->cond_snapshot) {
   1333		ret = -EBUSY;
   1334		goto fail_unlock;
   1335	}
   1336
   1337	arch_spin_lock(&tr->max_lock);
   1338	tr->cond_snapshot = cond_snapshot;
   1339	arch_spin_unlock(&tr->max_lock);
   1340
   1341	mutex_unlock(&trace_types_lock);
   1342
   1343	return ret;
   1344
   1345 fail_unlock:
   1346	mutex_unlock(&trace_types_lock);
   1347	kfree(cond_snapshot);
   1348	return ret;
   1349}
   1350EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
   1351
   1352/**
   1353 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
   1354 * @tr:		The tracing instance
   1355 *
   1356 * Check whether the conditional snapshot for the given instance is
   1357 * enabled; if so, free the cond_snapshot associated with it,
   1358 * otherwise return -EINVAL.
   1359 *
   1360 * Returns 0 if successful, error otherwise.
   1361 */
   1362int tracing_snapshot_cond_disable(struct trace_array *tr)
   1363{
   1364	int ret = 0;
   1365
   1366	arch_spin_lock(&tr->max_lock);
   1367
   1368	if (!tr->cond_snapshot)
   1369		ret = -EINVAL;
   1370	else {
   1371		kfree(tr->cond_snapshot);
   1372		tr->cond_snapshot = NULL;
   1373	}
   1374
   1375	arch_spin_unlock(&tr->max_lock);
   1376
   1377	return ret;
   1378}
   1379EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
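
        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * a conditional-snapshot user.  The update() callback is invoked from
         * tracing_snapshot_cond() via update_max_tr() and decides whether the
         * buffer swap happens; demo_update and the threshold logic are
         * hypothetical.
         */
        #if 0	/* example only */
        static bool demo_update(struct trace_array *tr, void *cond_data)
        {
        	unsigned long *threshold = cond_data;

        	return *threshold > 100;	/* snapshot only past the threshold */
        }

        static unsigned long demo_threshold;

        static int demo_setup(struct trace_array *tr)
        {
        	return tracing_snapshot_cond_enable(tr, &demo_threshold, demo_update);
        }

        static void demo_trigger(struct trace_array *tr)
        {
        	tracing_snapshot_cond(tr, &demo_threshold);
        }

        static void demo_teardown(struct trace_array *tr)
        {
        	tracing_snapshot_cond_disable(tr);
        }
        #endif
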
   1380#else
   1381void tracing_snapshot(void)
   1382{
   1383	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
   1384}
   1385EXPORT_SYMBOL_GPL(tracing_snapshot);
   1386void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
   1387{
   1388	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
   1389}
   1390EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
   1391int tracing_alloc_snapshot(void)
   1392{
   1393	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
   1394	return -ENODEV;
   1395}
   1396EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
   1397void tracing_snapshot_alloc(void)
   1398{
   1399	/* Give warning */
   1400	tracing_snapshot();
   1401}
   1402EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
   1403void *tracing_cond_snapshot_data(struct trace_array *tr)
   1404{
   1405	return NULL;
   1406}
   1407EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
   1408int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
   1409{
   1410	return -ENODEV;
   1411}
   1412EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
   1413int tracing_snapshot_cond_disable(struct trace_array *tr)
   1414{
   1415	return false;
   1416}
   1417EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
   1418#endif /* CONFIG_TRACER_SNAPSHOT */
   1419
   1420void tracer_tracing_off(struct trace_array *tr)
   1421{
   1422	if (tr->array_buffer.buffer)
   1423		ring_buffer_record_off(tr->array_buffer.buffer);
   1424	/*
   1425	 * This flag is looked at when buffers haven't been allocated
   1426	 * yet, or by some tracers (like irqsoff), that just want to
   1427	 * know if the ring buffer has been disabled, but it can handle
    1428	 * races where it gets disabled while we still do a record.
   1429	 * As the check is in the fast path of the tracers, it is more
   1430	 * important to be fast than accurate.
   1431	 */
   1432	tr->buffer_disabled = 1;
   1433	/* Make the flag seen by readers */
   1434	smp_wmb();
   1435}
   1436
   1437/**
   1438 * tracing_off - turn off tracing buffers
   1439 *
   1440 * This function stops the tracing buffers from recording data.
   1441 * It does not disable any overhead the tracers themselves may
   1442 * be causing. This function simply causes all recording to
   1443 * the ring buffers to fail.
   1444 */
   1445void tracing_off(void)
   1446{
   1447	tracer_tracing_off(&global_trace);
   1448}
   1449EXPORT_SYMBOL_GPL(tracing_off);
   1450
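        /*
         * Illustrative sketch (editor's addition, not in the upstream file):
         * the classic "freeze the buffers at the interesting moment" pattern
         * that tracing_on()/tracing_off() are meant for.  demo_capture and
         * the trace_printk() call are hypothetical.
         */
        #if 0	/* example only */
        static void demo_capture(void)
        {
        	trace_printk("suspicious state reached\n");
        	tracing_off();	/* buffers keep their contents, recording stops */

        	/* ... dump or inspect /sys/kernel/tracing/trace here ... */

        	tracing_on();	/* resume recording */
        }
        #endif
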
   1451void disable_trace_on_warning(void)
   1452{
   1453	if (__disable_trace_on_warning) {
   1454		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
   1455			"Disabling tracing due to warning\n");
   1456		tracing_off();
   1457	}
   1458}
   1459
   1460/**
   1461 * tracer_tracing_is_on - show real state of ring buffer enabled
   1462 * @tr : the trace array to know if ring buffer is enabled
   1463 *
   1464 * Shows real state of the ring buffer if it is enabled or not.
   1465 */
   1466bool tracer_tracing_is_on(struct trace_array *tr)
   1467{
   1468	if (tr->array_buffer.buffer)
   1469		return ring_buffer_record_is_on(tr->array_buffer.buffer);
   1470	return !tr->buffer_disabled;
   1471}
   1472
   1473/**
   1474 * tracing_is_on - show state of ring buffers enabled
   1475 */
   1476int tracing_is_on(void)
   1477{
   1478	return tracer_tracing_is_on(&global_trace);
   1479}
   1480EXPORT_SYMBOL_GPL(tracing_is_on);
   1481
   1482static int __init set_buf_size(char *str)
   1483{
   1484	unsigned long buf_size;
   1485
   1486	if (!str)
   1487		return 0;
   1488	buf_size = memparse(str, &str);
   1489	/*
   1490	 * nr_entries can not be zero and the startup
   1491	 * tests require some buffer space. Therefore
   1492	 * ensure we have at least 4096 bytes of buffer.
   1493	 */
   1494	trace_buf_size = max(4096UL, buf_size);
   1495	return 1;
   1496}
   1497__setup("trace_buf_size=", set_buf_size);
   1498
   1499static int __init set_tracing_thresh(char *str)
   1500{
   1501	unsigned long threshold;
   1502	int ret;
   1503
   1504	if (!str)
   1505		return 0;
   1506	ret = kstrtoul(str, 0, &threshold);
   1507	if (ret < 0)
   1508		return 0;
   1509	tracing_thresh = threshold * 1000;
   1510	return 1;
   1511}
   1512__setup("tracing_thresh=", set_tracing_thresh);
   1513
   1514unsigned long nsecs_to_usecs(unsigned long nsecs)
   1515{
   1516	return nsecs / 1000;
   1517}
   1518
   1519/*
   1520 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
   1521 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
   1522 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
   1523 * of strings in the order that the evals (enum) were defined.
   1524 */
   1525#undef C
   1526#define C(a, b) b
   1527
   1528/* These must match the bit positions in trace_iterator_flags */
   1529static const char *trace_options[] = {
   1530	TRACE_FLAGS
   1531	NULL
   1532};
   1533
   1534static struct {
   1535	u64 (*func)(void);
   1536	const char *name;
   1537	int in_ns;		/* is this clock in nanoseconds? */
   1538} trace_clocks[] = {
   1539	{ trace_clock_local,		"local",	1 },
   1540	{ trace_clock_global,		"global",	1 },
   1541	{ trace_clock_counter,		"counter",	0 },
   1542	{ trace_clock_jiffies,		"uptime",	0 },
   1543	{ trace_clock,			"perf",		1 },
   1544	{ ktime_get_mono_fast_ns,	"mono",		1 },
   1545	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
   1546	{ ktime_get_boot_fast_ns,	"boot",		1 },
   1547	{ ktime_get_tai_fast_ns,	"tai",		1 },
   1548	ARCH_TRACE_CLOCKS
   1549};
   1550
   1551bool trace_clock_in_ns(struct trace_array *tr)
   1552{
   1553	if (trace_clocks[tr->clock_id].in_ns)
   1554		return true;
   1555
   1556	return false;
   1557}
   1558
   1559/*
   1560 * trace_parser_get_init - gets the buffer for trace parser
   1561 */
   1562int trace_parser_get_init(struct trace_parser *parser, int size)
   1563{
   1564	memset(parser, 0, sizeof(*parser));
   1565
   1566	parser->buffer = kmalloc(size, GFP_KERNEL);
   1567	if (!parser->buffer)
   1568		return 1;
   1569
   1570	parser->size = size;
   1571	return 0;
   1572}
   1573
   1574/*
   1575 * trace_parser_put - frees the buffer for trace parser
   1576 */
   1577void trace_parser_put(struct trace_parser *parser)
   1578{
   1579	kfree(parser->buffer);
   1580	parser->buffer = NULL;
   1581}
   1582
   1583/*
   1584 * trace_get_user - reads the user input string separated by  space
   1585 * (matched by isspace(ch))
   1586 *
   1587 * For each string found the 'struct trace_parser' is updated,
   1588 * and the function returns.
   1589 *
   1590 * Returns number of bytes read.
   1591 *
   1592 * See kernel/trace/trace.h for 'struct trace_parser' details.
   1593 */
   1594int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
   1595	size_t cnt, loff_t *ppos)
   1596{
   1597	char ch;
   1598	size_t read = 0;
   1599	ssize_t ret;
   1600
   1601	if (!*ppos)
   1602		trace_parser_clear(parser);
   1603
   1604	ret = get_user(ch, ubuf++);
   1605	if (ret)
   1606		goto out;
   1607
   1608	read++;
   1609	cnt--;
   1610
   1611	/*
   1612	 * The parser is not finished with the last write,
   1613	 * continue reading the user input without skipping spaces.
   1614	 */
   1615	if (!parser->cont) {
   1616		/* skip white space */
   1617		while (cnt && isspace(ch)) {
   1618			ret = get_user(ch, ubuf++);
   1619			if (ret)
   1620				goto out;
   1621			read++;
   1622			cnt--;
   1623		}
   1624
   1625		parser->idx = 0;
   1626
   1627		/* only spaces were written */
   1628		if (isspace(ch) || !ch) {
   1629			*ppos += read;
   1630			ret = read;
   1631			goto out;
   1632		}
   1633	}
   1634
   1635	/* read the non-space input */
   1636	while (cnt && !isspace(ch) && ch) {
   1637		if (parser->idx < parser->size - 1)
   1638			parser->buffer[parser->idx++] = ch;
   1639		else {
   1640			ret = -EINVAL;
   1641			goto out;
   1642		}
   1643		ret = get_user(ch, ubuf++);
   1644		if (ret)
   1645			goto out;
   1646		read++;
   1647		cnt--;
   1648	}
   1649
   1650	/* We either got finished input or we have to wait for another call. */
   1651	if (isspace(ch) || !ch) {
   1652		parser->buffer[parser->idx] = 0;
   1653		parser->cont = false;
   1654	} else if (parser->idx < parser->size - 1) {
   1655		parser->cont = true;
   1656		parser->buffer[parser->idx++] = ch;
   1657		/* Make sure the parsed string always terminates with '\0'. */
   1658		parser->buffer[parser->idx] = 0;
   1659	} else {
   1660		ret = -EINVAL;
   1661		goto out;
   1662	}
   1663
   1664	*ppos += read;
   1665	ret = read;
   1666
   1667out:
   1668	return ret;
   1669}
   1670
   1671/* TODO add a seq_buf_to_buffer() */
   1672static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
   1673{
   1674	int len;
   1675
   1676	if (trace_seq_used(s) <= s->seq.readpos)
   1677		return -EBUSY;
   1678
   1679	len = trace_seq_used(s) - s->seq.readpos;
   1680	if (cnt > len)
   1681		cnt = len;
   1682	memcpy(buf, s->buffer + s->seq.readpos, cnt);
   1683
   1684	s->seq.readpos += cnt;
   1685	return cnt;
   1686}
   1687
   1688unsigned long __read_mostly	tracing_thresh;
   1689static const struct file_operations tracing_max_lat_fops;
   1690
   1691#ifdef LATENCY_FS_NOTIFY
   1692
   1693static struct workqueue_struct *fsnotify_wq;
   1694
   1695static void latency_fsnotify_workfn(struct work_struct *work)
   1696{
   1697	struct trace_array *tr = container_of(work, struct trace_array,
   1698					      fsnotify_work);
   1699	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
   1700}
   1701
   1702static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
   1703{
   1704	struct trace_array *tr = container_of(iwork, struct trace_array,
   1705					      fsnotify_irqwork);
   1706	queue_work(fsnotify_wq, &tr->fsnotify_work);
   1707}
   1708
   1709static void trace_create_maxlat_file(struct trace_array *tr,
   1710				     struct dentry *d_tracer)
   1711{
   1712	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
   1713	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
   1714	tr->d_max_latency = trace_create_file("tracing_max_latency",
   1715					      TRACE_MODE_WRITE,
   1716					      d_tracer, &tr->max_latency,
   1717					      &tracing_max_lat_fops);
   1718}
   1719
   1720__init static int latency_fsnotify_init(void)
   1721{
   1722	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
   1723				      WQ_UNBOUND | WQ_HIGHPRI, 0);
   1724	if (!fsnotify_wq) {
   1725		pr_err("Unable to allocate tr_max_lat_wq\n");
   1726		return -ENOMEM;
   1727	}
   1728	return 0;
   1729}
   1730
   1731late_initcall_sync(latency_fsnotify_init);
   1732
   1733void latency_fsnotify(struct trace_array *tr)
   1734{
   1735	if (!fsnotify_wq)
   1736		return;
   1737	/*
   1738	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
   1739	 * possible that we are called from __schedule() or do_idle(), which
   1740	 * could cause a deadlock.
   1741	 */
   1742	irq_work_queue(&tr->fsnotify_irqwork);
   1743}
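/*
 * Summary of the deferral chain above: latency_fsnotify() may run from
 * scheduler or idle context, so it only queues an irq_work; the irq_work
 * handler queues onto fsnotify_wq, and the workqueue handler finally
 * calls fsnotify_inode() on the tracing_max_latency file so userspace
 * watchers see the update.
 */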
   1744
   1745#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
   1746	|| defined(CONFIG_OSNOISE_TRACER)
   1747
   1748#define trace_create_maxlat_file(tr, d_tracer)				\
   1749	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
   1750			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
   1751
   1752#else
   1753#define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
   1754#endif
   1755
   1756#ifdef CONFIG_TRACER_MAX_TRACE
   1757/*
   1758 * Copy the new maximum trace into the separate maximum-trace
    1759 * structure. (This way the maximum trace is permanently saved
   1760 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
   1761 */
   1762static void
   1763__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
   1764{
   1765	struct array_buffer *trace_buf = &tr->array_buffer;
   1766	struct array_buffer *max_buf = &tr->max_buffer;
   1767	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
   1768	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
   1769
   1770	max_buf->cpu = cpu;
   1771	max_buf->time_start = data->preempt_timestamp;
   1772
   1773	max_data->saved_latency = tr->max_latency;
   1774	max_data->critical_start = data->critical_start;
   1775	max_data->critical_end = data->critical_end;
   1776
   1777	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
   1778	max_data->pid = tsk->pid;
   1779	/*
   1780	 * If tsk == current, then use current_uid(), as that does not use
   1781	 * RCU. The irq tracer can be called out of RCU scope.
   1782	 */
   1783	if (tsk == current)
   1784		max_data->uid = current_uid();
   1785	else
   1786		max_data->uid = task_uid(tsk);
   1787
   1788	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
   1789	max_data->policy = tsk->policy;
   1790	max_data->rt_priority = tsk->rt_priority;
   1791
    1792	/* record this task's comm */
   1793	tracing_record_cmdline(tsk);
   1794	latency_fsnotify(tr);
   1795}
   1796
   1797/**
   1798 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
   1799 * @tr: tracer
   1800 * @tsk: the task with the latency
   1801 * @cpu: The cpu that initiated the trace.
   1802 * @cond_data: User data associated with a conditional snapshot
   1803 *
   1804 * Flip the buffers between the @tr and the max_tr and record information
   1805 * about which task was the cause of this latency.
   1806 */
   1807void
   1808update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
   1809	      void *cond_data)
   1810{
   1811	if (tr->stop_count)
   1812		return;
   1813
   1814	WARN_ON_ONCE(!irqs_disabled());
   1815
   1816	if (!tr->allocated_snapshot) {
   1817		/* Only the nop tracer should hit this when disabling */
   1818		WARN_ON_ONCE(tr->current_trace != &nop_trace);
   1819		return;
   1820	}
   1821
   1822	arch_spin_lock(&tr->max_lock);
   1823
   1824	/* Inherit the recordable setting from array_buffer */
   1825	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
   1826		ring_buffer_record_on(tr->max_buffer.buffer);
   1827	else
   1828		ring_buffer_record_off(tr->max_buffer.buffer);
   1829
   1830#ifdef CONFIG_TRACER_SNAPSHOT
   1831	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
   1832		goto out_unlock;
   1833#endif
   1834	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
   1835
   1836	__update_max_tr(tr, tsk, cpu);
   1837
   1838 out_unlock:
   1839	arch_spin_unlock(&tr->max_lock);
   1840}
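/*
 * Note on the swap above: swap() exchanges the two ring buffer pointers
 * rather than copying any trace data, which is why update_max_tr()
 * requires tr->allocated_snapshot (the max buffer must already exist and
 * be fully sized) before a snapshot can be taken.
 */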
   1841
   1842/**
   1843 * update_max_tr_single - only copy one trace over, and reset the rest
   1844 * @tr: tracer
   1845 * @tsk: task with the latency
   1846 * @cpu: the cpu of the buffer to copy.
   1847 *
   1848 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
   1849 */
   1850void
   1851update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
   1852{
   1853	int ret;
   1854
   1855	if (tr->stop_count)
   1856		return;
   1857
   1858	WARN_ON_ONCE(!irqs_disabled());
   1859	if (!tr->allocated_snapshot) {
   1860		/* Only the nop tracer should hit this when disabling */
   1861		WARN_ON_ONCE(tr->current_trace != &nop_trace);
   1862		return;
   1863	}
   1864
   1865	arch_spin_lock(&tr->max_lock);
   1866
   1867	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
   1868
   1869	if (ret == -EBUSY) {
   1870		/*
   1871		 * We failed to swap the buffer due to a commit taking
   1872		 * place on this CPU. We fail to record, but we reset
   1873		 * the max trace buffer (no one writes directly to it)
   1874		 * and flag that it failed.
   1875		 */
   1876		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
   1877			"Failed to swap buffers due to commit in progress\n");
   1878	}
   1879
   1880	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
   1881
   1882	__update_max_tr(tr, tsk, cpu);
   1883	arch_spin_unlock(&tr->max_lock);
   1884}
   1885#endif /* CONFIG_TRACER_MAX_TRACE */
   1886
   1887static int wait_on_pipe(struct trace_iterator *iter, int full)
   1888{
    1889	/* Iterators are static; they should be filled or empty */
   1890	if (trace_buffer_iter(iter, iter->cpu_file))
   1891		return 0;
   1892
   1893	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
   1894				full);
   1895}
   1896
   1897#ifdef CONFIG_FTRACE_STARTUP_TEST
   1898static bool selftests_can_run;
   1899
   1900struct trace_selftests {
   1901	struct list_head		list;
   1902	struct tracer			*type;
   1903};
   1904
   1905static LIST_HEAD(postponed_selftests);
   1906
   1907static int save_selftest(struct tracer *type)
   1908{
   1909	struct trace_selftests *selftest;
   1910
   1911	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
   1912	if (!selftest)
   1913		return -ENOMEM;
   1914
   1915	selftest->type = type;
   1916	list_add(&selftest->list, &postponed_selftests);
   1917	return 0;
   1918}
   1919
   1920static int run_tracer_selftest(struct tracer *type)
   1921{
   1922	struct trace_array *tr = &global_trace;
   1923	struct tracer *saved_tracer = tr->current_trace;
   1924	int ret;
   1925
   1926	if (!type->selftest || tracing_selftest_disabled)
   1927		return 0;
   1928
   1929	/*
   1930	 * If a tracer registers early in boot up (before scheduling is
   1931	 * initialized and such), then do not run its selftests yet.
   1932	 * Instead, run it a little later in the boot process.
   1933	 */
   1934	if (!selftests_can_run)
   1935		return save_selftest(type);
   1936
   1937	if (!tracing_is_on()) {
   1938		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
   1939			type->name);
   1940		return 0;
   1941	}
   1942
   1943	/*
   1944	 * Run a selftest on this tracer.
   1945	 * Here we reset the trace buffer, and set the current
   1946	 * tracer to be this tracer. The tracer can then run some
   1947	 * internal tracing to verify that everything is in order.
   1948	 * If we fail, we do not register this tracer.
   1949	 */
   1950	tracing_reset_online_cpus(&tr->array_buffer);
   1951
   1952	tr->current_trace = type;
   1953
   1954#ifdef CONFIG_TRACER_MAX_TRACE
   1955	if (type->use_max_tr) {
   1956		/* If we expanded the buffers, make sure the max is expanded too */
   1957		if (ring_buffer_expanded)
   1958			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
   1959					   RING_BUFFER_ALL_CPUS);
   1960		tr->allocated_snapshot = true;
   1961	}
   1962#endif
   1963
   1964	/* the test is responsible for initializing and enabling */
   1965	pr_info("Testing tracer %s: ", type->name);
   1966	ret = type->selftest(type, tr);
   1967	/* the test is responsible for resetting too */
   1968	tr->current_trace = saved_tracer;
   1969	if (ret) {
   1970		printk(KERN_CONT "FAILED!\n");
   1971		/* Add the warning after printing 'FAILED' */
   1972		WARN_ON(1);
   1973		return -1;
   1974	}
   1975	/* Only reset on passing, to avoid touching corrupted buffers */
   1976	tracing_reset_online_cpus(&tr->array_buffer);
   1977
   1978#ifdef CONFIG_TRACER_MAX_TRACE
   1979	if (type->use_max_tr) {
   1980		tr->allocated_snapshot = false;
   1981
   1982		/* Shrink the max buffer again */
   1983		if (ring_buffer_expanded)
   1984			ring_buffer_resize(tr->max_buffer.buffer, 1,
   1985					   RING_BUFFER_ALL_CPUS);
   1986	}
   1987#endif
   1988
   1989	printk(KERN_CONT "PASSED\n");
   1990	return 0;
   1991}
   1992
   1993static __init int init_trace_selftests(void)
   1994{
   1995	struct trace_selftests *p, *n;
   1996	struct tracer *t, **last;
   1997	int ret;
   1998
   1999	selftests_can_run = true;
   2000
   2001	mutex_lock(&trace_types_lock);
   2002
   2003	if (list_empty(&postponed_selftests))
   2004		goto out;
   2005
   2006	pr_info("Running postponed tracer tests:\n");
   2007
   2008	tracing_selftest_running = true;
   2009	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
   2010		/* This loop can take minutes when sanitizers are enabled, so
    2011		 * let's make sure we allow RCU processing.
   2012		 */
   2013		cond_resched();
   2014		ret = run_tracer_selftest(p->type);
   2015		/* If the test fails, then warn and remove from available_tracers */
   2016		if (ret < 0) {
   2017			WARN(1, "tracer: %s failed selftest, disabling\n",
   2018			     p->type->name);
   2019			last = &trace_types;
   2020			for (t = trace_types; t; t = t->next) {
   2021				if (t == p->type) {
   2022					*last = t->next;
   2023					break;
   2024				}
   2025				last = &t->next;
   2026			}
   2027		}
   2028		list_del(&p->list);
   2029		kfree(p);
   2030	}
   2031	tracing_selftest_running = false;
   2032
   2033 out:
   2034	mutex_unlock(&trace_types_lock);
   2035
   2036	return 0;
   2037}
   2038core_initcall(init_trace_selftests);
   2039#else
   2040static inline int run_tracer_selftest(struct tracer *type)
   2041{
   2042	return 0;
   2043}
   2044#endif /* CONFIG_FTRACE_STARTUP_TEST */
   2045
   2046static void add_tracer_options(struct trace_array *tr, struct tracer *t);
   2047
   2048static void __init apply_trace_boot_options(void);
   2049
   2050/**
   2051 * register_tracer - register a tracer with the ftrace system.
   2052 * @type: the plugin for the tracer
   2053 *
   2054 * Register a new plugin tracer.
   2055 */
   2056int __init register_tracer(struct tracer *type)
   2057{
   2058	struct tracer *t;
   2059	int ret = 0;
   2060
   2061	if (!type->name) {
   2062		pr_info("Tracer must have a name\n");
   2063		return -1;
   2064	}
   2065
   2066	if (strlen(type->name) >= MAX_TRACER_SIZE) {
   2067		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
   2068		return -1;
   2069	}
   2070
   2071	if (security_locked_down(LOCKDOWN_TRACEFS)) {
   2072		pr_warn("Can not register tracer %s due to lockdown\n",
   2073			   type->name);
   2074		return -EPERM;
   2075	}
   2076
   2077	mutex_lock(&trace_types_lock);
   2078
   2079	tracing_selftest_running = true;
   2080
   2081	for (t = trace_types; t; t = t->next) {
   2082		if (strcmp(type->name, t->name) == 0) {
   2083			/* already found */
   2084			pr_info("Tracer %s already registered\n",
   2085				type->name);
   2086			ret = -1;
   2087			goto out;
   2088		}
   2089	}
   2090
   2091	if (!type->set_flag)
   2092		type->set_flag = &dummy_set_flag;
   2093	if (!type->flags) {
    2094		/* allocate a dummy tracer_flags */
   2095		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
   2096		if (!type->flags) {
   2097			ret = -ENOMEM;
   2098			goto out;
   2099		}
   2100		type->flags->val = 0;
   2101		type->flags->opts = dummy_tracer_opt;
   2102	} else
   2103		if (!type->flags->opts)
   2104			type->flags->opts = dummy_tracer_opt;
   2105
   2106	/* store the tracer for __set_tracer_option */
   2107	type->flags->trace = type;
   2108
   2109	ret = run_tracer_selftest(type);
   2110	if (ret < 0)
   2111		goto out;
   2112
   2113	type->next = trace_types;
   2114	trace_types = type;
   2115	add_tracer_options(&global_trace, type);
   2116
   2117 out:
   2118	tracing_selftest_running = false;
   2119	mutex_unlock(&trace_types_lock);
   2120
   2121	if (ret || !default_bootup_tracer)
   2122		goto out_unlock;
   2123
   2124	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
   2125		goto out_unlock;
   2126
   2127	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
   2128	/* Do we want this tracer to start on bootup? */
   2129	tracing_set_tracer(&global_trace, type->name);
   2130	default_bootup_tracer = NULL;
   2131
   2132	apply_trace_boot_options();
   2133
    2134	/* disable other selftests, since this will break them. */
   2135	disable_tracing_selftest("running a tracer");
   2136
   2137 out_unlock:
   2138	return ret;
   2139}
   2140
   2141static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
   2142{
   2143	struct trace_buffer *buffer = buf->buffer;
   2144
   2145	if (!buffer)
   2146		return;
   2147
   2148	ring_buffer_record_disable(buffer);
   2149
   2150	/* Make sure all commits have finished */
   2151	synchronize_rcu();
   2152	ring_buffer_reset_cpu(buffer, cpu);
   2153
   2154	ring_buffer_record_enable(buffer);
   2155}
   2156
   2157void tracing_reset_online_cpus(struct array_buffer *buf)
   2158{
   2159	struct trace_buffer *buffer = buf->buffer;
   2160
   2161	if (!buffer)
   2162		return;
   2163
   2164	ring_buffer_record_disable(buffer);
   2165
   2166	/* Make sure all commits have finished */
   2167	synchronize_rcu();
   2168
   2169	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
   2170
   2171	ring_buffer_reset_online_cpus(buffer);
   2172
   2173	ring_buffer_record_enable(buffer);
   2174}
   2175
   2176/* Must have trace_types_lock held */
   2177void tracing_reset_all_online_cpus(void)
   2178{
   2179	struct trace_array *tr;
   2180
   2181	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
   2182		if (!tr->clear_trace)
   2183			continue;
   2184		tr->clear_trace = false;
   2185		tracing_reset_online_cpus(&tr->array_buffer);
   2186#ifdef CONFIG_TRACER_MAX_TRACE
   2187		tracing_reset_online_cpus(&tr->max_buffer);
   2188#endif
   2189	}
   2190}
   2191
   2192/*
   2193 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
   2194 * is the tgid last observed corresponding to pid=i.
   2195 */
   2196static int *tgid_map;
   2197
   2198/* The maximum valid index into tgid_map. */
   2199static size_t tgid_map_max;
   2200
   2201#define SAVED_CMDLINES_DEFAULT 128
   2202#define NO_CMDLINE_MAP UINT_MAX
   2203static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
   2204struct saved_cmdlines_buffer {
   2205	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
   2206	unsigned *map_cmdline_to_pid;
   2207	unsigned cmdline_num;
   2208	int cmdline_idx;
   2209	char *saved_cmdlines;
   2210};
   2211static struct saved_cmdlines_buffer *savedcmd;
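/*
 * A minimal sketch of the mapping maintained below (example values are
 * made up): a task with pid 4231 and comm "kworker/0:1" is hashed to
 * tpid = 4231 & (PID_MAX_DEFAULT - 1); map_pid_to_cmdline[tpid] points at
 * a slot idx, map_cmdline_to_pid[idx] records 4231, and
 * saved_cmdlines[idx * TASK_COMM_LEN] holds "kworker/0:1". On lookup the
 * stored pid is compared against the requested one, so a reused slot
 * reports "<...>" instead of a stale comm.
 */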
   2212
   2213static inline char *get_saved_cmdlines(int idx)
   2214{
   2215	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
   2216}
   2217
   2218static inline void set_cmdline(int idx, const char *cmdline)
   2219{
   2220	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
   2221}
   2222
   2223static int allocate_cmdlines_buffer(unsigned int val,
   2224				    struct saved_cmdlines_buffer *s)
   2225{
   2226	s->map_cmdline_to_pid = kmalloc_array(val,
   2227					      sizeof(*s->map_cmdline_to_pid),
   2228					      GFP_KERNEL);
   2229	if (!s->map_cmdline_to_pid)
   2230		return -ENOMEM;
   2231
   2232	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
   2233	if (!s->saved_cmdlines) {
   2234		kfree(s->map_cmdline_to_pid);
   2235		return -ENOMEM;
   2236	}
   2237
   2238	s->cmdline_idx = 0;
   2239	s->cmdline_num = val;
   2240	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
   2241	       sizeof(s->map_pid_to_cmdline));
   2242	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
   2243	       val * sizeof(*s->map_cmdline_to_pid));
   2244
   2245	return 0;
   2246}
   2247
   2248static int trace_create_savedcmd(void)
   2249{
   2250	int ret;
   2251
   2252	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
   2253	if (!savedcmd)
   2254		return -ENOMEM;
   2255
   2256	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
   2257	if (ret < 0) {
   2258		kfree(savedcmd);
   2259		savedcmd = NULL;
   2260		return -ENOMEM;
   2261	}
   2262
   2263	return 0;
   2264}
   2265
   2266int is_tracing_stopped(void)
   2267{
   2268	return global_trace.stop_count;
   2269}
   2270
   2271/**
   2272 * tracing_start - quick start of the tracer
   2273 *
   2274 * If tracing is enabled but was stopped by tracing_stop,
   2275 * this will start the tracer back up.
   2276 */
   2277void tracing_start(void)
   2278{
   2279	struct trace_buffer *buffer;
   2280	unsigned long flags;
   2281
   2282	if (tracing_disabled)
   2283		return;
   2284
   2285	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
   2286	if (--global_trace.stop_count) {
   2287		if (global_trace.stop_count < 0) {
   2288			/* Someone screwed up their debugging */
   2289			WARN_ON_ONCE(1);
   2290			global_trace.stop_count = 0;
   2291		}
   2292		goto out;
   2293	}
   2294
   2295	/* Prevent the buffers from switching */
   2296	arch_spin_lock(&global_trace.max_lock);
   2297
   2298	buffer = global_trace.array_buffer.buffer;
   2299	if (buffer)
   2300		ring_buffer_record_enable(buffer);
   2301
   2302#ifdef CONFIG_TRACER_MAX_TRACE
   2303	buffer = global_trace.max_buffer.buffer;
   2304	if (buffer)
   2305		ring_buffer_record_enable(buffer);
   2306#endif
   2307
   2308	arch_spin_unlock(&global_trace.max_lock);
   2309
   2310 out:
   2311	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
   2312}
   2313
   2314static void tracing_start_tr(struct trace_array *tr)
   2315{
   2316	struct trace_buffer *buffer;
   2317	unsigned long flags;
   2318
   2319	if (tracing_disabled)
   2320		return;
   2321
   2322	/* If global, we need to also start the max tracer */
   2323	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
   2324		return tracing_start();
   2325
   2326	raw_spin_lock_irqsave(&tr->start_lock, flags);
   2327
   2328	if (--tr->stop_count) {
   2329		if (tr->stop_count < 0) {
   2330			/* Someone screwed up their debugging */
   2331			WARN_ON_ONCE(1);
   2332			tr->stop_count = 0;
   2333		}
   2334		goto out;
   2335	}
   2336
   2337	buffer = tr->array_buffer.buffer;
   2338	if (buffer)
   2339		ring_buffer_record_enable(buffer);
   2340
   2341 out:
   2342	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
   2343}
   2344
   2345/**
   2346 * tracing_stop - quick stop of the tracer
   2347 *
    2348 * Lightweight way to stop tracing. Use in conjunction with
   2349 * tracing_start.
   2350 */
   2351void tracing_stop(void)
   2352{
   2353	struct trace_buffer *buffer;
   2354	unsigned long flags;
   2355
   2356	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
   2357	if (global_trace.stop_count++)
   2358		goto out;
   2359
   2360	/* Prevent the buffers from switching */
   2361	arch_spin_lock(&global_trace.max_lock);
   2362
   2363	buffer = global_trace.array_buffer.buffer;
   2364	if (buffer)
   2365		ring_buffer_record_disable(buffer);
   2366
   2367#ifdef CONFIG_TRACER_MAX_TRACE
   2368	buffer = global_trace.max_buffer.buffer;
   2369	if (buffer)
   2370		ring_buffer_record_disable(buffer);
   2371#endif
   2372
   2373	arch_spin_unlock(&global_trace.max_lock);
   2374
   2375 out:
   2376	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
   2377}
   2378
   2379static void tracing_stop_tr(struct trace_array *tr)
   2380{
   2381	struct trace_buffer *buffer;
   2382	unsigned long flags;
   2383
   2384	/* If global, we need to also stop the max tracer */
   2385	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
   2386		return tracing_stop();
   2387
   2388	raw_spin_lock_irqsave(&tr->start_lock, flags);
   2389	if (tr->stop_count++)
   2390		goto out;
   2391
   2392	buffer = tr->array_buffer.buffer;
   2393	if (buffer)
   2394		ring_buffer_record_disable(buffer);
   2395
   2396 out:
   2397	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
   2398}
   2399
   2400static int trace_save_cmdline(struct task_struct *tsk)
   2401{
   2402	unsigned tpid, idx;
   2403
   2404	/* treat recording of idle task as a success */
   2405	if (!tsk->pid)
   2406		return 1;
   2407
   2408	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
   2409
   2410	/*
   2411	 * It's not the end of the world if we don't get
   2412	 * the lock, but we also don't want to spin
   2413	 * nor do we want to disable interrupts,
   2414	 * so if we miss here, then better luck next time.
   2415	 */
   2416	if (!arch_spin_trylock(&trace_cmdline_lock))
   2417		return 0;
   2418
   2419	idx = savedcmd->map_pid_to_cmdline[tpid];
   2420	if (idx == NO_CMDLINE_MAP) {
   2421		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
   2422
   2423		savedcmd->map_pid_to_cmdline[tpid] = idx;
   2424		savedcmd->cmdline_idx = idx;
   2425	}
   2426
   2427	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
   2428	set_cmdline(idx, tsk->comm);
   2429
   2430	arch_spin_unlock(&trace_cmdline_lock);
   2431
   2432	return 1;
   2433}
   2434
   2435static void __trace_find_cmdline(int pid, char comm[])
   2436{
   2437	unsigned map;
   2438	int tpid;
   2439
   2440	if (!pid) {
   2441		strcpy(comm, "<idle>");
   2442		return;
   2443	}
   2444
   2445	if (WARN_ON_ONCE(pid < 0)) {
   2446		strcpy(comm, "<XXX>");
   2447		return;
   2448	}
   2449
   2450	tpid = pid & (PID_MAX_DEFAULT - 1);
   2451	map = savedcmd->map_pid_to_cmdline[tpid];
   2452	if (map != NO_CMDLINE_MAP) {
   2453		tpid = savedcmd->map_cmdline_to_pid[map];
   2454		if (tpid == pid) {
   2455			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
   2456			return;
   2457		}
   2458	}
   2459	strcpy(comm, "<...>");
   2460}
   2461
   2462void trace_find_cmdline(int pid, char comm[])
   2463{
   2464	preempt_disable();
   2465	arch_spin_lock(&trace_cmdline_lock);
   2466
   2467	__trace_find_cmdline(pid, comm);
   2468
   2469	arch_spin_unlock(&trace_cmdline_lock);
   2470	preempt_enable();
   2471}
   2472
   2473static int *trace_find_tgid_ptr(int pid)
   2474{
   2475	/*
   2476	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
   2477	 * if we observe a non-NULL tgid_map then we also observe the correct
   2478	 * tgid_map_max.
   2479	 */
   2480	int *map = smp_load_acquire(&tgid_map);
   2481
   2482	if (unlikely(!map || pid > tgid_map_max))
   2483		return NULL;
   2484
   2485	return &map[pid];
   2486}
   2487
   2488int trace_find_tgid(int pid)
   2489{
   2490	int *ptr = trace_find_tgid_ptr(pid);
   2491
   2492	return ptr ? *ptr : 0;
   2493}
   2494
   2495static int trace_save_tgid(struct task_struct *tsk)
   2496{
   2497	int *ptr;
   2498
   2499	/* treat recording of idle task as a success */
   2500	if (!tsk->pid)
   2501		return 1;
   2502
   2503	ptr = trace_find_tgid_ptr(tsk->pid);
   2504	if (!ptr)
   2505		return 0;
   2506
   2507	*ptr = tsk->tgid;
   2508	return 1;
   2509}
   2510
   2511static bool tracing_record_taskinfo_skip(int flags)
   2512{
   2513	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
   2514		return true;
   2515	if (!__this_cpu_read(trace_taskinfo_save))
   2516		return true;
   2517	return false;
   2518}
   2519
   2520/**
   2521 * tracing_record_taskinfo - record the task info of a task
   2522 *
   2523 * @task:  task to record
   2524 * @flags: TRACE_RECORD_CMDLINE for recording comm
   2525 *         TRACE_RECORD_TGID for recording tgid
   2526 */
   2527void tracing_record_taskinfo(struct task_struct *task, int flags)
   2528{
   2529	bool done;
   2530
   2531	if (tracing_record_taskinfo_skip(flags))
   2532		return;
   2533
   2534	/*
   2535	 * Record as much task information as possible. If some fail, continue
   2536	 * to try to record the others.
   2537	 */
   2538	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
   2539	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
   2540
    2541	/* If recording any information failed, retry soon. */
   2542	if (!done)
   2543		return;
   2544
   2545	__this_cpu_write(trace_taskinfo_save, false);
   2546}
   2547
   2548/**
   2549 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
   2550 *
   2551 * @prev: previous task during sched_switch
   2552 * @next: next task during sched_switch
   2553 * @flags: TRACE_RECORD_CMDLINE for recording comm
   2554 *         TRACE_RECORD_TGID for recording tgid
   2555 */
   2556void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
   2557					  struct task_struct *next, int flags)
   2558{
   2559	bool done;
   2560
   2561	if (tracing_record_taskinfo_skip(flags))
   2562		return;
   2563
   2564	/*
   2565	 * Record as much task information as possible. If some fail, continue
   2566	 * to try to record the others.
   2567	 */
   2568	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
   2569	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
   2570	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
   2571	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
   2572
    2573	/* If recording any information failed, retry soon. */
   2574	if (!done)
   2575		return;
   2576
   2577	__this_cpu_write(trace_taskinfo_save, false);
   2578}
   2579
   2580/* Helpers to record a specific task information */
   2581void tracing_record_cmdline(struct task_struct *task)
   2582{
   2583	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
   2584}
   2585
   2586void tracing_record_tgid(struct task_struct *task)
   2587{
   2588	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
   2589}
   2590
   2591/*
   2592 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
   2593 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
   2594 * simplifies those functions and keeps them in sync.
   2595 */
   2596enum print_line_t trace_handle_return(struct trace_seq *s)
   2597{
   2598	return trace_seq_has_overflowed(s) ?
   2599		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
   2600}
   2601EXPORT_SYMBOL_GPL(trace_handle_return);
   2602
   2603static unsigned short migration_disable_value(void)
   2604{
   2605#if defined(CONFIG_SMP)
   2606	return current->migration_disabled;
   2607#else
   2608	return 0;
   2609#endif
   2610}
   2611
   2612unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
   2613{
   2614	unsigned int trace_flags = irqs_status;
   2615	unsigned int pc;
   2616
   2617	pc = preempt_count();
   2618
   2619	if (pc & NMI_MASK)
   2620		trace_flags |= TRACE_FLAG_NMI;
   2621	if (pc & HARDIRQ_MASK)
   2622		trace_flags |= TRACE_FLAG_HARDIRQ;
   2623	if (in_serving_softirq())
   2624		trace_flags |= TRACE_FLAG_SOFTIRQ;
   2625	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
   2626		trace_flags |= TRACE_FLAG_BH_OFF;
   2627
   2628	if (tif_need_resched())
   2629		trace_flags |= TRACE_FLAG_NEED_RESCHED;
   2630	if (test_preempt_need_resched())
   2631		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
   2632	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
   2633		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
   2634}
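/*
 * Layout of the trace_ctx word built above, as derived from the return
 * expression (decoding is simply the reverse):
 *
 *   bits  0- 3: preemption count (PREEMPT_MASK part of preempt_count()),
 *               clamped to 15
 *   bits  4- 7: migration-disable depth, clamped to 15
 *   bits 16+  : the TRACE_FLAG_* bits collected in trace_flags
 *
 * For example, (trace_ctx >> 16) recovers the flag bits and
 * (trace_ctx & 0xf) the clamped preemption count.
 */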
   2635
   2636struct ring_buffer_event *
   2637trace_buffer_lock_reserve(struct trace_buffer *buffer,
   2638			  int type,
   2639			  unsigned long len,
   2640			  unsigned int trace_ctx)
   2641{
   2642	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
   2643}
   2644
   2645DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
   2646DEFINE_PER_CPU(int, trace_buffered_event_cnt);
   2647static int trace_buffered_event_ref;
   2648
   2649/**
   2650 * trace_buffered_event_enable - enable buffering events
   2651 *
    2652 * When events are being filtered, it is quicker to write the event
    2653 * data into a temporary buffer if there is a likely chance that the
    2654 * event will not be committed. Discarding an event from the ring
    2655 * buffer is not as fast as committing one, and is much slower than
    2656 * copying the data into a commit.
    2657 *
    2658 * When an event is to be filtered, per-CPU buffers are allocated to
    2659 * write the event data into. If the event is filtered and discarded,
    2660 * it is simply dropped; otherwise, the entire data is committed in
    2661 * one shot.
   2662 */
   2663void trace_buffered_event_enable(void)
   2664{
   2665	struct ring_buffer_event *event;
   2666	struct page *page;
   2667	int cpu;
   2668
   2669	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
   2670
   2671	if (trace_buffered_event_ref++)
   2672		return;
   2673
   2674	for_each_tracing_cpu(cpu) {
   2675		page = alloc_pages_node(cpu_to_node(cpu),
   2676					GFP_KERNEL | __GFP_NORETRY, 0);
   2677		if (!page)
   2678			goto failed;
   2679
   2680		event = page_address(page);
   2681		memset(event, 0, sizeof(*event));
   2682
   2683		per_cpu(trace_buffered_event, cpu) = event;
   2684
   2685		preempt_disable();
   2686		if (cpu == smp_processor_id() &&
   2687		    __this_cpu_read(trace_buffered_event) !=
   2688		    per_cpu(trace_buffered_event, cpu))
   2689			WARN_ON_ONCE(1);
   2690		preempt_enable();
   2691	}
   2692
   2693	return;
   2694 failed:
   2695	trace_buffered_event_disable();
   2696}
   2697
   2698static void enable_trace_buffered_event(void *data)
   2699{
   2700	/* Probably not needed, but do it anyway */
   2701	smp_rmb();
   2702	this_cpu_dec(trace_buffered_event_cnt);
   2703}
   2704
   2705static void disable_trace_buffered_event(void *data)
   2706{
   2707	this_cpu_inc(trace_buffered_event_cnt);
   2708}
   2709
   2710/**
   2711 * trace_buffered_event_disable - disable buffering events
   2712 *
   2713 * When a filter is removed, it is faster to not use the buffered
   2714 * events, and to commit directly into the ring buffer. Free up
   2715 * the temp buffers when there are no more users. This requires
   2716 * special synchronization with current events.
   2717 */
   2718void trace_buffered_event_disable(void)
   2719{
   2720	int cpu;
   2721
   2722	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
   2723
   2724	if (WARN_ON_ONCE(!trace_buffered_event_ref))
   2725		return;
   2726
   2727	if (--trace_buffered_event_ref)
   2728		return;
   2729
   2730	preempt_disable();
   2731	/* For each CPU, set the buffer as used. */
   2732	smp_call_function_many(tracing_buffer_mask,
   2733			       disable_trace_buffered_event, NULL, 1);
   2734	preempt_enable();
   2735
   2736	/* Wait for all current users to finish */
   2737	synchronize_rcu();
   2738
   2739	for_each_tracing_cpu(cpu) {
   2740		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
   2741		per_cpu(trace_buffered_event, cpu) = NULL;
   2742	}
   2743	/*
   2744	 * Make sure trace_buffered_event is NULL before clearing
   2745	 * trace_buffered_event_cnt.
   2746	 */
   2747	smp_wmb();
   2748
   2749	preempt_disable();
   2750	/* Do the work on each cpu */
   2751	smp_call_function_many(tracing_buffer_mask,
   2752			       enable_trace_buffered_event, NULL, 1);
   2753	preempt_enable();
   2754}
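/*
 * A minimal sketch of how the two calls above are intended to pair up
 * (assumed caller, e.g. the event-filter code; both calls require
 * event_mutex to be held, as the WARN_ON_ONCE()s above check):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install or enable an event filter ...
 *	mutex_unlock(&event_mutex);
 *
 *	mutex_lock(&event_mutex);
 *	... remove the filter ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 *
 * The reference count lets several filters share the per-CPU pages; the
 * pages are only freed when the last user calls the disable side.
 */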
   2755
   2756static struct trace_buffer *temp_buffer;
   2757
   2758struct ring_buffer_event *
   2759trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
   2760			  struct trace_event_file *trace_file,
   2761			  int type, unsigned long len,
   2762			  unsigned int trace_ctx)
   2763{
   2764	struct ring_buffer_event *entry;
   2765	struct trace_array *tr = trace_file->tr;
   2766	int val;
   2767
   2768	*current_rb = tr->array_buffer.buffer;
   2769
   2770	if (!tr->no_filter_buffering_ref &&
   2771	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
   2772		preempt_disable_notrace();
   2773		/*
   2774		 * Filtering is on, so try to use the per cpu buffer first.
   2775		 * This buffer will simulate a ring_buffer_event,
   2776		 * where the type_len is zero and the array[0] will
   2777		 * hold the full length.
    2778		 * (see include/linux/ring_buffer.h for details on
   2779		 *  how the ring_buffer_event is structured).
   2780		 *
   2781		 * Using a temp buffer during filtering and copying it
   2782		 * on a matched filter is quicker than writing directly
   2783		 * into the ring buffer and then discarding it when
   2784		 * it doesn't match. That is because the discard
   2785		 * requires several atomic operations to get right.
   2786		 * Copying on match and doing nothing on a failed match
    2787		 * is still quicker than skipping the copy on match but having
   2788		 * to discard out of the ring buffer on a failed match.
   2789		 */
   2790		if ((entry = __this_cpu_read(trace_buffered_event))) {
   2791			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
   2792
   2793			val = this_cpu_inc_return(trace_buffered_event_cnt);
   2794
   2795			/*
   2796			 * Preemption is disabled, but interrupts and NMIs
   2797			 * can still come in now. If that happens after
   2798			 * the above increment, then it will have to go
   2799			 * back to the old method of allocating the event
   2800			 * on the ring buffer, and if the filter fails, it
   2801			 * will have to call ring_buffer_discard_commit()
   2802			 * to remove it.
   2803			 *
   2804			 * Need to also check the unlikely case that the
   2805			 * length is bigger than the temp buffer size.
   2806			 * If that happens, then the reserve is pretty much
   2807			 * guaranteed to fail, as the ring buffer currently
   2808			 * only allows events less than a page. But that may
   2809			 * change in the future, so let the ring buffer reserve
   2810			 * handle the failure in that case.
   2811			 */
   2812			if (val == 1 && likely(len <= max_len)) {
   2813				trace_event_setup(entry, type, trace_ctx);
   2814				entry->array[0] = len;
   2815				/* Return with preemption disabled */
   2816				return entry;
   2817			}
   2818			this_cpu_dec(trace_buffered_event_cnt);
   2819		}
   2820		/* __trace_buffer_lock_reserve() disables preemption */
   2821		preempt_enable_notrace();
   2822	}
   2823
   2824	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
   2825					    trace_ctx);
   2826	/*
    2827	 * If tracing is off, but we have triggers enabled,
    2828	 * we still need to look at the event data. Use the temp_buffer
    2829	 * to store the trace event for the trigger to use. It's recursion
    2830	 * safe and will not be recorded anywhere.
   2831	 */
   2832	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
   2833		*current_rb = temp_buffer;
   2834		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
   2835						    trace_ctx);
   2836	}
   2837	return entry;
   2838}
   2839EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
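/*
 * Note on the preemption contract above: on success the event is returned
 * with preemption disabled, whether it came from the per-CPU buffered
 * event or from __trace_buffer_lock_reserve(); the matching commit path
 * through __buffer_unlock_commit() re-enables it.
 */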
   2840
   2841static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
   2842static DEFINE_MUTEX(tracepoint_printk_mutex);
   2843
   2844static void output_printk(struct trace_event_buffer *fbuffer)
   2845{
   2846	struct trace_event_call *event_call;
   2847	struct trace_event_file *file;
   2848	struct trace_event *event;
   2849	unsigned long flags;
   2850	struct trace_iterator *iter = tracepoint_print_iter;
   2851
   2852	/* We should never get here if iter is NULL */
   2853	if (WARN_ON_ONCE(!iter))
   2854		return;
   2855
   2856	event_call = fbuffer->trace_file->event_call;
   2857	if (!event_call || !event_call->event.funcs ||
   2858	    !event_call->event.funcs->trace)
   2859		return;
   2860
   2861	file = fbuffer->trace_file;
   2862	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
   2863	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
   2864	     !filter_match_preds(file->filter, fbuffer->entry)))
   2865		return;
   2866
   2867	event = &fbuffer->trace_file->event_call->event;
   2868
   2869	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
   2870	trace_seq_init(&iter->seq);
   2871	iter->ent = fbuffer->entry;
   2872	event_call->event.funcs->trace(iter, 0, event);
   2873	trace_seq_putc(&iter->seq, 0);
   2874	printk("%s", iter->seq.buffer);
   2875
   2876	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
   2877}
   2878
   2879int tracepoint_printk_sysctl(struct ctl_table *table, int write,
   2880			     void *buffer, size_t *lenp,
   2881			     loff_t *ppos)
   2882{
   2883	int save_tracepoint_printk;
   2884	int ret;
   2885
   2886	mutex_lock(&tracepoint_printk_mutex);
   2887	save_tracepoint_printk = tracepoint_printk;
   2888
   2889	ret = proc_dointvec(table, write, buffer, lenp, ppos);
   2890
   2891	/*
   2892	 * This will force exiting early, as tracepoint_printk
    2893	 * is always zero when tracepoint_print_iter is not allocated.
   2894	 */
   2895	if (!tracepoint_print_iter)
   2896		tracepoint_printk = 0;
   2897
   2898	if (save_tracepoint_printk == tracepoint_printk)
   2899		goto out;
   2900
   2901	if (tracepoint_printk)
   2902		static_key_enable(&tracepoint_printk_key.key);
   2903	else
   2904		static_key_disable(&tracepoint_printk_key.key);
   2905
   2906 out:
   2907	mutex_unlock(&tracepoint_printk_mutex);
   2908
   2909	return ret;
   2910}
   2911
   2912void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
   2913{
   2914	enum event_trigger_type tt = ETT_NONE;
   2915	struct trace_event_file *file = fbuffer->trace_file;
   2916
   2917	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
   2918			fbuffer->entry, &tt))
   2919		goto discard;
   2920
   2921	if (static_key_false(&tracepoint_printk_key.key))
   2922		output_printk(fbuffer);
   2923
   2924	if (static_branch_unlikely(&trace_event_exports_enabled))
   2925		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
   2926
   2927	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
   2928			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
   2929
   2930discard:
   2931	if (tt)
   2932		event_triggers_post_call(file, tt);
   2933
   2934}
   2935EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
   2936
   2937/*
   2938 * Skip 3:
   2939 *
   2940 *   trace_buffer_unlock_commit_regs()
   2941 *   trace_event_buffer_commit()
   2942 *   trace_event_raw_event_xxx()
   2943 */
   2944# define STACK_SKIP 3
   2945
   2946void trace_buffer_unlock_commit_regs(struct trace_array *tr,
   2947				     struct trace_buffer *buffer,
   2948				     struct ring_buffer_event *event,
   2949				     unsigned int trace_ctx,
   2950				     struct pt_regs *regs)
   2951{
   2952	__buffer_unlock_commit(buffer, event);
   2953
   2954	/*
   2955	 * If regs is not set, then skip the necessary functions.
   2956	 * Note, we can still get here via blktrace, wakeup tracer
   2957	 * and mmiotrace, but that's ok if they lose a function or
   2958	 * two. They are not that meaningful.
   2959	 */
   2960	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
   2961	ftrace_trace_userstack(tr, buffer, trace_ctx);
   2962}
   2963
   2964/*
   2965 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
   2966 */
   2967void
   2968trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
   2969				   struct ring_buffer_event *event)
   2970{
   2971	__buffer_unlock_commit(buffer, event);
   2972}
   2973
   2974void
   2975trace_function(struct trace_array *tr, unsigned long ip, unsigned long
   2976	       parent_ip, unsigned int trace_ctx)
   2977{
   2978	struct trace_event_call *call = &event_function;
   2979	struct trace_buffer *buffer = tr->array_buffer.buffer;
   2980	struct ring_buffer_event *event;
   2981	struct ftrace_entry *entry;
   2982
   2983	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
   2984					    trace_ctx);
   2985	if (!event)
   2986		return;
   2987	entry	= ring_buffer_event_data(event);
   2988	entry->ip			= ip;
   2989	entry->parent_ip		= parent_ip;
   2990
   2991	if (!call_filter_check_discard(call, entry, buffer, event)) {
   2992		if (static_branch_unlikely(&trace_function_exports_enabled))
   2993			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
   2994		__buffer_unlock_commit(buffer, event);
   2995	}
   2996}
   2997
   2998#ifdef CONFIG_STACKTRACE
   2999
   3000/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
   3001#define FTRACE_KSTACK_NESTING	4
   3002
   3003#define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
   3004
   3005struct ftrace_stack {
   3006	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
   3007};
   3008
   3009
   3010struct ftrace_stacks {
   3011	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
   3012};
   3013
   3014static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
   3015static DEFINE_PER_CPU(int, ftrace_stack_reserve);
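/*
 * Illustrative nesting example: if a stack trace in task context is
 * interrupted by a softirq, which is interrupted by a hard irq, which is
 * then hit by an NMI, each context gets its own ftrace_stack slot
 * (stackidx 0..3 below), so the partially filled outer stacks are not
 * clobbered.
 */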
   3016
   3017static void __ftrace_trace_stack(struct trace_buffer *buffer,
   3018				 unsigned int trace_ctx,
   3019				 int skip, struct pt_regs *regs)
   3020{
   3021	struct trace_event_call *call = &event_kernel_stack;
   3022	struct ring_buffer_event *event;
   3023	unsigned int size, nr_entries;
   3024	struct ftrace_stack *fstack;
   3025	struct stack_entry *entry;
   3026	int stackidx;
   3027
   3028	/*
    3029	 * Add one, for this function and the call to save_stack_trace().
   3030	 * If regs is set, then these functions will not be in the way.
   3031	 */
   3032#ifndef CONFIG_UNWINDER_ORC
   3033	if (!regs)
   3034		skip++;
   3035#endif
   3036
   3037	preempt_disable_notrace();
   3038
   3039	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
   3040
   3041	/* This should never happen. If it does, yell once and skip */
   3042	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
   3043		goto out;
   3044
   3045	/*
   3046	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
   3047	 * interrupt will either see the value pre increment or post
   3048	 * increment. If the interrupt happens pre increment it will have
   3049	 * restored the counter when it returns.  We just need a barrier to
   3050	 * keep gcc from moving things around.
   3051	 */
   3052	barrier();
   3053
   3054	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
   3055	size = ARRAY_SIZE(fstack->calls);
   3056
   3057	if (regs) {
   3058		nr_entries = stack_trace_save_regs(regs, fstack->calls,
   3059						   size, skip);
   3060	} else {
   3061		nr_entries = stack_trace_save(fstack->calls, size, skip);
   3062	}
   3063
   3064	size = nr_entries * sizeof(unsigned long);
   3065	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
   3066				    (sizeof(*entry) - sizeof(entry->caller)) + size,
   3067				    trace_ctx);
   3068	if (!event)
   3069		goto out;
   3070	entry = ring_buffer_event_data(event);
   3071
   3072	memcpy(&entry->caller, fstack->calls, size);
   3073	entry->size = nr_entries;
   3074
   3075	if (!call_filter_check_discard(call, entry, buffer, event))
   3076		__buffer_unlock_commit(buffer, event);
   3077
   3078 out:
   3079	/* Again, don't let gcc optimize things here */
   3080	barrier();
   3081	__this_cpu_dec(ftrace_stack_reserve);
   3082	preempt_enable_notrace();
   3083
   3084}
   3085
   3086static inline void ftrace_trace_stack(struct trace_array *tr,
   3087				      struct trace_buffer *buffer,
   3088				      unsigned int trace_ctx,
   3089				      int skip, struct pt_regs *regs)
   3090{
   3091	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
   3092		return;
   3093
   3094	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
   3095}
   3096
   3097void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
   3098		   int skip)
   3099{
   3100	struct trace_buffer *buffer = tr->array_buffer.buffer;
   3101
   3102	if (rcu_is_watching()) {
   3103		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
   3104		return;
   3105	}
   3106
   3107	/*
   3108	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
   3109	 * but if the above rcu_is_watching() failed, then the NMI
   3110	 * triggered someplace critical, and rcu_irq_enter() should
   3111	 * not be called from NMI.
   3112	 */
   3113	if (unlikely(in_nmi()))
   3114		return;
   3115
   3116	rcu_irq_enter_irqson();
   3117	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
   3118	rcu_irq_exit_irqson();
   3119}
   3120
   3121/**
   3122 * trace_dump_stack - record a stack back trace in the trace buffer
   3123 * @skip: Number of functions to skip (helper handlers)
   3124 */
   3125void trace_dump_stack(int skip)
   3126{
   3127	if (tracing_disabled || tracing_selftest_running)
   3128		return;
   3129
   3130#ifndef CONFIG_UNWINDER_ORC
   3131	/* Skip 1 to skip this function. */
   3132	skip++;
   3133#endif
   3134	__ftrace_trace_stack(global_trace.array_buffer.buffer,
   3135			     tracing_gen_ctx(), skip, NULL);
   3136}
   3137EXPORT_SYMBOL_GPL(trace_dump_stack);
   3138
   3139#ifdef CONFIG_USER_STACKTRACE_SUPPORT
   3140static DEFINE_PER_CPU(int, user_stack_count);
   3141
   3142static void
   3143ftrace_trace_userstack(struct trace_array *tr,
   3144		       struct trace_buffer *buffer, unsigned int trace_ctx)
   3145{
   3146	struct trace_event_call *call = &event_user_stack;
   3147	struct ring_buffer_event *event;
   3148	struct userstack_entry *entry;
   3149
   3150	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
   3151		return;
   3152
   3153	/*
    3154	 * NMIs cannot handle page faults, even with fixups.
    3155	 * Saving the user stack can (and often does) fault.
   3156	 */
   3157	if (unlikely(in_nmi()))
   3158		return;
   3159
   3160	/*
   3161	 * prevent recursion, since the user stack tracing may
   3162	 * trigger other kernel events.
   3163	 */
   3164	preempt_disable();
   3165	if (__this_cpu_read(user_stack_count))
   3166		goto out;
   3167
   3168	__this_cpu_inc(user_stack_count);
   3169
   3170	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
   3171					    sizeof(*entry), trace_ctx);
   3172	if (!event)
   3173		goto out_drop_count;
   3174	entry	= ring_buffer_event_data(event);
   3175
   3176	entry->tgid		= current->tgid;
   3177	memset(&entry->caller, 0, sizeof(entry->caller));
   3178
   3179	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
   3180	if (!call_filter_check_discard(call, entry, buffer, event))
   3181		__buffer_unlock_commit(buffer, event);
   3182
   3183 out_drop_count:
   3184	__this_cpu_dec(user_stack_count);
   3185 out:
   3186	preempt_enable();
   3187}
   3188#else /* CONFIG_USER_STACKTRACE_SUPPORT */
   3189static void ftrace_trace_userstack(struct trace_array *tr,
   3190				   struct trace_buffer *buffer,
   3191				   unsigned int trace_ctx)
   3192{
   3193}
   3194#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
   3195
   3196#endif /* CONFIG_STACKTRACE */
   3197
   3198static inline void
   3199func_repeats_set_delta_ts(struct func_repeats_entry *entry,
   3200			  unsigned long long delta)
   3201{
   3202	entry->bottom_delta_ts = delta & U32_MAX;
   3203	entry->top_delta_ts = (delta >> 32);
   3204}
   3205
   3206void trace_last_func_repeats(struct trace_array *tr,
   3207			     struct trace_func_repeats *last_info,
   3208			     unsigned int trace_ctx)
   3209{
   3210	struct trace_buffer *buffer = tr->array_buffer.buffer;
   3211	struct func_repeats_entry *entry;
   3212	struct ring_buffer_event *event;
   3213	u64 delta;
   3214
   3215	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
   3216					    sizeof(*entry), trace_ctx);
   3217	if (!event)
   3218		return;
   3219
   3220	delta = ring_buffer_event_time_stamp(buffer, event) -
   3221		last_info->ts_last_call;
   3222
   3223	entry = ring_buffer_event_data(event);
   3224	entry->ip = last_info->ip;
   3225	entry->parent_ip = last_info->parent_ip;
   3226	entry->count = last_info->count;
   3227	func_repeats_set_delta_ts(entry, delta);
   3228
   3229	__buffer_unlock_commit(buffer, event);
   3230}
   3231
   3232/* created for use with alloc_percpu */
   3233struct trace_buffer_struct {
   3234	int nesting;
   3235	char buffer[4][TRACE_BUF_SIZE];
   3236};
   3237
   3238static struct trace_buffer_struct __percpu *trace_percpu_buffer;
   3239
   3240/*
   3241 * This allows for lockless recording.  If we're nested too deeply, then
   3242 * this returns NULL.
   3243 */
   3244static char *get_trace_buf(void)
   3245{
   3246	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
   3247
   3248	if (!trace_percpu_buffer || buffer->nesting >= 4)
   3249		return NULL;
   3250
   3251	buffer->nesting++;
   3252
   3253	/* Interrupts must see nesting incremented before we use the buffer */
   3254	barrier();
   3255	return &buffer->buffer[buffer->nesting - 1][0];
   3256}
   3257
   3258static void put_trace_buf(void)
   3259{
   3260	/* Don't let the decrement of nesting leak before this */
   3261	barrier();
   3262	this_cpu_dec(trace_percpu_buffer->nesting);
   3263}
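/*
 * A minimal usage sketch of the helpers above, mirroring what
 * trace_vbprintk()/__trace_array_vprintk() below do (preemption must stay
 * disabled between get and put so the per-CPU slot is not migrated away):
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format at most TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */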
   3264
   3265static int alloc_percpu_trace_buffer(void)
   3266{
   3267	struct trace_buffer_struct __percpu *buffers;
   3268
   3269	if (trace_percpu_buffer)
   3270		return 0;
   3271
   3272	buffers = alloc_percpu(struct trace_buffer_struct);
   3273	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
   3274		return -ENOMEM;
   3275
   3276	trace_percpu_buffer = buffers;
   3277	return 0;
   3278}
   3279
   3280static int buffers_allocated;
   3281
   3282void trace_printk_init_buffers(void)
   3283{
   3284	if (buffers_allocated)
   3285		return;
   3286
   3287	if (alloc_percpu_trace_buffer())
   3288		return;
   3289
   3290	/* trace_printk() is for debug use only. Don't use it in production. */
   3291
   3292	pr_warn("\n");
   3293	pr_warn("**********************************************************\n");
   3294	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
   3295	pr_warn("**                                                      **\n");
   3296	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
   3297	pr_warn("**                                                      **\n");
   3298	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
   3299	pr_warn("** unsafe for production use.                           **\n");
   3300	pr_warn("**                                                      **\n");
   3301	pr_warn("** If you see this message and you are not debugging    **\n");
   3302	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
   3303	pr_warn("**                                                      **\n");
   3304	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
   3305	pr_warn("**********************************************************\n");
   3306
   3307	/* Expand the buffers to set size */
   3308	tracing_update_buffers();
   3309
   3310	buffers_allocated = 1;
   3311
   3312	/*
   3313	 * trace_printk_init_buffers() can be called by modules.
   3314	 * If that happens, then we need to start cmdline recording
    3315	 * directly here. If the global_trace.array_buffer.buffer is already
   3316	 * allocated here, then this was called by module code.
   3317	 */
   3318	if (global_trace.array_buffer.buffer)
   3319		tracing_start_cmdline_record();
   3320}
   3321EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
   3322
   3323void trace_printk_start_comm(void)
   3324{
   3325	/* Start tracing comms if trace printk is set */
   3326	if (!buffers_allocated)
   3327		return;
   3328	tracing_start_cmdline_record();
   3329}
   3330
   3331static void trace_printk_start_stop_comm(int enabled)
   3332{
   3333	if (!buffers_allocated)
   3334		return;
   3335
   3336	if (enabled)
   3337		tracing_start_cmdline_record();
   3338	else
   3339		tracing_stop_cmdline_record();
   3340}
   3341
   3342/**
   3343 * trace_vbprintk - write binary msg to tracing buffer
   3344 * @ip:    The address of the caller
   3345 * @fmt:   The string format to write to the buffer
   3346 * @args:  Arguments for @fmt
   3347 */
   3348int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
   3349{
   3350	struct trace_event_call *call = &event_bprint;
   3351	struct ring_buffer_event *event;
   3352	struct trace_buffer *buffer;
   3353	struct trace_array *tr = &global_trace;
   3354	struct bprint_entry *entry;
   3355	unsigned int trace_ctx;
   3356	char *tbuffer;
   3357	int len = 0, size;
   3358
   3359	if (unlikely(tracing_selftest_running || tracing_disabled))
   3360		return 0;
   3361
   3362	/* Don't pollute graph traces with trace_vprintk internals */
   3363	pause_graph_tracing();
   3364
   3365	trace_ctx = tracing_gen_ctx();
   3366	preempt_disable_notrace();
   3367
   3368	tbuffer = get_trace_buf();
   3369	if (!tbuffer) {
   3370		len = 0;
   3371		goto out_nobuffer;
   3372	}
   3373
   3374	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
   3375
   3376	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
   3377		goto out_put;
   3378
   3379	size = sizeof(*entry) + sizeof(u32) * len;
   3380	buffer = tr->array_buffer.buffer;
   3381	ring_buffer_nest_start(buffer);
   3382	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
   3383					    trace_ctx);
   3384	if (!event)
   3385		goto out;
   3386	entry = ring_buffer_event_data(event);
   3387	entry->ip			= ip;
   3388	entry->fmt			= fmt;
   3389
   3390	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
   3391	if (!call_filter_check_discard(call, entry, buffer, event)) {
   3392		__buffer_unlock_commit(buffer, event);
   3393		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
   3394	}
   3395
   3396out:
   3397	ring_buffer_nest_end(buffer);
   3398out_put:
   3399	put_trace_buf();
   3400
   3401out_nobuffer:
   3402	preempt_enable_notrace();
   3403	unpause_graph_tracing();
   3404
   3405	return len;
   3406}
   3407EXPORT_SYMBOL_GPL(trace_vbprintk);
   3408
   3409__printf(3, 0)
   3410static int
   3411__trace_array_vprintk(struct trace_buffer *buffer,
   3412		      unsigned long ip, const char *fmt, va_list args)
   3413{
   3414	struct trace_event_call *call = &event_print;
   3415	struct ring_buffer_event *event;
   3416	int len = 0, size;
   3417	struct print_entry *entry;
   3418	unsigned int trace_ctx;
   3419	char *tbuffer;
   3420
   3421	if (tracing_disabled || tracing_selftest_running)
   3422		return 0;
   3423
   3424	/* Don't pollute graph traces with trace_vprintk internals */
   3425	pause_graph_tracing();
   3426
   3427	trace_ctx = tracing_gen_ctx();
   3428	preempt_disable_notrace();
   3429
   3430
   3431	tbuffer = get_trace_buf();
   3432	if (!tbuffer) {
   3433		len = 0;
   3434		goto out_nobuffer;
   3435	}
   3436
   3437	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
   3438
   3439	size = sizeof(*entry) + len + 1;
   3440	ring_buffer_nest_start(buffer);
   3441	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
   3442					    trace_ctx);
   3443	if (!event)
   3444		goto out;
   3445	entry = ring_buffer_event_data(event);
   3446	entry->ip = ip;
   3447
   3448	memcpy(&entry->buf, tbuffer, len + 1);
   3449	if (!call_filter_check_discard(call, entry, buffer, event)) {
   3450		__buffer_unlock_commit(buffer, event);
   3451		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
   3452	}
   3453
   3454out:
   3455	ring_buffer_nest_end(buffer);
   3456	put_trace_buf();
   3457
   3458out_nobuffer:
   3459	preempt_enable_notrace();
   3460	unpause_graph_tracing();
   3461
   3462	return len;
   3463}
   3464
   3465__printf(3, 0)
   3466int trace_array_vprintk(struct trace_array *tr,
   3467			unsigned long ip, const char *fmt, va_list args)
   3468{
   3469	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
   3470}
   3471
   3472/**
   3473 * trace_array_printk - Print a message to a specific instance
   3474 * @tr: The instance trace_array descriptor
   3475 * @ip: The instruction pointer that this is called from.
   3476 * @fmt: The format to print (printf format)
   3477 *
    3478	 * If a subsystem sets up its own instance, it may printk strings
    3479	 * into its tracing instance buffer using this function. Note, this
    3480	 * function will not write into the top level buffer (use
    3481	 * trace_printk() for that), as the top level buffer should only
    3482	 * contain events that can be individually disabled.
    3483	 * trace_printk() is only meant for debugging a kernel and should
    3484	 * never be incorporated into normal use.
    3485	 *
    3486	 * trace_array_printk() can be used instead, as it will not add
    3487	 * noise to the top level tracing buffer.
   3488 *
   3489 * Note, trace_array_init_printk() must be called on @tr before this
   3490 * can be used.
   3491 */
   3492__printf(3, 0)
   3493int trace_array_printk(struct trace_array *tr,
   3494		       unsigned long ip, const char *fmt, ...)
   3495{
   3496	int ret;
   3497	va_list ap;
   3498
   3499	if (!tr)
   3500		return -ENOENT;
   3501
   3502	/* This is only allowed for created instances */
   3503	if (tr == &global_trace)
   3504		return 0;
   3505
   3506	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
   3507		return 0;
   3508
   3509	va_start(ap, fmt);
   3510	ret = trace_array_vprintk(tr, ip, fmt, ap);
   3511	va_end(ap);
   3512	return ret;
   3513}
   3514EXPORT_SYMBOL_GPL(trace_array_printk);
   3515
   3516/**
   3517 * trace_array_init_printk - Initialize buffers for trace_array_printk()
   3518 * @tr: The trace array to initialize the buffers for
   3519 *
    3520	 * As trace_array_printk() only writes into instances, calls to it are
    3521	 * OK to have in the kernel (unlike trace_printk()). This needs to be
    3522	 * called before trace_array_printk() can be used on a trace_array.
   3523 */
   3524int trace_array_init_printk(struct trace_array *tr)
   3525{
   3526	if (!tr)
   3527		return -ENOENT;
   3528
   3529	/* This is only allowed for created instances */
   3530	if (tr == &global_trace)
   3531		return -EINVAL;
   3532
   3533	return alloc_percpu_trace_buffer();
   3534}
   3535EXPORT_SYMBOL_GPL(trace_array_init_printk);
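
        	/*
        	 * Illustrative sketch (not part of the original source): a module that
        	 * owns its own tracing instance would typically pair the two calls
        	 * above roughly as follows. The instance name "my_instance" and the
        	 * error handling are assumptions for the example only.
        	 *
        	 *	struct trace_array *tr;
        	 *
        	 *	tr = trace_array_get_by_name("my_instance");
        	 *	if (!tr || trace_array_init_printk(tr))
        	 *		return -ENOMEM;
        	 *	trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "my_module");
        	 */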
   3536
   3537__printf(3, 4)
   3538int trace_array_printk_buf(struct trace_buffer *buffer,
   3539			   unsigned long ip, const char *fmt, ...)
   3540{
   3541	int ret;
   3542	va_list ap;
   3543
   3544	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
   3545		return 0;
   3546
   3547	va_start(ap, fmt);
   3548	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
   3549	va_end(ap);
   3550	return ret;
   3551}
   3552
   3553__printf(2, 0)
   3554int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
   3555{
   3556	return trace_array_vprintk(&global_trace, ip, fmt, args);
   3557}
   3558EXPORT_SYMBOL_GPL(trace_vprintk);
   3559
   3560static void trace_iterator_increment(struct trace_iterator *iter)
   3561{
   3562	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
   3563
   3564	iter->idx++;
   3565	if (buf_iter)
   3566		ring_buffer_iter_advance(buf_iter);
   3567}
   3568
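        	/*
        	 * Peek at the next entry on @cpu without consuming it. Uses the
        	 * per-CPU buffer iterator when one exists, otherwise peeks at the
        	 * live ring buffer. Fills in *ts and (optionally) *lost_events and
        	 * updates iter->ent_size for the returned entry.
        	 */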
   3569static struct trace_entry *
   3570peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
   3571		unsigned long *lost_events)
   3572{
   3573	struct ring_buffer_event *event;
   3574	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
   3575
   3576	if (buf_iter) {
   3577		event = ring_buffer_iter_peek(buf_iter, ts);
   3578		if (lost_events)
   3579			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
   3580				(unsigned long)-1 : 0;
   3581	} else {
   3582		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
   3583					 lost_events);
   3584	}
   3585
   3586	if (event) {
   3587		iter->ent_size = ring_buffer_event_length(event);
   3588		return ring_buffer_event_data(event);
   3589	}
   3590	iter->ent_size = 0;
   3591	return NULL;
   3592}
   3593
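        	/*
        	 * Find the entry that should be printed next: for a per-CPU trace
        	 * file just peek at that CPU, otherwise pick the pending entry with
        	 * the smallest timestamp across all tracing CPUs, reporting its CPU,
        	 * timestamp and lost-event count.
        	 */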
   3594static struct trace_entry *
   3595__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
   3596		  unsigned long *missing_events, u64 *ent_ts)
   3597{
   3598	struct trace_buffer *buffer = iter->array_buffer->buffer;
   3599	struct trace_entry *ent, *next = NULL;
   3600	unsigned long lost_events = 0, next_lost = 0;
   3601	int cpu_file = iter->cpu_file;
   3602	u64 next_ts = 0, ts;
   3603	int next_cpu = -1;
   3604	int next_size = 0;
   3605	int cpu;
   3606
   3607	/*
    3608	 * If we are in a per_cpu trace file, don't bother iterating over
    3609	 * all CPUs and just peek at that CPU directly.
   3610	 */
   3611	if (cpu_file > RING_BUFFER_ALL_CPUS) {
   3612		if (ring_buffer_empty_cpu(buffer, cpu_file))
   3613			return NULL;
   3614		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
   3615		if (ent_cpu)
   3616			*ent_cpu = cpu_file;
   3617
   3618		return ent;
   3619	}
   3620
   3621	for_each_tracing_cpu(cpu) {
   3622
   3623		if (ring_buffer_empty_cpu(buffer, cpu))
   3624			continue;
   3625
   3626		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
   3627
   3628		/*
   3629		 * Pick the entry with the smallest timestamp:
   3630		 */
   3631		if (ent && (!next || ts < next_ts)) {
   3632			next = ent;
   3633			next_cpu = cpu;
   3634			next_ts = ts;
   3635			next_lost = lost_events;
   3636			next_size = iter->ent_size;
   3637		}
   3638	}
   3639
   3640	iter->ent_size = next_size;
   3641
   3642	if (ent_cpu)
   3643		*ent_cpu = next_cpu;
   3644
   3645	if (ent_ts)
   3646		*ent_ts = next_ts;
   3647
   3648	if (missing_events)
   3649		*missing_events = next_lost;
   3650
   3651	return next;
   3652}
   3653
   3654#define STATIC_FMT_BUF_SIZE	128
   3655static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
   3656
   3657static char *trace_iter_expand_format(struct trace_iterator *iter)
   3658{
   3659	char *tmp;
   3660
   3661	/*
    3662	 * iter->tr is NULL when used with tp_printk, which means this
    3663	 * can get called where it is not safe to call krealloc().
   3664	 */
   3665	if (!iter->tr || iter->fmt == static_fmt_buf)
   3666		return NULL;
   3667
   3668	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
   3669		       GFP_KERNEL);
   3670	if (tmp) {
   3671		iter->fmt_size += STATIC_FMT_BUF_SIZE;
   3672		iter->fmt = tmp;
   3673	}
   3674
   3675	return tmp;
   3676}
   3677
   3678/* Returns true if the string is safe to dereference from an event */
   3679static bool trace_safe_str(struct trace_iterator *iter, const char *str,
   3680			   bool star, int len)
   3681{
   3682	unsigned long addr = (unsigned long)str;
   3683	struct trace_event *trace_event;
   3684	struct trace_event_call *event;
   3685
   3686	/* Ignore strings with no length */
   3687	if (star && !len)
   3688		return true;
   3689
   3690	/* OK if part of the event data */
   3691	if ((addr >= (unsigned long)iter->ent) &&
   3692	    (addr < (unsigned long)iter->ent + iter->ent_size))
   3693		return true;
   3694
   3695	/* OK if part of the temp seq buffer */
   3696	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
   3697	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
   3698		return true;
   3699
    3700	/* Core rodata cannot be freed */
   3701	if (is_kernel_rodata(addr))
   3702		return true;
   3703
   3704	if (trace_is_tracepoint_string(str))
   3705		return true;
   3706
   3707	/*
   3708	 * Now this could be a module event, referencing core module
   3709	 * data, which is OK.
   3710	 */
   3711	if (!iter->ent)
   3712		return false;
   3713
   3714	trace_event = ftrace_find_event(iter->ent->type);
   3715	if (!trace_event)
   3716		return false;
   3717
   3718	event = container_of(trace_event, struct trace_event_call, event);
   3719	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
   3720		return false;
   3721
   3722	/* Would rather have rodata, but this will suffice */
   3723	if (within_module_core(addr, event->module))
   3724		return true;
   3725
   3726	return false;
   3727}
   3728
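        	/*
        	 * Return the seq buffer contents as a NUL-terminated string, used by
        	 * the WARN_ONCE() in trace_check_vprintf() below.
        	 */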
   3729static const char *show_buffer(struct trace_seq *s)
   3730{
   3731	struct seq_buf *seq = &s->seq;
   3732
   3733	seq_buf_terminate(seq);
   3734
   3735	return seq->buffer;
   3736}
   3737
   3738static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
   3739
   3740static int test_can_verify_check(const char *fmt, ...)
   3741{
   3742	char buf[16];
   3743	va_list ap;
   3744	int ret;
   3745
   3746	/*
    3747	 * The verifier depends on vsnprintf() modifying the va_list passed
    3748	 * to it, i.e. the va_list being passed by reference. Some
    3749	 * architectures (like x86_32) pass it by value, which means that
    3750	 * vsnprintf() does not modify the caller's va_list, and the
    3751	 * verifier would then need to understand all the values that
    3752	 * vsnprintf can use. If the va_list is passed by value, the
    3753	 * verifier is disabled.
   3754	 */
   3755	va_start(ap, fmt);
   3756	vsnprintf(buf, 16, "%d", ap);
   3757	ret = va_arg(ap, int);
   3758	va_end(ap);
   3759
   3760	return ret;
   3761}
   3762
   3763static void test_can_verify(void)
   3764{
   3765	if (!test_can_verify_check("%d %d", 0, 1)) {
   3766		pr_info("trace event string verifier disabled\n");
   3767		static_branch_inc(&trace_no_verify);
   3768	}
   3769}
   3770
   3771/**
   3772 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
   3773 * @iter: The iterator that holds the seq buffer and the event being printed
   3774 * @fmt: The format used to print the event
   3775 * @ap: The va_list holding the data to print from @fmt.
   3776 *
   3777 * This writes the data into the @iter->seq buffer using the data from
   3778 * @fmt and @ap. If the format has a %s, then the source of the string
    3779	 * is examined to make sure it is safe to print; otherwise it will
    3780	 * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
   3781 * pointer.
   3782 */
   3783void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
   3784			 va_list ap)
   3785{
   3786	const char *p = fmt;
   3787	const char *str;
   3788	int i, j;
   3789
   3790	if (WARN_ON_ONCE(!fmt))
   3791		return;
   3792
   3793	if (static_branch_unlikely(&trace_no_verify))
   3794		goto print;
   3795
   3796	/* Don't bother checking when doing a ftrace_dump() */
   3797	if (iter->fmt == static_fmt_buf)
   3798		goto print;
   3799
   3800	while (*p) {
   3801		bool star = false;
   3802		int len = 0;
   3803
   3804		j = 0;
   3805
   3806		/* We only care about %s and variants */
   3807		for (i = 0; p[i]; i++) {
   3808			if (i + 1 >= iter->fmt_size) {
   3809				/*
   3810				 * If we can't expand the copy buffer,
   3811				 * just print it.
   3812				 */
   3813				if (!trace_iter_expand_format(iter))
   3814					goto print;
   3815			}
   3816
   3817			if (p[i] == '\\' && p[i+1]) {
   3818				i++;
   3819				continue;
   3820			}
   3821			if (p[i] == '%') {
   3822				/* Need to test cases like %08.*s */
   3823				for (j = 1; p[i+j]; j++) {
   3824					if (isdigit(p[i+j]) ||
   3825					    p[i+j] == '.')
   3826						continue;
   3827					if (p[i+j] == '*') {
   3828						star = true;
   3829						continue;
   3830					}
   3831					break;
   3832				}
   3833				if (p[i+j] == 's')
   3834					break;
   3835				star = false;
   3836			}
   3837			j = 0;
   3838		}
   3839		/* If no %s found then just print normally */
   3840		if (!p[i])
   3841			break;
   3842
   3843		/* Copy up to the %s, and print that */
   3844		strncpy(iter->fmt, p, i);
   3845		iter->fmt[i] = '\0';
   3846		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
   3847
   3848		/*
   3849		 * If iter->seq is full, the above call no longer guarantees
   3850		 * that ap is in sync with fmt processing, and further calls
   3851		 * to va_arg() can return wrong positional arguments.
   3852		 *
   3853		 * Ensure that ap is no longer used in this case.
   3854		 */
   3855		if (iter->seq.full) {
   3856			p = "";
   3857			break;
   3858		}
   3859
   3860		if (star)
   3861			len = va_arg(ap, int);
   3862
   3863		/* The ap now points to the string data of the %s */
   3864		str = va_arg(ap, const char *);
   3865
   3866		/*
   3867		 * If you hit this warning, it is likely that the
   3868		 * trace event in question used %s on a string that
   3869		 * was saved at the time of the event, but may not be
   3870		 * around when the trace is read. Use __string(),
   3871		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
   3872		 * instead. See samples/trace_events/trace-events-sample.h
   3873		 * for reference.
   3874		 */
   3875		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
   3876			      "fmt: '%s' current_buffer: '%s'",
   3877			      fmt, show_buffer(&iter->seq))) {
   3878			int ret;
   3879
   3880			/* Try to safely read the string */
   3881			if (star) {
   3882				if (len + 1 > iter->fmt_size)
   3883					len = iter->fmt_size - 1;
   3884				if (len < 0)
   3885					len = 0;
   3886				ret = copy_from_kernel_nofault(iter->fmt, str, len);
   3887				iter->fmt[len] = 0;
   3888				star = false;
   3889			} else {
   3890				ret = strncpy_from_kernel_nofault(iter->fmt, str,
   3891								  iter->fmt_size);
   3892			}
   3893			if (ret < 0)
   3894				trace_seq_printf(&iter->seq, "(0x%px)", str);
   3895			else
   3896				trace_seq_printf(&iter->seq, "(0x%px:%s)",
   3897						 str, iter->fmt);
   3898			str = "[UNSAFE-MEMORY]";
   3899			strcpy(iter->fmt, "%s");
   3900		} else {
   3901			strncpy(iter->fmt, p + i, j + 1);
   3902			iter->fmt[j+1] = '\0';
   3903		}
   3904		if (star)
   3905			trace_seq_printf(&iter->seq, iter->fmt, len, str);
   3906		else
   3907			trace_seq_printf(&iter->seq, iter->fmt, str);
   3908
   3909		p += i + j + 1;
   3910	}
   3911 print:
   3912	if (*p)
   3913		trace_seq_vprintf(&iter->seq, p, ap);
   3914}
   3915
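        	/*
        	 * When the "hash-ptr" trace option is cleared, rewrite the event's
        	 * format string so that each plain %p becomes %px, printing real
        	 * addresses instead of hashed values. Returns the format string to
        	 * use (the original one when no rewrite is needed or possible).
        	 */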
   3916const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
   3917{
   3918	const char *p, *new_fmt;
   3919	char *q;
   3920
   3921	if (WARN_ON_ONCE(!fmt))
   3922		return fmt;
   3923
   3924	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
   3925		return fmt;
   3926
   3927	p = fmt;
   3928	new_fmt = q = iter->fmt;
   3929	while (*p) {
   3930		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
   3931			if (!trace_iter_expand_format(iter))
   3932				return fmt;
   3933
   3934			q += iter->fmt - new_fmt;
   3935			new_fmt = iter->fmt;
   3936		}
   3937
   3938		*q++ = *p++;
   3939
   3940		/* Replace %p with %px */
   3941		if (p[-1] == '%') {
   3942			if (p[0] == '%') {
   3943				*q++ = *p++;
   3944			} else if (p[0] == 'p' && !isalnum(p[1])) {
   3945				*q++ = *p++;
   3946				*q++ = 'x';
   3947			}
   3948		}
   3949	}
   3950	*q = '\0';
   3951
   3952	return new_fmt;
   3953}
   3954
   3955#define STATIC_TEMP_BUF_SIZE	128
   3956static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
   3957
   3958/* Find the next real entry, without updating the iterator itself */
   3959struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
   3960					  int *ent_cpu, u64 *ent_ts)
   3961{
   3962	/* __find_next_entry will reset ent_size */
   3963	int ent_size = iter->ent_size;
   3964	struct trace_entry *entry;
   3965
   3966	/*
   3967	 * If called from ftrace_dump(), then the iter->temp buffer
   3968	 * will be the static_temp_buf and not created from kmalloc.
    3969	 * If the entry size is greater than the buffer, we cannot
    3970	 * save it. Just return NULL in that case. This is only
    3971	 * used to add markers when two consecutive events' time
    3972	 * stamps have a large delta. See trace_print_lat_context().
   3973	 */
   3974	if (iter->temp == static_temp_buf &&
   3975	    STATIC_TEMP_BUF_SIZE < ent_size)
   3976		return NULL;
   3977
   3978	/*
   3979	 * The __find_next_entry() may call peek_next_entry(), which may
   3980	 * call ring_buffer_peek() that may make the contents of iter->ent
   3981	 * undefined. Need to copy iter->ent now.
   3982	 */
   3983	if (iter->ent && iter->ent != iter->temp) {
   3984		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
   3985		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
   3986			void *temp;
   3987			temp = kmalloc(iter->ent_size, GFP_KERNEL);
   3988			if (!temp)
   3989				return NULL;
   3990			kfree(iter->temp);
   3991			iter->temp = temp;
   3992			iter->temp_size = iter->ent_size;
   3993		}
   3994		memcpy(iter->temp, iter->ent, iter->ent_size);
   3995		iter->ent = iter->temp;
   3996	}
   3997	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
   3998	/* Put back the original ent_size */
   3999	iter->ent_size = ent_size;
   4000
   4001	return entry;
   4002}
   4003
   4004/* Find the next real entry, and increment the iterator to the next entry */
   4005void *trace_find_next_entry_inc(struct trace_iterator *iter)
   4006{
   4007	iter->ent = __find_next_entry(iter, &iter->cpu,
   4008				      &iter->lost_events, &iter->ts);
   4009
   4010	if (iter->ent)
   4011		trace_iterator_increment(iter);
   4012
   4013	return iter->ent ? iter : NULL;
   4014}
   4015
   4016static void trace_consume(struct trace_iterator *iter)
   4017{
   4018	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
   4019			    &iter->lost_events);
   4020}
   4021
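        	/*
        	 * seq_file ->next() callback for the trace file: advance the trace
        	 * iterator until it reaches position *pos, returning NULL once the
        	 * buffers are exhausted.
        	 */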
   4022static void *s_next(struct seq_file *m, void *v, loff_t *pos)
   4023{
   4024	struct trace_iterator *iter = m->private;
   4025	int i = (int)*pos;
   4026	void *ent;
   4027
   4028	WARN_ON_ONCE(iter->leftover);
   4029
   4030	(*pos)++;
   4031
   4032	/* can't go backwards */
   4033	if (iter->idx > i)
   4034		return NULL;
   4035
   4036	if (iter->idx < 0)
   4037		ent = trace_find_next_entry_inc(iter);
   4038	else
   4039		ent = iter;
   4040
   4041	while (ent && iter->idx < i)
   4042		ent = trace_find_next_entry_inc(iter);
   4043
   4044	iter->pos = *pos;
   4045
   4046	return ent;
   4047}
   4048
   4049void tracing_iter_reset(struct trace_iterator *iter, int cpu)
   4050{
   4051	struct ring_buffer_iter *buf_iter;
   4052	unsigned long entries = 0;
   4053	u64 ts;
   4054
   4055	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
   4056
   4057	buf_iter = trace_buffer_iter(iter, cpu);
   4058	if (!buf_iter)
   4059		return;
   4060
   4061	ring_buffer_iter_reset(buf_iter);
   4062
   4063	/*
    4064	 * With the max latency tracers, it is possible that a reset
    4065	 * never took place on a cpu. This is evidenced by the
    4066	 * timestamp being before the start of the buffer.
   4067	 */
   4068	while (ring_buffer_iter_peek(buf_iter, &ts)) {
   4069		if (ts >= iter->array_buffer->time_start)
   4070			break;
   4071		entries++;
   4072		ring_buffer_iter_advance(buf_iter);
   4073	}
   4074
   4075	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
   4076}
   4077
   4078/*
    4079	 * The current tracer is copied to avoid taking a global lock
    4080	 * all around.
   4081 */
   4082static void *s_start(struct seq_file *m, loff_t *pos)
   4083{
   4084	struct trace_iterator *iter = m->private;
   4085	struct trace_array *tr = iter->tr;
   4086	int cpu_file = iter->cpu_file;
   4087	void *p = NULL;
   4088	loff_t l = 0;
   4089	int cpu;
   4090
   4091	/*
   4092	 * copy the tracer to avoid using a global lock all around.
   4093	 * iter->trace is a copy of current_trace, the pointer to the
   4094	 * name may be used instead of a strcmp(), as iter->trace->name
   4095	 * will point to the same string as current_trace->name.
   4096	 */
   4097	mutex_lock(&trace_types_lock);
   4098	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
   4099		*iter->trace = *tr->current_trace;
   4100	mutex_unlock(&trace_types_lock);
   4101
   4102#ifdef CONFIG_TRACER_MAX_TRACE
   4103	if (iter->snapshot && iter->trace->use_max_tr)
   4104		return ERR_PTR(-EBUSY);
   4105#endif
   4106
   4107	if (*pos != iter->pos) {
   4108		iter->ent = NULL;
   4109		iter->cpu = 0;
   4110		iter->idx = -1;
   4111
   4112		if (cpu_file == RING_BUFFER_ALL_CPUS) {
   4113			for_each_tracing_cpu(cpu)
   4114				tracing_iter_reset(iter, cpu);
   4115		} else
   4116			tracing_iter_reset(iter, cpu_file);
   4117
   4118		iter->leftover = 0;
   4119		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
   4120			;
   4121
   4122	} else {
   4123		/*
   4124		 * If we overflowed the seq_file before, then we want
   4125		 * to just reuse the trace_seq buffer again.
   4126		 */
   4127		if (iter->leftover)
   4128			p = iter;
   4129		else {
   4130			l = *pos - 1;
   4131			p = s_next(m, p, &l);
   4132		}
   4133	}
   4134
   4135	trace_event_read_lock();
   4136	trace_access_lock(cpu_file);
   4137	return p;
   4138}
   4139
   4140static void s_stop(struct seq_file *m, void *p)
   4141{
   4142	struct trace_iterator *iter = m->private;
   4143
   4144#ifdef CONFIG_TRACER_MAX_TRACE
   4145	if (iter->snapshot && iter->trace->use_max_tr)
   4146		return;
   4147#endif
   4148
   4149	trace_access_unlock(iter->cpu_file);
   4150	trace_event_read_unlock();
   4151}
   4152
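        	/*
        	 * Count the entries of @buf on @cpu: *entries is the number of events
        	 * still readable, *total additionally includes events lost to ring
        	 * buffer overruns (unless entries were skipped by a latency-trace
        	 * reset, in which case the two are the same).
        	 */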
   4153static void
   4154get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
   4155		      unsigned long *entries, int cpu)
   4156{
   4157	unsigned long count;
   4158
   4159	count = ring_buffer_entries_cpu(buf->buffer, cpu);
   4160	/*
   4161	 * If this buffer has skipped entries, then we hold all
   4162	 * entries for the trace and we need to ignore the
   4163	 * ones before the time stamp.
   4164	 */
   4165	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
   4166		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
   4167		/* total is the same as the entries */
   4168		*total = count;
   4169	} else
   4170		*total = count +
   4171			ring_buffer_overrun_cpu(buf->buffer, cpu);
   4172	*entries = count;
   4173}
   4174
   4175static void
   4176get_total_entries(struct array_buffer *buf,
   4177		  unsigned long *total, unsigned long *entries)
   4178{
   4179	unsigned long t, e;
   4180	int cpu;
   4181
   4182	*total = 0;
   4183	*entries = 0;
   4184
   4185	for_each_tracing_cpu(cpu) {
   4186		get_total_entries_cpu(buf, &t, &e, cpu);
   4187		*total += t;
   4188		*entries += e;
   4189	}
   4190}
   4191
   4192unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
   4193{
   4194	unsigned long total, entries;
   4195
   4196	if (!tr)
   4197		tr = &global_trace;
   4198
   4199	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
   4200
   4201	return entries;
   4202}
   4203
   4204unsigned long trace_total_entries(struct trace_array *tr)
   4205{
   4206	unsigned long total, entries;
   4207
   4208	if (!tr)
   4209		tr = &global_trace;
   4210
   4211	get_total_entries(&tr->array_buffer, &total, &entries);
   4212
   4213	return entries;
   4214}
   4215
   4216static void print_lat_help_header(struct seq_file *m)
   4217{
   4218	seq_puts(m, "#                    _------=> CPU#            \n"
   4219		    "#                   / _-----=> irqs-off/BH-disabled\n"
   4220		    "#                  | / _----=> need-resched    \n"
   4221		    "#                  || / _---=> hardirq/softirq \n"
   4222		    "#                  ||| / _--=> preempt-depth   \n"
   4223		    "#                  |||| / _-=> migrate-disable \n"
   4224		    "#                  ||||| /     delay           \n"
   4225		    "#  cmd     pid     |||||| time  |   caller     \n"
   4226		    "#     \\   /        ||||||  \\    |    /       \n");
   4227}
   4228
   4229static void print_event_info(struct array_buffer *buf, struct seq_file *m)
   4230{
   4231	unsigned long total;
   4232	unsigned long entries;
   4233
   4234	get_total_entries(buf, &total, &entries);
   4235	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
   4236		   entries, total, num_online_cpus());
   4237	seq_puts(m, "#\n");
   4238}
   4239
   4240static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
   4241				   unsigned int flags)
   4242{
   4243	bool tgid = flags & TRACE_ITER_RECORD_TGID;
   4244
   4245	print_event_info(buf, m);
   4246
   4247	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
   4248	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
   4249}
   4250
   4251static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
   4252				       unsigned int flags)
   4253{
   4254	bool tgid = flags & TRACE_ITER_RECORD_TGID;
   4255	static const char space[] = "            ";
   4256	int prec = tgid ? 12 : 2;
   4257
   4258	print_event_info(buf, m);
   4259
   4260	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
   4261	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
   4262	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
   4263	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
   4264	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
   4265	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
   4266	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
   4267	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
   4268}
   4269
   4270void
   4271print_trace_header(struct seq_file *m, struct trace_iterator *iter)
   4272{
   4273	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
   4274	struct array_buffer *buf = iter->array_buffer;
   4275	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
   4276	struct tracer *type = iter->trace;
   4277	unsigned long entries;
   4278	unsigned long total;
   4279	const char *name = type->name;
   4280
   4281	get_total_entries(buf, &total, &entries);
   4282
   4283	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
   4284		   name, UTS_RELEASE);
   4285	seq_puts(m, "# -----------------------------------"
   4286		 "---------------------------------\n");
   4287	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
   4288		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
   4289		   nsecs_to_usecs(data->saved_latency),
   4290		   entries,
   4291		   total,
   4292		   buf->cpu,
   4293		   preempt_model_none()      ? "server" :
   4294		   preempt_model_voluntary() ? "desktop" :
   4295		   preempt_model_full()      ? "preempt" :
   4296		   preempt_model_rt()        ? "preempt_rt" :
   4297		   "unknown",
   4298		   /* These are reserved for later use */
   4299		   0, 0, 0, 0);
   4300#ifdef CONFIG_SMP
   4301	seq_printf(m, " #P:%d)\n", num_online_cpus());
   4302#else
   4303	seq_puts(m, ")\n");
   4304#endif
   4305	seq_puts(m, "#    -----------------\n");
   4306	seq_printf(m, "#    | task: %.16s-%d "
   4307		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
   4308		   data->comm, data->pid,
   4309		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
   4310		   data->policy, data->rt_priority);
   4311	seq_puts(m, "#    -----------------\n");
   4312
   4313	if (data->critical_start) {
   4314		seq_puts(m, "#  => started at: ");
   4315		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
   4316		trace_print_seq(m, &iter->seq);
   4317		seq_puts(m, "\n#  => ended at:   ");
   4318		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
   4319		trace_print_seq(m, &iter->seq);
   4320		seq_puts(m, "\n#\n");
   4321	}
   4322
   4323	seq_puts(m, "#\n");
   4324}
   4325
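        	/*
        	 * When the "annotate" option is set and the buffers have overrun
        	 * (TRACE_FILE_ANNOTATE), print a one-time marker the first time
        	 * output switches to a given CPU's buffer.
        	 */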
   4326static void test_cpu_buff_start(struct trace_iterator *iter)
   4327{
   4328	struct trace_seq *s = &iter->seq;
   4329	struct trace_array *tr = iter->tr;
   4330
   4331	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
   4332		return;
   4333
   4334	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
   4335		return;
   4336
   4337	if (cpumask_available(iter->started) &&
   4338	    cpumask_test_cpu(iter->cpu, iter->started))
   4339		return;
   4340
   4341	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
   4342		return;
   4343
   4344	if (cpumask_available(iter->started))
   4345		cpumask_set_cpu(iter->cpu, iter->started);
   4346
   4347	/* Don't print started cpu buffer for the first entry of the trace */
   4348	if (iter->idx > 1)
   4349		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
   4350				iter->cpu);
   4351}
   4352
   4353static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
   4354{
   4355	struct trace_array *tr = iter->tr;
   4356	struct trace_seq *s = &iter->seq;
   4357	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
   4358	struct trace_entry *entry;
   4359	struct trace_event *event;
   4360
   4361	entry = iter->ent;
   4362
   4363	test_cpu_buff_start(iter);
   4364
   4365	event = ftrace_find_event(entry->type);
   4366
   4367	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
   4368		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
   4369			trace_print_lat_context(iter);
   4370		else
   4371			trace_print_context(iter);
   4372	}
   4373
   4374	if (trace_seq_has_overflowed(s))
   4375		return TRACE_TYPE_PARTIAL_LINE;
   4376
   4377	if (event)
   4378		return event->funcs->trace(iter, sym_flags, event);
   4379
   4380	trace_seq_printf(s, "Unknown type %d\n", entry->type);
   4381
   4382	return trace_handle_return(s);
   4383}
   4384
   4385static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
   4386{
   4387	struct trace_array *tr = iter->tr;
   4388	struct trace_seq *s = &iter->seq;
   4389	struct trace_entry *entry;
   4390	struct trace_event *event;
   4391
   4392	entry = iter->ent;
   4393
   4394	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
   4395		trace_seq_printf(s, "%d %d %llu ",
   4396				 entry->pid, iter->cpu, iter->ts);
   4397
   4398	if (trace_seq_has_overflowed(s))
   4399		return TRACE_TYPE_PARTIAL_LINE;
   4400
   4401	event = ftrace_find_event(entry->type);
   4402	if (event)
   4403		return event->funcs->raw(iter, 0, event);
   4404
   4405	trace_seq_printf(s, "%d ?\n", entry->type);
   4406
   4407	return trace_handle_return(s);
   4408}
   4409
   4410static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
   4411{
   4412	struct trace_array *tr = iter->tr;
   4413	struct trace_seq *s = &iter->seq;
   4414	unsigned char newline = '\n';
   4415	struct trace_entry *entry;
   4416	struct trace_event *event;
   4417
   4418	entry = iter->ent;
   4419
   4420	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
   4421		SEQ_PUT_HEX_FIELD(s, entry->pid);
   4422		SEQ_PUT_HEX_FIELD(s, iter->cpu);
   4423		SEQ_PUT_HEX_FIELD(s, iter->ts);
   4424		if (trace_seq_has_overflowed(s))
   4425			return TRACE_TYPE_PARTIAL_LINE;
   4426	}
   4427
   4428	event = ftrace_find_event(entry->type);
   4429	if (event) {
   4430		enum print_line_t ret = event->funcs->hex(iter, 0, event);
   4431		if (ret != TRACE_TYPE_HANDLED)
   4432			return ret;
   4433	}
   4434
   4435	SEQ_PUT_FIELD(s, newline);
   4436
   4437	return trace_handle_return(s);
   4438}
   4439
   4440static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
   4441{
   4442	struct trace_array *tr = iter->tr;
   4443	struct trace_seq *s = &iter->seq;
   4444	struct trace_entry *entry;
   4445	struct trace_event *event;
   4446
   4447	entry = iter->ent;
   4448
   4449	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
   4450		SEQ_PUT_FIELD(s, entry->pid);
   4451		SEQ_PUT_FIELD(s, iter->cpu);
   4452		SEQ_PUT_FIELD(s, iter->ts);
   4453		if (trace_seq_has_overflowed(s))
   4454			return TRACE_TYPE_PARTIAL_LINE;
   4455	}
   4456
   4457	event = ftrace_find_event(entry->type);
   4458	return event ? event->funcs->binary(iter, 0, event) :
   4459		TRACE_TYPE_HANDLED;
   4460}
   4461
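        	/* Return 1 if there is nothing left to read in the iterator's buffers, 0 otherwise */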
   4462int trace_empty(struct trace_iterator *iter)
   4463{
   4464	struct ring_buffer_iter *buf_iter;
   4465	int cpu;
   4466
   4467	/* If we are looking at one CPU buffer, only check that one */
   4468	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
   4469		cpu = iter->cpu_file;
   4470		buf_iter = trace_buffer_iter(iter, cpu);
   4471		if (buf_iter) {
   4472			if (!ring_buffer_iter_empty(buf_iter))
   4473				return 0;
   4474		} else {
   4475			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
   4476				return 0;
   4477		}
   4478		return 1;
   4479	}
   4480
   4481	for_each_tracing_cpu(cpu) {
   4482		buf_iter = trace_buffer_iter(iter, cpu);
   4483		if (buf_iter) {
   4484			if (!ring_buffer_iter_empty(buf_iter))
   4485				return 0;
   4486		} else {
   4487			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
   4488				return 0;
   4489		}
   4490	}
   4491
   4492	return 1;
   4493}
   4494
   4495/*  Called with trace_event_read_lock() held. */
   4496enum print_line_t print_trace_line(struct trace_iterator *iter)
   4497{
   4498	struct trace_array *tr = iter->tr;
   4499	unsigned long trace_flags = tr->trace_flags;
   4500	enum print_line_t ret;
   4501
   4502	if (iter->lost_events) {
   4503		if (iter->lost_events == (unsigned long)-1)
   4504			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
   4505					 iter->cpu);
   4506		else
   4507			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
   4508					 iter->cpu, iter->lost_events);
   4509		if (trace_seq_has_overflowed(&iter->seq))
   4510			return TRACE_TYPE_PARTIAL_LINE;
   4511	}
   4512
   4513	if (iter->trace && iter->trace->print_line) {
   4514		ret = iter->trace->print_line(iter);
   4515		if (ret != TRACE_TYPE_UNHANDLED)
   4516			return ret;
   4517	}
   4518
   4519	if (iter->ent->type == TRACE_BPUTS &&
   4520			trace_flags & TRACE_ITER_PRINTK &&
   4521			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
   4522		return trace_print_bputs_msg_only(iter);
   4523
   4524	if (iter->ent->type == TRACE_BPRINT &&
   4525			trace_flags & TRACE_ITER_PRINTK &&
   4526			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
   4527		return trace_print_bprintk_msg_only(iter);
   4528
   4529	if (iter->ent->type == TRACE_PRINT &&
   4530			trace_flags & TRACE_ITER_PRINTK &&
   4531			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
   4532		return trace_print_printk_msg_only(iter);
   4533
   4534	if (trace_flags & TRACE_ITER_BIN)
   4535		return print_bin_fmt(iter);
   4536
   4537	if (trace_flags & TRACE_ITER_HEX)
   4538		return print_hex_fmt(iter);
   4539
   4540	if (trace_flags & TRACE_ITER_RAW)
   4541		return print_raw_fmt(iter);
   4542
   4543	return print_trace_fmt(iter);
   4544}
   4545
   4546void trace_latency_header(struct seq_file *m)
   4547{
   4548	struct trace_iterator *iter = m->private;
   4549	struct trace_array *tr = iter->tr;
   4550
   4551	/* print nothing if the buffers are empty */
   4552	if (trace_empty(iter))
   4553		return;
   4554
   4555	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
   4556		print_trace_header(m, iter);
   4557
   4558	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
   4559		print_lat_help_header(m);
   4560}
   4561
   4562void trace_default_header(struct seq_file *m)
   4563{
   4564	struct trace_iterator *iter = m->private;
   4565	struct trace_array *tr = iter->tr;
   4566	unsigned long trace_flags = tr->trace_flags;
   4567
   4568	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
   4569		return;
   4570
   4571	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
   4572		/* print nothing if the buffers are empty */
   4573		if (trace_empty(iter))
   4574			return;
   4575		print_trace_header(m, iter);
   4576		if (!(trace_flags & TRACE_ITER_VERBOSE))
   4577			print_lat_help_header(m);
   4578	} else {
   4579		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
   4580			if (trace_flags & TRACE_ITER_IRQ_INFO)
   4581				print_func_help_header_irq(iter->array_buffer,
   4582							   m, trace_flags);
   4583			else
   4584				print_func_help_header(iter->array_buffer, m,
   4585						       trace_flags);
   4586		}
   4587	}
   4588}
   4589
   4590static void test_ftrace_alive(struct seq_file *m)
   4591{
   4592	if (!ftrace_is_dead())
   4593		return;
   4594	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
   4595		    "#          MAY BE MISSING FUNCTION EVENTS\n");
   4596}
   4597
   4598#ifdef CONFIG_TRACER_MAX_TRACE
   4599static void show_snapshot_main_help(struct seq_file *m)
   4600{
   4601	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
   4602		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
   4603		    "#                      Takes a snapshot of the main buffer.\n"
   4604		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
    4605	    "#                      (Doesn't have to be '2'; works with any number that\n"
   4606		    "#                       is not a '0' or '1')\n");
   4607}
   4608
   4609static void show_snapshot_percpu_help(struct seq_file *m)
   4610{
   4611	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
   4612#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
   4613	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
   4614		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
   4615#else
   4616	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
   4617		    "#                     Must use main snapshot file to allocate.\n");
   4618#endif
   4619	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
    4620	    "#                      (Doesn't have to be '2'; works with any number that\n"
   4621		    "#                       is not a '0' or '1')\n");
   4622}
   4623
   4624static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
   4625{
   4626	if (iter->tr->allocated_snapshot)
   4627		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
   4628	else
   4629		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
   4630
   4631	seq_puts(m, "# Snapshot commands:\n");
   4632	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
   4633		show_snapshot_main_help(m);
   4634	else
   4635		show_snapshot_percpu_help(m);
   4636}
   4637#else
   4638/* Should never be called */
   4639static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
   4640#endif
   4641
   4642static int s_show(struct seq_file *m, void *v)
   4643{
   4644	struct trace_iterator *iter = v;
   4645	int ret;
   4646
   4647	if (iter->ent == NULL) {
   4648		if (iter->tr) {
   4649			seq_printf(m, "# tracer: %s\n", iter->trace->name);
   4650			seq_puts(m, "#\n");
   4651			test_ftrace_alive(m);
   4652		}
   4653		if (iter->snapshot && trace_empty(iter))
   4654			print_snapshot_help(m, iter);
   4655		else if (iter->trace && iter->trace->print_header)
   4656			iter->trace->print_header(m);
   4657		else
   4658			trace_default_header(m);
   4659
   4660	} else if (iter->leftover) {
   4661		/*
   4662		 * If we filled the seq_file buffer earlier, we
   4663		 * want to just show it now.
   4664		 */
   4665		ret = trace_print_seq(m, &iter->seq);
   4666
   4667		/* ret should this time be zero, but you never know */
   4668		iter->leftover = ret;
   4669
   4670	} else {
   4671		print_trace_line(iter);
   4672		ret = trace_print_seq(m, &iter->seq);
   4673		/*
   4674		 * If we overflow the seq_file buffer, then it will
   4675		 * ask us for this data again at start up.
   4676		 * Use that instead.
   4677		 *  ret is 0 if seq_file write succeeded.
   4678		 *        -1 otherwise.
   4679		 */
   4680		iter->leftover = ret;
   4681	}
   4682
   4683	return 0;
   4684}
   4685
   4686/*
    4687	 * Should be used after trace_array_get(); trace_types_lock
   4688 * ensures that i_cdev was already initialized.
   4689 */
   4690static inline int tracing_get_cpu(struct inode *inode)
   4691{
   4692	if (inode->i_cdev) /* See trace_create_cpu_file() */
   4693		return (long)inode->i_cdev - 1;
   4694	return RING_BUFFER_ALL_CPUS;
   4695}
   4696
   4697static const struct seq_operations tracer_seq_ops = {
   4698	.start		= s_start,
   4699	.next		= s_next,
   4700	.stop		= s_stop,
   4701	.show		= s_show,
   4702};
   4703
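        	/*
        	 * Set up the trace_iterator behind a read of the trace (or snapshot)
        	 * file: take a private copy of the current tracer, allocate per-CPU
        	 * ring buffer iterators, and pause tracing while the file is read if
        	 * the pause-on-trace option is set and this is not the snapshot file.
        	 */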
   4704static struct trace_iterator *
   4705__tracing_open(struct inode *inode, struct file *file, bool snapshot)
   4706{
   4707	struct trace_array *tr = inode->i_private;
   4708	struct trace_iterator *iter;
   4709	int cpu;
   4710
   4711	if (tracing_disabled)
   4712		return ERR_PTR(-ENODEV);
   4713
   4714	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
   4715	if (!iter)
   4716		return ERR_PTR(-ENOMEM);
   4717
   4718	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
   4719				    GFP_KERNEL);
   4720	if (!iter->buffer_iter)
   4721		goto release;
   4722
   4723	/*
   4724	 * trace_find_next_entry() may need to save off iter->ent.
    4725	 * It will place it into the iter->temp buffer. As most
    4726	 * events are less than 128 bytes, allocate a buffer of that
    4727	 * size. If one is greater, then trace_find_next_entry() will
   4728	 * allocate a new buffer to adjust for the bigger iter->ent.
   4729	 * It's not critical if it fails to get allocated here.
   4730	 */
   4731	iter->temp = kmalloc(128, GFP_KERNEL);
   4732	if (iter->temp)
   4733		iter->temp_size = 128;
   4734
   4735	/*
    4736	 * trace_event_printf() may need to modify the given format
    4737	 * string to replace %p with %px so that it shows the real
    4738	 * address instead of a hashed value. However, that is only
    4739	 * needed for event tracing; other tracers may not need it.
    4740	 * Defer the allocation until it is needed.
   4741	 */
   4742	iter->fmt = NULL;
   4743	iter->fmt_size = 0;
   4744
   4745	/*
   4746	 * We make a copy of the current tracer to avoid concurrent
   4747	 * changes on it while we are reading.
   4748	 */
   4749	mutex_lock(&trace_types_lock);
   4750	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
   4751	if (!iter->trace)
   4752		goto fail;
   4753
   4754	*iter->trace = *tr->current_trace;
   4755
   4756	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
   4757		goto fail;
   4758
   4759	iter->tr = tr;
   4760
   4761#ifdef CONFIG_TRACER_MAX_TRACE
   4762	/* Currently only the top directory has a snapshot */
   4763	if (tr->current_trace->print_max || snapshot)
   4764		iter->array_buffer = &tr->max_buffer;
   4765	else
   4766#endif
   4767		iter->array_buffer = &tr->array_buffer;
   4768	iter->snapshot = snapshot;
   4769	iter->pos = -1;
   4770	iter->cpu_file = tracing_get_cpu(inode);
   4771	mutex_init(&iter->mutex);
   4772
   4773	/* Notify the tracer early; before we stop tracing. */
   4774	if (iter->trace->open)
   4775		iter->trace->open(iter);
   4776
   4777	/* Annotate start of buffers if we had overruns */
   4778	if (ring_buffer_overruns(iter->array_buffer->buffer))
   4779		iter->iter_flags |= TRACE_FILE_ANNOTATE;
   4780
   4781	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
   4782	if (trace_clocks[tr->clock_id].in_ns)
   4783		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
   4784
   4785	/*
   4786	 * If pause-on-trace is enabled, then stop the trace while
   4787	 * dumping, unless this is the "snapshot" file
   4788	 */
   4789	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
   4790		tracing_stop_tr(tr);
   4791
   4792	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
   4793		for_each_tracing_cpu(cpu) {
   4794			iter->buffer_iter[cpu] =
   4795				ring_buffer_read_prepare(iter->array_buffer->buffer,
   4796							 cpu, GFP_KERNEL);
   4797		}
   4798		ring_buffer_read_prepare_sync();
   4799		for_each_tracing_cpu(cpu) {
   4800			ring_buffer_read_start(iter->buffer_iter[cpu]);
   4801			tracing_iter_reset(iter, cpu);
   4802		}
   4803	} else {
   4804		cpu = iter->cpu_file;
   4805		iter->buffer_iter[cpu] =
   4806			ring_buffer_read_prepare(iter->array_buffer->buffer,
   4807						 cpu, GFP_KERNEL);
   4808		ring_buffer_read_prepare_sync();
   4809		ring_buffer_read_start(iter->buffer_iter[cpu]);
   4810		tracing_iter_reset(iter, cpu);
   4811	}
   4812
   4813	mutex_unlock(&trace_types_lock);
   4814
   4815	return iter;
   4816
   4817 fail:
   4818	mutex_unlock(&trace_types_lock);
   4819	kfree(iter->trace);
   4820	kfree(iter->temp);
   4821	kfree(iter->buffer_iter);
   4822release:
   4823	seq_release_private(inode, file);
   4824	return ERR_PTR(-ENOMEM);
   4825}
   4826
   4827int tracing_open_generic(struct inode *inode, struct file *filp)
   4828{
   4829	int ret;
   4830
   4831	ret = tracing_check_open_get_tr(NULL);
   4832	if (ret)
   4833		return ret;
   4834
   4835	filp->private_data = inode->i_private;
   4836	return 0;
   4837}
   4838
   4839bool tracing_is_disabled(void)
   4840{
    4841	return tracing_disabled ? true : false;
   4842}
   4843
   4844/*
   4845 * Open and update trace_array ref count.
   4846 * Must have the current trace_array passed to it.
   4847 */
   4848int tracing_open_generic_tr(struct inode *inode, struct file *filp)
   4849{
   4850	struct trace_array *tr = inode->i_private;
   4851	int ret;
   4852
   4853	ret = tracing_check_open_get_tr(tr);
   4854	if (ret)
   4855		return ret;
   4856
   4857	filp->private_data = inode->i_private;
   4858
   4859	return 0;
   4860}
   4861
   4862static int tracing_mark_open(struct inode *inode, struct file *filp)
   4863{
   4864	stream_open(inode, filp);
   4865	return tracing_open_generic_tr(inode, filp);
   4866}
   4867
   4868static int tracing_release(struct inode *inode, struct file *file)
   4869{
   4870	struct trace_array *tr = inode->i_private;
   4871	struct seq_file *m = file->private_data;
   4872	struct trace_iterator *iter;
   4873	int cpu;
   4874
   4875	if (!(file->f_mode & FMODE_READ)) {
   4876		trace_array_put(tr);
   4877		return 0;
   4878	}
   4879
   4880	/* Writes do not use seq_file */
   4881	iter = m->private;
   4882	mutex_lock(&trace_types_lock);
   4883
   4884	for_each_tracing_cpu(cpu) {
   4885		if (iter->buffer_iter[cpu])
   4886			ring_buffer_read_finish(iter->buffer_iter[cpu]);
   4887	}
   4888
   4889	if (iter->trace && iter->trace->close)
   4890		iter->trace->close(iter);
   4891
   4892	if (!iter->snapshot && tr->stop_count)
   4893		/* reenable tracing if it was previously enabled */
   4894		tracing_start_tr(tr);
   4895
   4896	__trace_array_put(tr);
   4897
   4898	mutex_unlock(&trace_types_lock);
   4899
   4900	mutex_destroy(&iter->mutex);
   4901	free_cpumask_var(iter->started);
   4902	kfree(iter->fmt);
   4903	kfree(iter->temp);
   4904	kfree(iter->trace);
   4905	kfree(iter->buffer_iter);
   4906	seq_release_private(inode, file);
   4907
   4908	return 0;
   4909}
   4910
   4911static int tracing_release_generic_tr(struct inode *inode, struct file *file)
   4912{
   4913	struct trace_array *tr = inode->i_private;
   4914
   4915	trace_array_put(tr);
   4916	return 0;
   4917}
   4918
   4919static int tracing_single_release_tr(struct inode *inode, struct file *file)
   4920{
   4921	struct trace_array *tr = inode->i_private;
   4922
   4923	trace_array_put(tr);
   4924
   4925	return single_release(inode, file);
   4926}
   4927
   4928static int tracing_open(struct inode *inode, struct file *file)
   4929{
   4930	struct trace_array *tr = inode->i_private;
   4931	struct trace_iterator *iter;
   4932	int ret;
   4933
   4934	ret = tracing_check_open_get_tr(tr);
   4935	if (ret)
   4936		return ret;
   4937
   4938	/* If this file was open for write, then erase contents */
   4939	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
   4940		int cpu = tracing_get_cpu(inode);
   4941		struct array_buffer *trace_buf = &tr->array_buffer;
   4942
   4943#ifdef CONFIG_TRACER_MAX_TRACE
   4944		if (tr->current_trace->print_max)
   4945			trace_buf = &tr->max_buffer;
   4946#endif
   4947
   4948		if (cpu == RING_BUFFER_ALL_CPUS)
   4949			tracing_reset_online_cpus(trace_buf);
   4950		else
   4951			tracing_reset_cpu(trace_buf, cpu);
   4952	}
   4953
   4954	if (file->f_mode & FMODE_READ) {
   4955		iter = __tracing_open(inode, file, false);
   4956		if (IS_ERR(iter))
   4957			ret = PTR_ERR(iter);
   4958		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
   4959			iter->iter_flags |= TRACE_FILE_LAT_FMT;
   4960	}
   4961
   4962	if (ret < 0)
   4963		trace_array_put(tr);
   4964
   4965	return ret;
   4966}
   4967
   4968/*
   4969 * Some tracers are not suitable for instance buffers.
   4970 * A tracer is always available for the global array (toplevel)
   4971 * or if it explicitly states that it is.
   4972 */
   4973static bool
   4974trace_ok_for_array(struct tracer *t, struct trace_array *tr)
   4975{
   4976	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
   4977}
   4978
   4979/* Find the next tracer that this trace array may use */
   4980static struct tracer *
   4981get_tracer_for_array(struct trace_array *tr, struct tracer *t)
   4982{
   4983	while (t && !trace_ok_for_array(t, tr))
   4984		t = t->next;
   4985
   4986	return t;
   4987}
   4988
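        	/*
        	 * seq_file callbacks that list the registered tracers usable by this
        	 * instance (backing the available_tracers file).
        	 */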
   4989static void *
   4990t_next(struct seq_file *m, void *v, loff_t *pos)
   4991{
   4992	struct trace_array *tr = m->private;
   4993	struct tracer *t = v;
   4994
   4995	(*pos)++;
   4996
   4997	if (t)
   4998		t = get_tracer_for_array(tr, t->next);
   4999
   5000	return t;
   5001}
   5002
   5003static void *t_start(struct seq_file *m, loff_t *pos)
   5004{
   5005	struct trace_array *tr = m->private;
   5006	struct tracer *t;
   5007	loff_t l = 0;
   5008
   5009	mutex_lock(&trace_types_lock);
   5010
   5011	t = get_tracer_for_array(tr, trace_types);
   5012	for (; t && l < *pos; t = t_next(m, t, &l))
   5013			;
   5014
   5015	return t;
   5016}
   5017
   5018static void t_stop(struct seq_file *m, void *p)
   5019{
   5020	mutex_unlock(&trace_types_lock);
   5021}
   5022
   5023static int t_show(struct seq_file *m, void *v)
   5024{
   5025	struct tracer *t = v;
   5026
   5027	if (!t)
   5028		return 0;
   5029
   5030	seq_puts(m, t->name);
   5031	if (t->next)
   5032		seq_putc(m, ' ');
   5033	else
   5034		seq_putc(m, '\n');
   5035
   5036	return 0;
   5037}
   5038
   5039static const struct seq_operations show_traces_seq_ops = {
   5040	.start		= t_start,
   5041	.next		= t_next,
   5042	.stop		= t_stop,
   5043	.show		= t_show,
   5044};
   5045
   5046static int show_traces_open(struct inode *inode, struct file *file)
   5047{
   5048	struct trace_array *tr = inode->i_private;
   5049	struct seq_file *m;
   5050	int ret;
   5051
   5052	ret = tracing_check_open_get_tr(tr);
   5053	if (ret)
   5054		return ret;
   5055
   5056	ret = seq_open(file, &show_traces_seq_ops);
   5057	if (ret) {
   5058		trace_array_put(tr);
   5059		return ret;
   5060	}
   5061
   5062	m = file->private_data;
   5063	m->private = tr;
   5064
   5065	return 0;
   5066}
   5067
   5068static int show_traces_release(struct inode *inode, struct file *file)
   5069{
   5070	struct trace_array *tr = inode->i_private;
   5071
   5072	trace_array_put(tr);
   5073	return seq_release(inode, file);
   5074}
   5075
   5076static ssize_t
   5077tracing_write_stub(struct file *filp, const char __user *ubuf,
   5078		   size_t count, loff_t *ppos)
   5079{
   5080	return count;
   5081}
   5082
   5083loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
   5084{
   5085	int ret;
   5086
   5087	if (file->f_mode & FMODE_READ)
   5088		ret = seq_lseek(file, offset, whence);
   5089	else
   5090		file->f_pos = ret = 0;
   5091
   5092	return ret;
   5093}
   5094
   5095static const struct file_operations tracing_fops = {
   5096	.open		= tracing_open,
   5097	.read		= seq_read,
   5098	.write		= tracing_write_stub,
   5099	.llseek		= tracing_lseek,
   5100	.release	= tracing_release,
   5101};
   5102
   5103static const struct file_operations show_traces_fops = {
   5104	.open		= show_traces_open,
   5105	.read		= seq_read,
   5106	.llseek		= seq_lseek,
   5107	.release	= show_traces_release,
   5108};
   5109
   5110static ssize_t
   5111tracing_cpumask_read(struct file *filp, char __user *ubuf,
   5112		     size_t count, loff_t *ppos)
   5113{
   5114	struct trace_array *tr = file_inode(filp)->i_private;
   5115	char *mask_str;
   5116	int len;
   5117
   5118	len = snprintf(NULL, 0, "%*pb\n",
   5119		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
   5120	mask_str = kmalloc(len, GFP_KERNEL);
   5121	if (!mask_str)
   5122		return -ENOMEM;
   5123
   5124	len = snprintf(mask_str, len, "%*pb\n",
   5125		       cpumask_pr_args(tr->tracing_cpumask));
   5126	if (len >= count) {
   5127		count = -EINVAL;
   5128		goto out_err;
   5129	}
   5130	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
   5131
   5132out_err:
   5133	kfree(mask_str);
   5134
   5135	return count;
   5136}
   5137
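        	/*
        	 * Apply a new tracing CPU mask to @tr: recording is disabled on CPUs
        	 * leaving the mask and re-enabled on CPUs entering it, then the new
        	 * mask is copied into tr->tracing_cpumask.
        	 */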
   5138int tracing_set_cpumask(struct trace_array *tr,
   5139			cpumask_var_t tracing_cpumask_new)
   5140{
   5141	int cpu;
   5142
   5143	if (!tr)
   5144		return -EINVAL;
   5145
   5146	local_irq_disable();
   5147	arch_spin_lock(&tr->max_lock);
   5148	for_each_tracing_cpu(cpu) {
   5149		/*
   5150		 * Increase/decrease the disabled counter if we are
   5151		 * about to flip a bit in the cpumask:
   5152		 */
   5153		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
   5154				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
   5155			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
   5156			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
   5157		}
   5158		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
   5159				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
   5160			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
   5161			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
   5162		}
   5163	}
   5164	arch_spin_unlock(&tr->max_lock);
   5165	local_irq_enable();
   5166
   5167	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
   5168
   5169	return 0;
   5170}
   5171
   5172static ssize_t
   5173tracing_cpumask_write(struct file *filp, const char __user *ubuf,
   5174		      size_t count, loff_t *ppos)
   5175{
   5176	struct trace_array *tr = file_inode(filp)->i_private;
   5177	cpumask_var_t tracing_cpumask_new;
   5178	int err;
   5179
   5180	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
   5181		return -ENOMEM;
   5182
   5183	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
   5184	if (err)
   5185		goto err_free;
   5186
   5187	err = tracing_set_cpumask(tr, tracing_cpumask_new);
   5188	if (err)
   5189		goto err_free;
   5190
   5191	free_cpumask_var(tracing_cpumask_new);
   5192
   5193	return count;
   5194
   5195err_free:
   5196	free_cpumask_var(tracing_cpumask_new);
   5197
   5198	return err;
   5199}
   5200
   5201static const struct file_operations tracing_cpumask_fops = {
   5202	.open		= tracing_open_generic_tr,
   5203	.read		= tracing_cpumask_read,
   5204	.write		= tracing_cpumask_write,
   5205	.release	= tracing_release_generic_tr,
   5206	.llseek		= generic_file_llseek,
   5207};
   5208
   5209static int tracing_trace_options_show(struct seq_file *m, void *v)
   5210{
   5211	struct tracer_opt *trace_opts;
   5212	struct trace_array *tr = m->private;
   5213	u32 tracer_flags;
   5214	int i;
   5215
   5216	mutex_lock(&trace_types_lock);
   5217	tracer_flags = tr->current_trace->flags->val;
   5218	trace_opts = tr->current_trace->flags->opts;
   5219
   5220	for (i = 0; trace_options[i]; i++) {
   5221		if (tr->trace_flags & (1 << i))
   5222			seq_printf(m, "%s\n", trace_options[i]);
   5223		else
   5224			seq_printf(m, "no%s\n", trace_options[i]);
   5225	}
   5226
   5227	for (i = 0; trace_opts[i].name; i++) {
   5228		if (tracer_flags & trace_opts[i].bit)
   5229			seq_printf(m, "%s\n", trace_opts[i].name);
   5230		else
   5231			seq_printf(m, "no%s\n", trace_opts[i].name);
   5232	}
   5233	mutex_unlock(&trace_types_lock);
   5234
   5235	return 0;
   5236}
   5237
   5238static int __set_tracer_option(struct trace_array *tr,
   5239			       struct tracer_flags *tracer_flags,
   5240			       struct tracer_opt *opts, int neg)
   5241{
   5242	struct tracer *trace = tracer_flags->trace;
   5243	int ret;
   5244
   5245	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
   5246	if (ret)
   5247		return ret;
   5248
   5249	if (neg)
   5250		tracer_flags->val &= ~opts->bit;
   5251	else
   5252		tracer_flags->val |= opts->bit;
   5253	return 0;
   5254}
   5255
   5256/* Try to assign a tracer specific option */
   5257static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
   5258{
   5259	struct tracer *trace = tr->current_trace;
   5260	struct tracer_flags *tracer_flags = trace->flags;
   5261	struct tracer_opt *opts = NULL;
   5262	int i;
   5263
   5264	for (i = 0; tracer_flags->opts[i].name; i++) {
   5265		opts = &tracer_flags->opts[i];
   5266
   5267		if (strcmp(cmp, opts->name) == 0)
   5268			return __set_tracer_option(tr, trace->flags, opts, neg);
   5269	}
   5270
   5271	return -EINVAL;
   5272}
   5273
   5274/* Some tracers require overwrite to stay enabled */
   5275int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
   5276{
   5277	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
   5278		return -1;
   5279
   5280	return 0;
   5281}
   5282
   5283int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
   5284{
   5285	int *map;
   5286
   5287	if ((mask == TRACE_ITER_RECORD_TGID) ||
   5288	    (mask == TRACE_ITER_RECORD_CMD))
   5289		lockdep_assert_held(&event_mutex);
   5290
   5291	/* do nothing if flag is already set */
   5292	if (!!(tr->trace_flags & mask) == !!enabled)
   5293		return 0;
   5294
   5295	/* Give the tracer a chance to approve the change */
   5296	if (tr->current_trace->flag_changed)
   5297		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
   5298			return -EINVAL;
   5299
   5300	if (enabled)
   5301		tr->trace_flags |= mask;
   5302	else
   5303		tr->trace_flags &= ~mask;
   5304
   5305	if (mask == TRACE_ITER_RECORD_CMD)
   5306		trace_event_enable_cmd_record(enabled);
   5307
   5308	if (mask == TRACE_ITER_RECORD_TGID) {
   5309		if (!tgid_map) {
   5310			tgid_map_max = pid_max;
   5311			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
   5312				       GFP_KERNEL);
   5313
   5314			/*
   5315			 * Pairs with smp_load_acquire() in
   5316			 * trace_find_tgid_ptr() to ensure that if it observes
   5317			 * the tgid_map we just allocated then it also observes
   5318			 * the corresponding tgid_map_max value.
   5319			 */
   5320			smp_store_release(&tgid_map, map);
   5321		}
   5322		if (!tgid_map) {
   5323			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
   5324			return -ENOMEM;
   5325		}
   5326
   5327		trace_event_enable_tgid_record(enabled);
   5328	}
   5329
   5330	if (mask == TRACE_ITER_EVENT_FORK)
   5331		trace_event_follow_fork(tr, enabled);
   5332
   5333	if (mask == TRACE_ITER_FUNC_FORK)
   5334		ftrace_pid_follow_fork(tr, enabled);
   5335
   5336	if (mask == TRACE_ITER_OVERWRITE) {
   5337		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
   5338#ifdef CONFIG_TRACER_MAX_TRACE
   5339		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
   5340#endif
   5341	}
   5342
   5343	if (mask == TRACE_ITER_PRINTK) {
   5344		trace_printk_start_stop_comm(enabled);
   5345		trace_printk_control(enabled);
   5346	}
   5347
   5348	return 0;
   5349}
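
/*
 * Illustrative sketch of the TRACE_ITER_RECORD_TGID path above (the option
 * name "record-tgid" is assumed here to match the flag): the first time the
 * flag is enabled, a pid->tgid lookup table sized by pid_max is allocated
 * and published with smp_store_release() so that readers which observe
 * tgid_map also observe tgid_map_max.
 *
 *   # echo record-tgid > trace_options
 *   # cat saved_tgids            (pid/tgid pairs recorded since then)
 */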
   5350
   5351int trace_set_options(struct trace_array *tr, char *option)
   5352{
   5353	char *cmp;
   5354	int neg = 0;
   5355	int ret;
   5356	size_t orig_len = strlen(option);
   5357	int len;
   5358
   5359	cmp = strstrip(option);
   5360
   5361	len = str_has_prefix(cmp, "no");
   5362	if (len)
   5363		neg = 1;
   5364
   5365	cmp += len;
   5366
   5367	mutex_lock(&event_mutex);
   5368	mutex_lock(&trace_types_lock);
   5369
   5370	ret = match_string(trace_options, -1, cmp);
   5371	/* If not one of the global trace options, try the tracer-specific options */
   5372	if (ret < 0)
   5373		ret = set_tracer_option(tr, cmp, neg);
   5374	else
   5375		ret = set_tracer_flag(tr, 1 << ret, !neg);
   5376
   5377	mutex_unlock(&trace_types_lock);
   5378	mutex_unlock(&event_mutex);
   5379
   5380	/*
   5381	 * If the first trailing whitespace is replaced with '\0' by strstrip,
   5382	 * turn it back into a space.
   5383	 */
   5384	if (orig_len > strlen(option))
   5385		option[strlen(option)] = ' ';
   5386
   5387	return ret;
   5388}
   5389
   5390static void __init apply_trace_boot_options(void)
   5391{
   5392	char *buf = trace_boot_options_buf;
   5393	char *option;
   5394
   5395	while (true) {
   5396		option = strsep(&buf, ",");
   5397
   5398		if (!option)
   5399			break;
   5400
   5401		if (*option)
   5402			trace_set_options(&global_trace, option);
   5403
   5404		/* Put back the comma to allow this to be called again */
   5405		if (buf)
   5406			*(buf - 1) = ',';
   5407	}
   5408}
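
/*
 * Illustrative example (the "trace_options=" kernel parameter is assumed
 * here; it fills trace_boot_options_buf elsewhere in this file): a boot
 * command line such as
 *
 *   trace_options=sym-offset,nooverwrite
 *
 * is split on commas by the loop above and each entry is applied to the
 * global trace instance via trace_set_options().
 */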
   5409
   5410static ssize_t
   5411tracing_trace_options_write(struct file *filp, const char __user *ubuf,
   5412			size_t cnt, loff_t *ppos)
   5413{
   5414	struct seq_file *m = filp->private_data;
   5415	struct trace_array *tr = m->private;
   5416	char buf[64];
   5417	int ret;
   5418
   5419	if (cnt >= sizeof(buf))
   5420		return -EINVAL;
   5421
   5422	if (copy_from_user(buf, ubuf, cnt))
   5423		return -EFAULT;
   5424
   5425	buf[cnt] = 0;
   5426
   5427	ret = trace_set_options(tr, buf);
   5428	if (ret < 0)
   5429		return ret;
   5430
   5431	*ppos += cnt;
   5432
   5433	return cnt;
   5434}
   5435
   5436static int tracing_trace_options_open(struct inode *inode, struct file *file)
   5437{
   5438	struct trace_array *tr = inode->i_private;
   5439	int ret;
   5440
   5441	ret = tracing_check_open_get_tr(tr);
   5442	if (ret)
   5443		return ret;
   5444
   5445	ret = single_open(file, tracing_trace_options_show, inode->i_private);
   5446	if (ret < 0)
   5447		trace_array_put(tr);
   5448
   5449	return ret;
   5450}
   5451
   5452static const struct file_operations tracing_iter_fops = {
   5453	.open		= tracing_trace_options_open,
   5454	.read		= seq_read,
   5455	.llseek		= seq_lseek,
   5456	.release	= tracing_single_release_tr,
   5457	.write		= tracing_trace_options_write,
   5458};
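
/*
 * Illustrative usage of the trace_options interface implemented above
 * (option names vary with the kernel configuration and current tracer;
 * the ones shown are examples): reading lists every core option and every
 * option of the current tracer, one per line, prefixed with "no" when the
 * option is off; writing toggles a single option.
 *
 *   # cat trace_options
 *   print-parent
 *   nosym-offset
 *   ...
 *   # echo nooverwrite > trace_options     (clear TRACE_ITER_OVERWRITE)
 */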
   5459
   5460static const char readme_msg[] =
   5461	"tracing mini-HOWTO:\n\n"
   5462	"# echo 0 > tracing_on : quick way to disable tracing\n"
   5463	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
   5464	" Important files:\n"
   5465	"  trace\t\t\t- The static contents of the buffer\n"
   5466	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
   5467	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
   5468	"  current_tracer\t- function and latency tracers\n"
   5469	"  available_tracers\t- list of configured tracers for current_tracer\n"
   5470	"  error_log\t- error log for failed commands (that support it)\n"
   5471	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
   5472	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
   5473	"  trace_clock\t\t- change the clock used to order events\n"
   5474	"       local:   Per cpu clock but may not be synced across CPUs\n"
   5475	"      global:   Synced across CPUs but slows tracing down.\n"
   5476	"     counter:   Not a clock, but just an increment\n"
   5477	"      uptime:   Jiffy counter from time of boot\n"
   5478	"        perf:   Same clock that perf events use\n"
   5479#ifdef CONFIG_X86_64
   5480	"     x86-tsc:   TSC cycle counter\n"
   5481#endif
   5482	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
   5483	"       delta:   Delta difference against a buffer-wide timestamp\n"
   5484	"    absolute:   Absolute (standalone) timestamp\n"
   5485	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
   5486	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
   5487	"  tracing_cpumask\t- Limit which CPUs to trace\n"
   5488	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
   5489	"\t\t\t  Remove sub-buffer with rmdir\n"
   5490	"  trace_options\t\t- Set format or modify how tracing happens\n"
   5491	"\t\t\t  Disable an option by prefixing 'no' to the\n"
   5492	"\t\t\t  option name\n"
   5493	"  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
   5494#ifdef CONFIG_DYNAMIC_FTRACE
   5495	"\n  available_filter_functions - list of functions that can be filtered on\n"
   5496	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
   5497	"\t\t\t  functions\n"
   5498	"\t     accepts: func_full_name or glob-matching-pattern\n"
   5499	"\t     modules: Can select a group via module\n"
   5500	"\t      Format: :mod:<module-name>\n"
   5501	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
   5502	"\t    triggers: a command to perform when function is hit\n"
   5503	"\t      Format: <function>:<trigger>[:count]\n"
   5504	"\t     trigger: traceon, traceoff\n"
   5505	"\t\t      enable_event:<system>:<event>\n"
   5506	"\t\t      disable_event:<system>:<event>\n"
   5507#ifdef CONFIG_STACKTRACE
   5508	"\t\t      stacktrace\n"
   5509#endif
   5510#ifdef CONFIG_TRACER_SNAPSHOT
   5511	"\t\t      snapshot\n"
   5512#endif
   5513	"\t\t      dump\n"
   5514	"\t\t      cpudump\n"
   5515	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
   5516	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
   5517	"\t     The first one will disable tracing every time do_fault is hit\n"
   5518	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
   5519	"\t       The first time do_trap is hit and it disables tracing, the\n"
   5520	"\t       counter will decrement to 2. If tracing is already disabled,\n"
   5521	"\t       the counter will not decrement. It only decrements when the\n"
   5522	"\t       trigger did work\n"
   5523	"\t     To remove trigger without count:\n"
   5524	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
   5525	"\t     To remove trigger with a count:\n"
   5526	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
   5527	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
   5528	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
   5529	"\t    modules: Can select a group via module command :mod:\n"
   5530	"\t    Does not accept triggers\n"
   5531#endif /* CONFIG_DYNAMIC_FTRACE */
   5532#ifdef CONFIG_FUNCTION_TRACER
   5533	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
   5534	"\t\t    (function)\n"
   5535	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
   5536	"\t\t    (function)\n"
   5537#endif
   5538#ifdef CONFIG_FUNCTION_GRAPH_TRACER
   5539	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
   5540	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
   5541	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
   5542#endif
   5543#ifdef CONFIG_TRACER_SNAPSHOT
   5544	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
   5545	"\t\t\t  snapshot buffer. Read the contents for more\n"
   5546	"\t\t\t  information\n"
   5547#endif
   5548#ifdef CONFIG_STACK_TRACER
   5549	"  stack_trace\t\t- Shows the max stack trace when active\n"
   5550	"  stack_max_size\t- Shows current max stack size that was traced\n"
   5551	"\t\t\t  Write into this file to reset the max size (trigger a\n"
   5552	"\t\t\t  new trace)\n"
   5553#ifdef CONFIG_DYNAMIC_FTRACE
   5554	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
   5555	"\t\t\t  traces\n"
   5556#endif
   5557#endif /* CONFIG_STACK_TRACER */
   5558#ifdef CONFIG_DYNAMIC_EVENTS
   5559	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
   5560	"\t\t\t  Write into this file to define/undefine new trace events.\n"
   5561#endif
   5562#ifdef CONFIG_KPROBE_EVENTS
   5563	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
   5564	"\t\t\t  Write into this file to define/undefine new trace events.\n"
   5565#endif
   5566#ifdef CONFIG_UPROBE_EVENTS
   5567	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
   5568	"\t\t\t  Write into this file to define/undefine new trace events.\n"
   5569#endif
   5570#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
   5571	"\t  accepts: event-definitions (one definition per line)\n"
   5572	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
   5573	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
   5574#ifdef CONFIG_HIST_TRIGGERS
   5575	"\t           s:[synthetic/]<event> <field> [<field>]\n"
   5576#endif
   5577	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
   5578	"\t           -:[<group>/]<event>\n"
   5579#ifdef CONFIG_KPROBE_EVENTS
   5580	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
   5581	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
   5582#endif
   5583#ifdef CONFIG_UPROBE_EVENTS
   5584	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
   5585#endif
   5586	"\t     args: <name>=fetcharg[:type]\n"
   5587	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
   5588#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
   5589	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
   5590#else
   5591	"\t           $stack<index>, $stack, $retval, $comm,\n"
   5592#endif
   5593	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
   5594	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
   5595	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
   5596	"\t           <type>\\[<array-size>\\]\n"
   5597#ifdef CONFIG_HIST_TRIGGERS
   5598	"\t    field: <stype> <name>;\n"
   5599	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
   5600	"\t           [unsigned] char/int/long\n"
   5601#endif
   5602	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
   5603	"\t            of the <attached-group>/<attached-event>.\n"
   5604#endif
   5605	"  events/\t\t- Directory containing all trace event subsystems:\n"
   5606	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
   5607	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
   5608	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
   5609	"\t\t\t  events\n"
   5610	"      filter\t\t- If set, only events passing filter are traced\n"
   5611	"  events/<system>/<event>/\t- Directory containing control files for\n"
   5612	"\t\t\t  <event>:\n"
   5613	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
   5614	"      filter\t\t- If set, only events passing filter are traced\n"
   5615	"      trigger\t\t- If set, a command to perform when event is hit\n"
   5616	"\t    Format: <trigger>[:count][if <filter>]\n"
   5617	"\t   trigger: traceon, traceoff\n"
   5618	"\t            enable_event:<system>:<event>\n"
   5619	"\t            disable_event:<system>:<event>\n"
   5620#ifdef CONFIG_HIST_TRIGGERS
   5621	"\t            enable_hist:<system>:<event>\n"
   5622	"\t            disable_hist:<system>:<event>\n"
   5623#endif
   5624#ifdef CONFIG_STACKTRACE
   5625	"\t\t    stacktrace\n"
   5626#endif
   5627#ifdef CONFIG_TRACER_SNAPSHOT
   5628	"\t\t    snapshot\n"
   5629#endif
   5630#ifdef CONFIG_HIST_TRIGGERS
   5631	"\t\t    hist (see below)\n"
   5632#endif
   5633	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
   5634	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
   5635	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
   5636	"\t                  events/block/block_unplug/trigger\n"
   5637	"\t   The first disables tracing every time block_unplug is hit.\n"
   5638	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
   5639	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
   5640	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
   5641	"\t   Like function triggers, the counter is only decremented if it\n"
   5642	"\t    enabled or disabled tracing.\n"
   5643	"\t   To remove a trigger without a count:\n"
   5644	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
   5645	"\t   To remove a trigger with a count:\n"
   5646	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
   5647	"\t   Filters can be ignored when removing a trigger.\n"
   5648#ifdef CONFIG_HIST_TRIGGERS
   5649	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
   5650	"\t    Format: hist:keys=<field1[,field2,...]>\n"
   5651	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
   5652	"\t            [:values=<field1[,field2,...]>]\n"
   5653	"\t            [:sort=<field1[,field2,...]>]\n"
   5654	"\t            [:size=#entries]\n"
   5655	"\t            [:pause][:continue][:clear]\n"
   5656	"\t            [:name=histname1]\n"
   5657	"\t            [:<handler>.<action>]\n"
   5658	"\t            [if <filter>]\n\n"
   5659	"\t    Note, special fields can be used as well:\n"
   5660	"\t            common_timestamp - to record current timestamp\n"
   5661	"\t            common_cpu - to record the CPU the event happened on\n"
   5662	"\n"
   5663	"\t    A hist trigger variable can be:\n"
   5664	"\t        - a reference to a field e.g. x=current_timestamp,\n"
   5665	"\t        - a reference to another variable e.g. y=$x,\n"
   5666	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
   5667	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
   5668	"\n"
   5669	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
   5670	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
   5671	"\t    variable reference, field or numeric literal.\n"
   5672	"\n"
   5673	"\t    When a matching event is hit, an entry is added to a hash\n"
   5674	"\t    table using the key(s) and value(s) named, and the value of a\n"
   5675	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
   5676	"\t    correspond to fields in the event's format description.  Keys\n"
   5677	"\t    can be any field, or the special string 'stacktrace'.\n"
   5678	"\t    Compound keys consisting of up to two fields can be specified\n"
   5679	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
   5680	"\t    fields.  Sort keys consisting of up to two fields can be\n"
   5681	"\t    specified using the 'sort' keyword.  The sort direction can\n"
   5682	"\t    be modified by appending '.descending' or '.ascending' to a\n"
   5683	"\t    sort field.  The 'size' parameter can be used to specify more\n"
   5684	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
   5685	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
   5686	"\t    its histogram data will be shared with other triggers of the\n"
   5687	"\t    same name, and trigger hits will update this common data.\n\n"
   5688	"\t    Reading the 'hist' file for the event will dump the hash\n"
   5689	"\t    table in its entirety to stdout.  If there are multiple hist\n"
   5690	"\t    triggers attached to an event, there will be a table for each\n"
   5691	"\t    trigger in the output.  The table displayed for a named\n"
   5692	"\t    trigger will be the same as any other instance having the\n"
   5693	"\t    same name.  The default format used to display a given field\n"
   5694	"\t    can be modified by appending any of the following modifiers\n"
   5695	"\t    to the field name, as applicable:\n\n"
   5696	"\t            .hex        display a number as a hex value\n"
   5697	"\t            .sym        display an address as a symbol\n"
   5698	"\t            .sym-offset display an address as a symbol and offset\n"
   5699	"\t            .execname   display a common_pid as a program name\n"
   5700	"\t            .syscall    display a syscall id as a syscall name\n"
   5701	"\t            .log2       display log2 value rather than raw number\n"
   5702	"\t            .buckets=size  display values in groups of size rather than raw number\n"
   5703	"\t            .usecs      display a common_timestamp in microseconds\n\n"
   5704	"\t    The 'pause' parameter can be used to pause an existing hist\n"
   5705	"\t    trigger or to start a hist trigger but not log any events\n"
   5706	"\t    until told to do so.  'continue' can be used to start or\n"
   5707	"\t    restart a paused hist trigger.\n\n"
   5708	"\t    The 'clear' parameter will clear the contents of a running\n"
   5709	"\t    hist trigger and leave its current paused/active state\n"
   5710	"\t    unchanged.\n\n"
   5711	"\t    The enable_hist and disable_hist triggers can be used to\n"
   5712	"\t    have one event conditionally start and stop another event's\n"
   5713	"\t    already-attached hist trigger.  The syntax is analogous to\n"
   5714	"\t    the enable_event and disable_event triggers.\n\n"
   5715	"\t    Hist trigger handlers and actions are executed whenever\n"
   5716	"\t    a histogram entry is added or updated.  They take the form:\n\n"
   5717	"\t        <handler>.<action>\n\n"
   5718	"\t    The available handlers are:\n\n"
   5719	"\t        onmatch(matching.event)  - invoke on addition or update\n"
   5720	"\t        onmax(var)               - invoke if var exceeds current max\n"
   5721	"\t        onchange(var)            - invoke action if var changes\n\n"
   5722	"\t    The available actions are:\n\n"
   5723	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
   5724	"\t        save(field,...)                      - save current event fields\n"
   5725#ifdef CONFIG_TRACER_SNAPSHOT
   5726	"\t        snapshot()                           - snapshot the trace buffer\n\n"
   5727#endif
   5728#ifdef CONFIG_SYNTH_EVENTS
   5729	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
   5730	"\t  Write into this file to define/undefine new synthetic events.\n"
   5731	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
   5732#endif
   5733#endif
   5734;
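
/*
 * A minimal kprobe_events sketch following the format documented in the
 * README text above (the probed symbol, event name and fetch args here are
 * purely illustrative):
 *
 *   # echo 'p:myprobe do_sys_open dfd=$arg1 flags=$arg3' >> kprobe_events
 *   # echo 1 > events/kprobes/myprobe/enable
 *   # cat trace_pipe
 *   # echo '-:myprobe' >> kprobe_events
 *
 * $arg<N> fetching requires CONFIG_HAVE_FUNCTION_ARG_ACCESS_API, as the
 * help text notes.
 */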
   5735
   5736static ssize_t
   5737tracing_readme_read(struct file *filp, char __user *ubuf,
   5738		       size_t cnt, loff_t *ppos)
   5739{
   5740	return simple_read_from_buffer(ubuf, cnt, ppos,
   5741					readme_msg, strlen(readme_msg));
   5742}
   5743
   5744static const struct file_operations tracing_readme_fops = {
   5745	.open		= tracing_open_generic,
   5746	.read		= tracing_readme_read,
   5747	.llseek		= generic_file_llseek,
   5748};
   5749
   5750static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
   5751{
   5752	int pid = ++(*pos);
   5753
   5754	return trace_find_tgid_ptr(pid);
   5755}
   5756
   5757static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
   5758{
   5759	int pid = *pos;
   5760
   5761	return trace_find_tgid_ptr(pid);
   5762}
   5763
   5764static void saved_tgids_stop(struct seq_file *m, void *v)
   5765{
   5766}
   5767
   5768static int saved_tgids_show(struct seq_file *m, void *v)
   5769{
   5770	int *entry = (int *)v;
   5771	int pid = entry - tgid_map;
   5772	int tgid = *entry;
   5773
   5774	if (tgid == 0)
   5775		return SEQ_SKIP;
   5776
   5777	seq_printf(m, "%d %d\n", pid, tgid);
   5778	return 0;
   5779}
   5780
   5781static const struct seq_operations tracing_saved_tgids_seq_ops = {
   5782	.start		= saved_tgids_start,
   5783	.stop		= saved_tgids_stop,
   5784	.next		= saved_tgids_next,
   5785	.show		= saved_tgids_show,
   5786};
   5787
   5788static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
   5789{
   5790	int ret;
   5791
   5792	ret = tracing_check_open_get_tr(NULL);
   5793	if (ret)
   5794		return ret;
   5795
   5796	return seq_open(filp, &tracing_saved_tgids_seq_ops);
   5797}
   5798
   5799
   5800static const struct file_operations tracing_saved_tgids_fops = {
   5801	.open		= tracing_saved_tgids_open,
   5802	.read		= seq_read,
   5803	.llseek		= seq_lseek,
   5804	.release	= seq_release,
   5805};
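
/*
 * Illustrative read of the saved_tgids file implemented above: each line is
 * "<pid> <tgid>", produced by saved_tgids_show(), and entries are filled in
 * while the record-tgid option (see set_tracer_flag() above) is set.
 *
 *   # echo record-tgid > trace_options
 *   # cat saved_tgids
 */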
   5806
   5807static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
   5808{
   5809	unsigned int *ptr = v;
   5810
   5811	if (*pos || m->count)
   5812		ptr++;
   5813
   5814	(*pos)++;
   5815
   5816	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
   5817	     ptr++) {
   5818		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
   5819			continue;
   5820
   5821		return ptr;
   5822	}
   5823
   5824	return NULL;
   5825}
   5826
   5827static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
   5828{
   5829	void *v;
   5830	loff_t l = 0;
   5831
   5832	preempt_disable();
   5833	arch_spin_lock(&trace_cmdline_lock);
   5834
   5835	v = &savedcmd->map_cmdline_to_pid[0];
   5836	while (l <= *pos) {
   5837		v = saved_cmdlines_next(m, v, &l);
   5838		if (!v)
   5839			return NULL;
   5840	}
   5841
   5842	return v;
   5843}
   5844
   5845static void saved_cmdlines_stop(struct seq_file *m, void *v)
   5846{
   5847	arch_spin_unlock(&trace_cmdline_lock);
   5848	preempt_enable();
   5849}
   5850
   5851static int saved_cmdlines_show(struct seq_file *m, void *v)
   5852{
   5853	char buf[TASK_COMM_LEN];
   5854	unsigned int *pid = v;
   5855
   5856	__trace_find_cmdline(*pid, buf);
   5857	seq_printf(m, "%d %s\n", *pid, buf);
   5858	return 0;
   5859}
   5860
   5861static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
   5862	.start		= saved_cmdlines_start,
   5863	.next		= saved_cmdlines_next,
   5864	.stop		= saved_cmdlines_stop,
   5865	.show		= saved_cmdlines_show,
   5866};
   5867
   5868static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
   5869{
   5870	int ret;
   5871
   5872	ret = tracing_check_open_get_tr(NULL);
   5873	if (ret)
   5874		return ret;
   5875
   5876	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
   5877}
   5878
   5879static const struct file_operations tracing_saved_cmdlines_fops = {
   5880	.open		= tracing_saved_cmdlines_open,
   5881	.read		= seq_read,
   5882	.llseek		= seq_lseek,
   5883	.release	= seq_release,
   5884};
   5885
   5886static ssize_t
   5887tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
   5888				 size_t cnt, loff_t *ppos)
   5889{
   5890	char buf[64];
   5891	int r;
   5892
   5893	arch_spin_lock(&trace_cmdline_lock);
   5894	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
   5895	arch_spin_unlock(&trace_cmdline_lock);
   5896
   5897	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   5898}
   5899
   5900static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
   5901{
   5902	kfree(s->saved_cmdlines);
   5903	kfree(s->map_cmdline_to_pid);
   5904	kfree(s);
   5905}
   5906
   5907static int tracing_resize_saved_cmdlines(unsigned int val)
   5908{
   5909	struct saved_cmdlines_buffer *s, *savedcmd_temp;
   5910
   5911	s = kmalloc(sizeof(*s), GFP_KERNEL);
   5912	if (!s)
   5913		return -ENOMEM;
   5914
   5915	if (allocate_cmdlines_buffer(val, s) < 0) {
   5916		kfree(s);
   5917		return -ENOMEM;
   5918	}
   5919
   5920	arch_spin_lock(&trace_cmdline_lock);
   5921	savedcmd_temp = savedcmd;
   5922	savedcmd = s;
   5923	arch_spin_unlock(&trace_cmdline_lock);
   5924	free_saved_cmdlines_buffer(savedcmd_temp);
   5925
   5926	return 0;
   5927}
   5928
   5929static ssize_t
   5930tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
   5931				  size_t cnt, loff_t *ppos)
   5932{
   5933	unsigned long val;
   5934	int ret;
   5935
   5936	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   5937	if (ret)
   5938		return ret;
   5939
   5940	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
   5941	if (!val || val > PID_MAX_DEFAULT)
   5942		return -EINVAL;
   5943
   5944	ret = tracing_resize_saved_cmdlines((unsigned int)val);
   5945	if (ret < 0)
   5946		return ret;
   5947
   5948	*ppos += cnt;
   5949
   5950	return cnt;
   5951}
   5952
   5953static const struct file_operations tracing_saved_cmdlines_size_fops = {
   5954	.open		= tracing_open_generic,
   5955	.read		= tracing_saved_cmdlines_size_read,
   5956	.write		= tracing_saved_cmdlines_size_write,
   5957};
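
/*
 * Illustrative usage of the two interfaces above: saved_cmdlines lists the
 * cached "<pid> <comm>" pairs, and saved_cmdlines_size resizes that cache
 * (the write handler accepts 1..PID_MAX_DEFAULT entries and swaps in a
 * freshly allocated buffer under trace_cmdline_lock).
 *
 *   # echo 1024 > saved_cmdlines_size
 *   # cat saved_cmdlines
 */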
   5958
   5959#ifdef CONFIG_TRACE_EVAL_MAP_FILE
   5960static union trace_eval_map_item *
   5961update_eval_map(union trace_eval_map_item *ptr)
   5962{
   5963	if (!ptr->map.eval_string) {
   5964		if (ptr->tail.next) {
   5965			ptr = ptr->tail.next;
   5966			/* Set ptr to the next real item (skip head) */
   5967			ptr++;
   5968		} else
   5969			return NULL;
   5970	}
   5971	return ptr;
   5972}
   5973
   5974static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
   5975{
   5976	union trace_eval_map_item *ptr = v;
   5977
   5978	/*
   5979	 * Paranoid! If ptr points to end, we don't want to increment past it.
   5980	 * This really should never happen.
   5981	 */
   5982	(*pos)++;
   5983	ptr = update_eval_map(ptr);
   5984	if (WARN_ON_ONCE(!ptr))
   5985		return NULL;
   5986
   5987	ptr++;
   5988	ptr = update_eval_map(ptr);
   5989
   5990	return ptr;
   5991}
   5992
   5993static void *eval_map_start(struct seq_file *m, loff_t *pos)
   5994{
   5995	union trace_eval_map_item *v;
   5996	loff_t l = 0;
   5997
   5998	mutex_lock(&trace_eval_mutex);
   5999
   6000	v = trace_eval_maps;
   6001	if (v)
   6002		v++;
   6003
   6004	while (v && l < *pos) {
   6005		v = eval_map_next(m, v, &l);
   6006	}
   6007
   6008	return v;
   6009}
   6010
   6011static void eval_map_stop(struct seq_file *m, void *v)
   6012{
   6013	mutex_unlock(&trace_eval_mutex);
   6014}
   6015
   6016static int eval_map_show(struct seq_file *m, void *v)
   6017{
   6018	union trace_eval_map_item *ptr = v;
   6019
   6020	seq_printf(m, "%s %ld (%s)\n",
   6021		   ptr->map.eval_string, ptr->map.eval_value,
   6022		   ptr->map.system);
   6023
   6024	return 0;
   6025}
   6026
   6027static const struct seq_operations tracing_eval_map_seq_ops = {
   6028	.start		= eval_map_start,
   6029	.next		= eval_map_next,
   6030	.stop		= eval_map_stop,
   6031	.show		= eval_map_show,
   6032};
   6033
   6034static int tracing_eval_map_open(struct inode *inode, struct file *filp)
   6035{
   6036	int ret;
   6037
   6038	ret = tracing_check_open_get_tr(NULL);
   6039	if (ret)
   6040		return ret;
   6041
   6042	return seq_open(filp, &tracing_eval_map_seq_ops);
   6043}
   6044
   6045static const struct file_operations tracing_eval_map_fops = {
   6046	.open		= tracing_eval_map_open,
   6047	.read		= seq_read,
   6048	.llseek		= seq_lseek,
   6049	.release	= seq_release,
   6050};
   6051
   6052static inline union trace_eval_map_item *
   6053trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
   6054{
   6055	/* Return tail of array given the head */
   6056	return ptr + ptr->head.length + 1;
   6057}
   6058
   6059static void
   6060trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
   6061			   int len)
   6062{
   6063	struct trace_eval_map **stop;
   6064	struct trace_eval_map **map;
   6065	union trace_eval_map_item *map_array;
   6066	union trace_eval_map_item *ptr;
   6067
   6068	stop = start + len;
   6069
   6070	/*
   6071	 * The trace_eval_maps contains the map plus a head and tail item,
   6072	 * where the head holds the module and length of array, and the
   6073	 * tail holds a pointer to the next list.
   6074	 */
   6075	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
   6076	if (!map_array) {
   6077		pr_warn("Unable to allocate trace eval mapping\n");
   6078		return;
   6079	}
   6080
   6081	mutex_lock(&trace_eval_mutex);
   6082
   6083	if (!trace_eval_maps)
   6084		trace_eval_maps = map_array;
   6085	else {
   6086		ptr = trace_eval_maps;
   6087		for (;;) {
   6088			ptr = trace_eval_jmp_to_tail(ptr);
   6089			if (!ptr->tail.next)
   6090				break;
   6091			ptr = ptr->tail.next;
   6092
   6093		}
   6094		ptr->tail.next = map_array;
   6095	}
   6096	map_array->head.mod = mod;
   6097	map_array->head.length = len;
   6098	map_array++;
   6099
   6100	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
   6101		map_array->map = **map;
   6102		map_array++;
   6103	}
   6104	memset(map_array, 0, sizeof(*map_array));
   6105
   6106	mutex_unlock(&trace_eval_mutex);
   6107}
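
/*
 * Layout sketch of one map_array allocation built above (len + 2 union
 * entries): the head entry stores the owning module and count, the len
 * copied maps follow, and the zeroed tail entry terminates the block and
 * can later point at the next module's block via tail.next.
 *
 *   [0]        head   { .mod, .length = len }
 *   [1..len]   map    copies of the module's trace_eval_map entries
 *   [len + 1]  tail   { .next = NULL }   (the memset above)
 *
 * trace_eval_jmp_to_tail() relies on exactly this arrangement.
 */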
   6108
   6109static void trace_create_eval_file(struct dentry *d_tracer)
   6110{
   6111	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
   6112			  NULL, &tracing_eval_map_fops);
   6113}
   6114
   6115#else /* CONFIG_TRACE_EVAL_MAP_FILE */
   6116static inline void trace_create_eval_file(struct dentry *d_tracer) { }
   6117static inline void trace_insert_eval_map_file(struct module *mod,
   6118			      struct trace_eval_map **start, int len) { }
   6119#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
   6120
   6121static void trace_insert_eval_map(struct module *mod,
   6122				  struct trace_eval_map **start, int len)
   6123{
   6124	struct trace_eval_map **map;
   6125
   6126	if (len <= 0)
   6127		return;
   6128
   6129	map = start;
   6130
   6131	trace_event_eval_update(map, len);
   6132
   6133	trace_insert_eval_map_file(mod, start, len);
   6134}
   6135
   6136static ssize_t
   6137tracing_set_trace_read(struct file *filp, char __user *ubuf,
   6138		       size_t cnt, loff_t *ppos)
   6139{
   6140	struct trace_array *tr = filp->private_data;
   6141	char buf[MAX_TRACER_SIZE+2];
   6142	int r;
   6143
   6144	mutex_lock(&trace_types_lock);
   6145	r = sprintf(buf, "%s\n", tr->current_trace->name);
   6146	mutex_unlock(&trace_types_lock);
   6147
   6148	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   6149}
   6150
   6151int tracer_init(struct tracer *t, struct trace_array *tr)
   6152{
   6153	tracing_reset_online_cpus(&tr->array_buffer);
   6154	return t->init(tr);
   6155}
   6156
   6157static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
   6158{
   6159	int cpu;
   6160
   6161	for_each_tracing_cpu(cpu)
   6162		per_cpu_ptr(buf->data, cpu)->entries = val;
   6163}
   6164
   6165#ifdef CONFIG_TRACER_MAX_TRACE
   6166/* resize @trace_buf's buffer to the size of @size_buf's entries */
   6167static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
   6168					struct array_buffer *size_buf, int cpu_id)
   6169{
   6170	int cpu, ret = 0;
   6171
   6172	if (cpu_id == RING_BUFFER_ALL_CPUS) {
   6173		for_each_tracing_cpu(cpu) {
   6174			ret = ring_buffer_resize(trace_buf->buffer,
   6175				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
   6176			if (ret < 0)
   6177				break;
   6178			per_cpu_ptr(trace_buf->data, cpu)->entries =
   6179				per_cpu_ptr(size_buf->data, cpu)->entries;
   6180		}
   6181	} else {
   6182		ret = ring_buffer_resize(trace_buf->buffer,
   6183				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
   6184		if (ret == 0)
   6185			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
   6186				per_cpu_ptr(size_buf->data, cpu_id)->entries;
   6187	}
   6188
   6189	return ret;
   6190}
   6191#endif /* CONFIG_TRACER_MAX_TRACE */
   6192
   6193static int __tracing_resize_ring_buffer(struct trace_array *tr,
   6194					unsigned long size, int cpu)
   6195{
   6196	int ret;
   6197
   6198	/*
   6199	 * If kernel or user changes the size of the ring buffer
   6200	 * we use the size that was given, and we can forget about
   6201	 * expanding it later.
   6202	 */
   6203	ring_buffer_expanded = true;
   6204
   6205	/* May be called before buffers are initialized */
   6206	if (!tr->array_buffer.buffer)
   6207		return 0;
   6208
   6209	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
   6210	if (ret < 0)
   6211		return ret;
   6212
   6213#ifdef CONFIG_TRACER_MAX_TRACE
   6214	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
   6215	    !tr->current_trace->use_max_tr)
   6216		goto out;
   6217
   6218	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
   6219	if (ret < 0) {
   6220		int r = resize_buffer_duplicate_size(&tr->array_buffer,
   6221						     &tr->array_buffer, cpu);
   6222		if (r < 0) {
   6223			/*
   6224			 * AARGH! We are left with different
   6225			 * size max buffer!!!!
   6226			 * The max buffer is our "snapshot" buffer.
   6227			 * When a tracer needs a snapshot (one of the
   6228			 * latency tracers), it swaps the max buffer
   6229			 * with the saved snapshot. We succeeded in
   6230			 * updating the size of the main buffer, but failed to
   6231			 * update the size of the max buffer. But when we tried
   6232			 * to reset the main buffer to the original size, we
   6233			 * failed there too. This is very unlikely to
   6234			 * happen, but if it does, warn and kill all
   6235			 * tracing.
   6236			 */
   6237			WARN_ON(1);
   6238			tracing_disabled = 1;
   6239		}
   6240		return ret;
   6241	}
   6242
   6243	if (cpu == RING_BUFFER_ALL_CPUS)
   6244		set_buffer_entries(&tr->max_buffer, size);
   6245	else
   6246		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
   6247
   6248 out:
   6249#endif /* CONFIG_TRACER_MAX_TRACE */
   6250
   6251	if (cpu == RING_BUFFER_ALL_CPUS)
   6252		set_buffer_entries(&tr->array_buffer, size);
   6253	else
   6254		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
   6255
   6256	return ret;
   6257}
   6258
   6259ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
   6260				  unsigned long size, int cpu_id)
   6261{
   6262	int ret;
   6263
   6264	mutex_lock(&trace_types_lock);
   6265
   6266	if (cpu_id != RING_BUFFER_ALL_CPUS) {
   6267		/* make sure this cpu is enabled in the mask */
   6268		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
   6269			ret = -EINVAL;
   6270			goto out;
   6271		}
   6272	}
   6273
   6274	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
   6275	if (ret < 0)
   6276		ret = -ENOMEM;
   6277
   6278out:
   6279	mutex_unlock(&trace_types_lock);
   6280
   6281	return ret;
   6282}
   6283
   6284
   6285/**
   6286 * tracing_update_buffers - used by tracing facility to expand ring buffers
   6287 *
   6288 * To save memory when tracing is never used on a system that has it
   6289 * configured in, the ring buffers are set to a minimum size. But once
   6290 * a user starts to use the tracing facility, they need to grow
   6291 * to their default size.
   6292 *
   6293 * This function is to be called when a tracer is about to be used.
   6294 */
   6295int tracing_update_buffers(void)
   6296{
   6297	int ret = 0;
   6298
   6299	mutex_lock(&trace_types_lock);
   6300	if (!ring_buffer_expanded)
   6301		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
   6302						RING_BUFFER_ALL_CPUS);
   6303	mutex_unlock(&trace_types_lock);
   6304
   6305	return ret;
   6306}
   6307
   6308struct trace_option_dentry;
   6309
   6310static void
   6311create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
   6312
   6313/*
   6314 * Used to clear out the tracer before deletion of an instance.
   6315 * Must have trace_types_lock held.
   6316 */
   6317static void tracing_set_nop(struct trace_array *tr)
   6318{
   6319	if (tr->current_trace == &nop_trace)
   6320		return;
   6321
   6322	tr->current_trace->enabled--;
   6323
   6324	if (tr->current_trace->reset)
   6325		tr->current_trace->reset(tr);
   6326
   6327	tr->current_trace = &nop_trace;
   6328}
   6329
   6330static bool tracer_options_updated;
   6331
   6332static void add_tracer_options(struct trace_array *tr, struct tracer *t)
   6333{
   6334	/* Only enable if the directory has been created already. */
   6335	if (!tr->dir)
   6336		return;
   6337
   6338	/* Only create trace option files after update_tracer_options finishes */
   6339	if (!tracer_options_updated)
   6340		return;
   6341
   6342	create_trace_option_files(tr, t);
   6343}
   6344
   6345int tracing_set_tracer(struct trace_array *tr, const char *buf)
   6346{
   6347	struct tracer *t;
   6348#ifdef CONFIG_TRACER_MAX_TRACE
   6349	bool had_max_tr;
   6350#endif
   6351	int ret = 0;
   6352
   6353	mutex_lock(&trace_types_lock);
   6354
   6355	if (!ring_buffer_expanded) {
   6356		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
   6357						RING_BUFFER_ALL_CPUS);
   6358		if (ret < 0)
   6359			goto out;
   6360		ret = 0;
   6361	}
   6362
   6363	for (t = trace_types; t; t = t->next) {
   6364		if (strcmp(t->name, buf) == 0)
   6365			break;
   6366	}
   6367	if (!t) {
   6368		ret = -EINVAL;
   6369		goto out;
   6370	}
   6371	if (t == tr->current_trace)
   6372		goto out;
   6373
   6374#ifdef CONFIG_TRACER_SNAPSHOT
   6375	if (t->use_max_tr) {
   6376		arch_spin_lock(&tr->max_lock);
   6377		if (tr->cond_snapshot)
   6378			ret = -EBUSY;
   6379		arch_spin_unlock(&tr->max_lock);
   6380		if (ret)
   6381			goto out;
   6382	}
   6383#endif
   6384	/* Some tracers won't work on kernel command line */
   6385	if (system_state < SYSTEM_RUNNING && t->noboot) {
   6386		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
   6387			t->name);
   6388		goto out;
   6389	}
   6390
   6391	/* Some tracers are only allowed for the top level buffer */
   6392	if (!trace_ok_for_array(t, tr)) {
   6393		ret = -EINVAL;
   6394		goto out;
   6395	}
   6396
   6397	/* If trace pipe files are being read, we can't change the tracer */
   6398	if (tr->trace_ref) {
   6399		ret = -EBUSY;
   6400		goto out;
   6401	}
   6402
   6403	trace_branch_disable();
   6404
   6405	tr->current_trace->enabled--;
   6406
   6407	if (tr->current_trace->reset)
   6408		tr->current_trace->reset(tr);
   6409
   6410	/* Current trace needs to be nop_trace before synchronize_rcu */
   6411	tr->current_trace = &nop_trace;
   6412
   6413#ifdef CONFIG_TRACER_MAX_TRACE
   6414	had_max_tr = tr->allocated_snapshot;
   6415
   6416	if (had_max_tr && !t->use_max_tr) {
   6417		/*
   6418		 * We need to make sure that the update_max_tr sees that
   6419		 * current_trace changed to nop_trace to keep it from
   6420		 * swapping the buffers after we resize it.
   6421		 * update_max_tr() is called with interrupts disabled,
   6422		 * so a synchronize_rcu() is sufficient.
   6423		 */
   6424		synchronize_rcu();
   6425		free_snapshot(tr);
   6426	}
   6427
   6428	if (t->use_max_tr && !had_max_tr) {
   6429		ret = tracing_alloc_snapshot_instance(tr);
   6430		if (ret < 0)
   6431			goto out;
   6432	}
   6433#endif
   6434
   6435	if (t->init) {
   6436		ret = tracer_init(t, tr);
   6437		if (ret)
   6438			goto out;
   6439	}
   6440
   6441	tr->current_trace = t;
   6442	tr->current_trace->enabled++;
   6443	trace_branch_enable(tr);
   6444 out:
   6445	mutex_unlock(&trace_types_lock);
   6446
   6447	return ret;
   6448}
   6449
   6450static ssize_t
   6451tracing_set_trace_write(struct file *filp, const char __user *ubuf,
   6452			size_t cnt, loff_t *ppos)
   6453{
   6454	struct trace_array *tr = filp->private_data;
   6455	char buf[MAX_TRACER_SIZE+1];
   6456	char *name;
   6457	size_t ret;
   6458	int err;
   6459
   6460	ret = cnt;
   6461
   6462	if (cnt > MAX_TRACER_SIZE)
   6463		cnt = MAX_TRACER_SIZE;
   6464
   6465	if (copy_from_user(buf, ubuf, cnt))
   6466		return -EFAULT;
   6467
   6468	buf[cnt] = 0;
   6469
   6470	name = strim(buf);
   6471
   6472	err = tracing_set_tracer(tr, name);
   6473	if (err)
   6474		return err;
   6475
   6476	*ppos += ret;
   6477
   6478	return ret;
   6479}
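
/*
 * Illustrative usage of the current_tracer write path above (available
 * tracer names depend on the kernel configuration; "function" is only an
 * example):
 *
 *   # cat available_tracers
 *   # echo function > current_tracer
 *   # echo nop > current_tracer      (back to the default no-op tracer)
 *
 * The write is trimmed with strim(), so the trailing newline from echo is
 * accepted, and tracing_set_tracer() rejects names not in trace_types.
 */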
   6480
   6481static ssize_t
   6482tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
   6483		   size_t cnt, loff_t *ppos)
   6484{
   6485	char buf[64];
   6486	int r;
   6487
   6488	r = snprintf(buf, sizeof(buf), "%ld\n",
   6489		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
   6490	if (r > sizeof(buf))
   6491		r = sizeof(buf);
   6492	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   6493}
   6494
   6495static ssize_t
   6496tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
   6497		    size_t cnt, loff_t *ppos)
   6498{
   6499	unsigned long val;
   6500	int ret;
   6501
   6502	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   6503	if (ret)
   6504		return ret;
   6505
   6506	*ptr = val * 1000;
   6507
   6508	return cnt;
   6509}
   6510
   6511static ssize_t
   6512tracing_thresh_read(struct file *filp, char __user *ubuf,
   6513		    size_t cnt, loff_t *ppos)
   6514{
   6515	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
   6516}
   6517
   6518static ssize_t
   6519tracing_thresh_write(struct file *filp, const char __user *ubuf,
   6520		     size_t cnt, loff_t *ppos)
   6521{
   6522	struct trace_array *tr = filp->private_data;
   6523	int ret;
   6524
   6525	mutex_lock(&trace_types_lock);
   6526	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
   6527	if (ret < 0)
   6528		goto out;
   6529
   6530	if (tr->current_trace->update_thresh) {
   6531		ret = tr->current_trace->update_thresh(tr);
   6532		if (ret < 0)
   6533			goto out;
   6534	}
   6535
   6536	ret = cnt;
   6537out:
   6538	mutex_unlock(&trace_types_lock);
   6539
   6540	return ret;
   6541}
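
/*
 * Note on units for the helpers above: the files expose microseconds while
 * the internal variables hold nanoseconds, so tracing_nsecs_write()
 * multiplies by 1000 and tracing_nsecs_read() converts back with
 * nsecs_to_usecs(). For example, a 100 usec threshold:
 *
 *   # echo 100 > tracing_thresh
 *   # cat tracing_thresh
 *   100
 */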
   6542
   6543#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
   6544
   6545static ssize_t
   6546tracing_max_lat_read(struct file *filp, char __user *ubuf,
   6547		     size_t cnt, loff_t *ppos)
   6548{
   6549	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
   6550}
   6551
   6552static ssize_t
   6553tracing_max_lat_write(struct file *filp, const char __user *ubuf,
   6554		      size_t cnt, loff_t *ppos)
   6555{
   6556	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
   6557}
   6558
   6559#endif
   6560
   6561static int tracing_open_pipe(struct inode *inode, struct file *filp)
   6562{
   6563	struct trace_array *tr = inode->i_private;
   6564	struct trace_iterator *iter;
   6565	int ret;
   6566
   6567	ret = tracing_check_open_get_tr(tr);
   6568	if (ret)
   6569		return ret;
   6570
   6571	mutex_lock(&trace_types_lock);
   6572
   6573	/* create a buffer to store the information to pass to userspace */
   6574	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
   6575	if (!iter) {
   6576		ret = -ENOMEM;
   6577		__trace_array_put(tr);
   6578		goto out;
   6579	}
   6580
   6581	trace_seq_init(&iter->seq);
   6582	iter->trace = tr->current_trace;
   6583
   6584	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
   6585		ret = -ENOMEM;
   6586		goto fail;
   6587	}
   6588
   6589	/* trace pipe does not show start of buffer */
   6590	cpumask_setall(iter->started);
   6591
   6592	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
   6593		iter->iter_flags |= TRACE_FILE_LAT_FMT;
   6594
   6595	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
   6596	if (trace_clocks[tr->clock_id].in_ns)
   6597		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
   6598
   6599	iter->tr = tr;
   6600	iter->array_buffer = &tr->array_buffer;
   6601	iter->cpu_file = tracing_get_cpu(inode);
   6602	mutex_init(&iter->mutex);
   6603	filp->private_data = iter;
   6604
   6605	if (iter->trace->pipe_open)
   6606		iter->trace->pipe_open(iter);
   6607
   6608	nonseekable_open(inode, filp);
   6609
   6610	tr->trace_ref++;
   6611out:
   6612	mutex_unlock(&trace_types_lock);
   6613	return ret;
   6614
   6615fail:
   6616	kfree(iter);
   6617	__trace_array_put(tr);
   6618	mutex_unlock(&trace_types_lock);
   6619	return ret;
   6620}
   6621
   6622static int tracing_release_pipe(struct inode *inode, struct file *file)
   6623{
   6624	struct trace_iterator *iter = file->private_data;
   6625	struct trace_array *tr = inode->i_private;
   6626
   6627	mutex_lock(&trace_types_lock);
   6628
   6629	tr->trace_ref--;
   6630
   6631	if (iter->trace->pipe_close)
   6632		iter->trace->pipe_close(iter);
   6633
   6634	mutex_unlock(&trace_types_lock);
   6635
   6636	free_cpumask_var(iter->started);
   6637	mutex_destroy(&iter->mutex);
   6638	kfree(iter);
   6639
   6640	trace_array_put(tr);
   6641
   6642	return 0;
   6643}
   6644
   6645static __poll_t
   6646trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
   6647{
   6648	struct trace_array *tr = iter->tr;
   6649
   6650	/* Iterators are static, they should be filled or empty */
   6651	if (trace_buffer_iter(iter, iter->cpu_file))
   6652		return EPOLLIN | EPOLLRDNORM;
   6653
   6654	if (tr->trace_flags & TRACE_ITER_BLOCK)
   6655		/*
   6656		 * Always select as readable when in blocking mode
   6657		 */
   6658		return EPOLLIN | EPOLLRDNORM;
   6659	else
   6660		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
   6661					     filp, poll_table);
   6662}
   6663
   6664static __poll_t
   6665tracing_poll_pipe(struct file *filp, poll_table *poll_table)
   6666{
   6667	struct trace_iterator *iter = filp->private_data;
   6668
   6669	return trace_poll(iter, filp, poll_table);
   6670}
   6671
   6672/* Must be called with iter->mutex held. */
   6673static int tracing_wait_pipe(struct file *filp)
   6674{
   6675	struct trace_iterator *iter = filp->private_data;
   6676	int ret;
   6677
   6678	while (trace_empty(iter)) {
   6679
   6680		if ((filp->f_flags & O_NONBLOCK)) {
   6681			return -EAGAIN;
   6682		}
   6683
   6684		/*
   6685		 * We block until we read something and tracing is disabled.
   6686		 * We still block if tracing is disabled, but we have never
   6687		 * read anything. This allows a user to cat this file, and
   6688		 * then enable tracing. But after we have read something,
   6689		 * we give an EOF when tracing is again disabled.
   6690		 *
   6691		 * iter->pos will be 0 if we haven't read anything.
   6692		 */
   6693		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
   6694			break;
   6695
   6696		mutex_unlock(&iter->mutex);
   6697
   6698		ret = wait_on_pipe(iter, 0);
   6699
   6700		mutex_lock(&iter->mutex);
   6701
   6702		if (ret)
   6703			return ret;
   6704	}
   6705
   6706	return 1;
   6707}
   6708
   6709/*
   6710 * Consumer reader.
   6711 */
   6712static ssize_t
   6713tracing_read_pipe(struct file *filp, char __user *ubuf,
   6714		  size_t cnt, loff_t *ppos)
   6715{
   6716	struct trace_iterator *iter = filp->private_data;
   6717	ssize_t sret;
   6718
   6719	/*
   6720	 * Avoid more than one consumer on a single file descriptor
   6721	 * This is just a matter of trace coherency; the ring buffer itself
   6722	 * is protected.
   6723	 */
   6724	mutex_lock(&iter->mutex);
   6725
   6726	/* return any leftover data */
   6727	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
   6728	if (sret != -EBUSY)
   6729		goto out;
   6730
   6731	trace_seq_init(&iter->seq);
   6732
   6733	if (iter->trace->read) {
   6734		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
   6735		if (sret)
   6736			goto out;
   6737	}
   6738
   6739waitagain:
   6740	sret = tracing_wait_pipe(filp);
   6741	if (sret <= 0)
   6742		goto out;
   6743
   6744	/* stop when tracing is finished */
   6745	if (trace_empty(iter)) {
   6746		sret = 0;
   6747		goto out;
   6748	}
   6749
   6750	if (cnt >= PAGE_SIZE)
   6751		cnt = PAGE_SIZE - 1;
   6752
   6753	/* reset all but tr, trace, and overruns */
   6754	trace_iterator_reset(iter);
   6755	cpumask_clear(iter->started);
   6756	trace_seq_init(&iter->seq);
   6757
   6758	trace_event_read_lock();
   6759	trace_access_lock(iter->cpu_file);
   6760	while (trace_find_next_entry_inc(iter) != NULL) {
   6761		enum print_line_t ret;
   6762		int save_len = iter->seq.seq.len;
   6763
   6764		ret = print_trace_line(iter);
   6765		if (ret == TRACE_TYPE_PARTIAL_LINE) {
   6766			/* don't print partial lines */
   6767			iter->seq.seq.len = save_len;
   6768			break;
   6769		}
   6770		if (ret != TRACE_TYPE_NO_CONSUME)
   6771			trace_consume(iter);
   6772
   6773		if (trace_seq_used(&iter->seq) >= cnt)
   6774			break;
   6775
   6776		/*
   6777		 * Setting the full flag means we reached the trace_seq buffer
   6778		 * size and should have left via the partial output condition above.
   6779		 * If so, one of the trace_seq_* functions is not being used properly.
   6780		 */
   6781		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
   6782			  iter->ent->type);
   6783	}
   6784	trace_access_unlock(iter->cpu_file);
   6785	trace_event_read_unlock();
   6786
   6787	/* Now copy what we have to the user */
   6788	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
   6789	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
   6790		trace_seq_init(&iter->seq);
   6791
   6792	/*
   6793	 * If there was nothing to send to user, in spite of consuming trace
   6794	 * entries, go back to wait for more entries.
   6795	 */
   6796	if (sret == -EBUSY)
   6797		goto waitagain;
   6798
   6799out:
   6800	mutex_unlock(&iter->mutex);
   6801
   6802	return sret;
   6803}
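
/*
 * Illustrative behaviour of the consuming read implemented above: unlike
 * the "trace" file, events returned through trace_pipe are consumed from
 * the ring buffer, and a reader with no data blocks in tracing_wait_pipe()
 * unless the file was opened O_NONBLOCK.
 *
 *   # cat trace_pipe      (blocks until events arrive, then streams them)
 *
 * Concurrent readers on the same file descriptor are serialized by
 * iter->mutex, as noted in tracing_read_pipe().
 */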
   6804
   6805static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
   6806				     unsigned int idx)
   6807{
   6808	__free_page(spd->pages[idx]);
   6809}
   6810
   6811static size_t
   6812tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
   6813{
   6814	size_t count;
   6815	int save_len;
   6816	int ret;
   6817
   6818	/* Seq buffer is page-sized, exactly what we need. */
   6819	for (;;) {
   6820		save_len = iter->seq.seq.len;
   6821		ret = print_trace_line(iter);
   6822
   6823		if (trace_seq_has_overflowed(&iter->seq)) {
   6824			iter->seq.seq.len = save_len;
   6825			break;
   6826		}
   6827
   6828		/*
   6829		 * This should not be hit, because it should only
   6830		 * be set if the iter->seq overflowed. But check it
   6831		 * anyway to be safe.
   6832		 */
   6833		if (ret == TRACE_TYPE_PARTIAL_LINE) {
   6834			iter->seq.seq.len = save_len;
   6835			break;
   6836		}
   6837
   6838		count = trace_seq_used(&iter->seq) - save_len;
   6839		if (rem < count) {
   6840			rem = 0;
   6841			iter->seq.seq.len = save_len;
   6842			break;
   6843		}
   6844
   6845		if (ret != TRACE_TYPE_NO_CONSUME)
   6846			trace_consume(iter);
   6847		rem -= count;
   6848		if (!trace_find_next_entry_inc(iter))	{
   6849			rem = 0;
   6850			iter->ent = NULL;
   6851			break;
   6852		}
   6853	}
   6854
   6855	return rem;
   6856}
   6857
   6858static ssize_t tracing_splice_read_pipe(struct file *filp,
   6859					loff_t *ppos,
   6860					struct pipe_inode_info *pipe,
   6861					size_t len,
   6862					unsigned int flags)
   6863{
   6864	struct page *pages_def[PIPE_DEF_BUFFERS];
   6865	struct partial_page partial_def[PIPE_DEF_BUFFERS];
   6866	struct trace_iterator *iter = filp->private_data;
   6867	struct splice_pipe_desc spd = {
   6868		.pages		= pages_def,
   6869		.partial	= partial_def,
   6870		.nr_pages	= 0, /* This gets updated below. */
   6871		.nr_pages_max	= PIPE_DEF_BUFFERS,
   6872		.ops		= &default_pipe_buf_ops,
   6873		.spd_release	= tracing_spd_release_pipe,
   6874	};
   6875	ssize_t ret;
   6876	size_t rem;
   6877	unsigned int i;
   6878
   6879	if (splice_grow_spd(pipe, &spd))
   6880		return -ENOMEM;
   6881
   6882	mutex_lock(&iter->mutex);
   6883
   6884	if (iter->trace->splice_read) {
   6885		ret = iter->trace->splice_read(iter, filp,
   6886					       ppos, pipe, len, flags);
   6887		if (ret)
   6888			goto out_err;
   6889	}
   6890
   6891	ret = tracing_wait_pipe(filp);
   6892	if (ret <= 0)
   6893		goto out_err;
   6894
   6895	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
   6896		ret = -EFAULT;
   6897		goto out_err;
   6898	}
   6899
   6900	trace_event_read_lock();
   6901	trace_access_lock(iter->cpu_file);
   6902
   6903	/* Fill as many pages as possible. */
   6904	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
   6905		spd.pages[i] = alloc_page(GFP_KERNEL);
   6906		if (!spd.pages[i])
   6907			break;
   6908
   6909		rem = tracing_fill_pipe_page(rem, iter);
   6910
   6911		/* Copy the data into the page, so we can start over. */
   6912		ret = trace_seq_to_buffer(&iter->seq,
   6913					  page_address(spd.pages[i]),
   6914					  trace_seq_used(&iter->seq));
   6915		if (ret < 0) {
   6916			__free_page(spd.pages[i]);
   6917			break;
   6918		}
   6919		spd.partial[i].offset = 0;
   6920		spd.partial[i].len = trace_seq_used(&iter->seq);
   6921
   6922		trace_seq_init(&iter->seq);
   6923	}
   6924
   6925	trace_access_unlock(iter->cpu_file);
   6926	trace_event_read_unlock();
   6927	mutex_unlock(&iter->mutex);
   6928
   6929	spd.nr_pages = i;
   6930
   6931	if (i)
   6932		ret = splice_to_pipe(pipe, &spd);
   6933	else
   6934		ret = 0;
   6935out:
   6936	splice_shrink_spd(&spd);
   6937	return ret;
   6938
   6939out_err:
   6940	mutex_unlock(&iter->mutex);
   6941	goto out;
   6942}
   6943
   6944static ssize_t
   6945tracing_entries_read(struct file *filp, char __user *ubuf,
   6946		     size_t cnt, loff_t *ppos)
   6947{
   6948	struct inode *inode = file_inode(filp);
   6949	struct trace_array *tr = inode->i_private;
   6950	int cpu = tracing_get_cpu(inode);
   6951	char buf[64];
   6952	int r = 0;
   6953	ssize_t ret;
   6954
   6955	mutex_lock(&trace_types_lock);
   6956
   6957	if (cpu == RING_BUFFER_ALL_CPUS) {
   6958		int cpu, buf_size_same;
   6959		unsigned long size;
   6960
   6961		size = 0;
   6962		buf_size_same = 1;
   6963		/* check if all cpu sizes are the same */
   6964		for_each_tracing_cpu(cpu) {
   6965			/* fill in the size from the first enabled cpu */
   6966			if (size == 0)
   6967				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
   6968			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
   6969				buf_size_same = 0;
   6970				break;
   6971			}
   6972		}
   6973
   6974		if (buf_size_same) {
   6975			if (!ring_buffer_expanded)
   6976				r = sprintf(buf, "%lu (expanded: %lu)\n",
   6977					    size >> 10,
   6978					    trace_buf_size >> 10);
   6979			else
   6980				r = sprintf(buf, "%lu\n", size >> 10);
   6981		} else
   6982			r = sprintf(buf, "X\n");
   6983	} else
   6984		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
   6985
   6986	mutex_unlock(&trace_types_lock);
   6987
   6988	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   6989	return ret;
   6990}
   6991
   6992static ssize_t
   6993tracing_entries_write(struct file *filp, const char __user *ubuf,
   6994		      size_t cnt, loff_t *ppos)
   6995{
   6996	struct inode *inode = file_inode(filp);
   6997	struct trace_array *tr = inode->i_private;
   6998	unsigned long val;
   6999	int ret;
   7000
   7001	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   7002	if (ret)
   7003		return ret;
   7004
   7005	/* must have at least 1 entry */
   7006	if (!val)
   7007		return -EINVAL;
   7008
   7009	/* value is in KB */
   7010	val <<= 10;
   7011	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
   7012	if (ret < 0)
   7013		return ret;
   7014
   7015	*ppos += cnt;
   7016
   7017	return cnt;
   7018}
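
        /*
         * Sketch of the buffer_size_kb interface implemented by the two handlers
         * above (user-space fragment; the path, the 1408 KB figure and the usual
         * fcntl.h/unistd.h setup are assumed):
         *
         *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_RDWR);
         *	char buf[64];
         *
         *	write(fd, "1408", 4);            // resize each CPU buffer to 1408 KB
         *	lseek(fd, 0, SEEK_SET);
         *	read(fd, buf, sizeof(buf) - 1);  // e.g. "1408\n"; "X\n" means the
         *	                                 // per-CPU sizes currently differ
         */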
   7019
   7020static ssize_t
   7021tracing_total_entries_read(struct file *filp, char __user *ubuf,
   7022				size_t cnt, loff_t *ppos)
   7023{
   7024	struct trace_array *tr = filp->private_data;
   7025	char buf[64];
   7026	int r, cpu;
   7027	unsigned long size = 0, expanded_size = 0;
   7028
   7029	mutex_lock(&trace_types_lock);
   7030	for_each_tracing_cpu(cpu) {
   7031		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
   7032		if (!ring_buffer_expanded)
   7033			expanded_size += trace_buf_size >> 10;
   7034	}
   7035	if (ring_buffer_expanded)
   7036		r = sprintf(buf, "%lu\n", size);
   7037	else
   7038		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
   7039	mutex_unlock(&trace_types_lock);
   7040
   7041	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   7042}
   7043
   7044static ssize_t
   7045tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
   7046			  size_t cnt, loff_t *ppos)
   7047{
   7048	/*
    7049	 * There is no need to read what the user has written; this function
    7050	 * only exists so that "echo" into this file does not report an error.
   7051	 */
   7052
   7053	*ppos += cnt;
   7054
   7055	return cnt;
   7056}
   7057
   7058static int
   7059tracing_free_buffer_release(struct inode *inode, struct file *filp)
   7060{
   7061	struct trace_array *tr = inode->i_private;
   7062
    7063	/* disable tracing? */
   7064	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
   7065		tracer_tracing_off(tr);
   7066	/* resize the ring buffer to 0 */
   7067	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
   7068
   7069	trace_array_put(tr);
   7070
   7071	return 0;
   7072}
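
        /*
         * Sketch of the resulting free_buffer semantics (user-space fragment;
         * path and the usual open/write setup are assumed):
         *
         *	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
         *
         *	write(fd, "1", 1);  // data is ignored, see the write handler above
         *	close(fd);          // release shrinks the ring buffer to zero and,
         *	                    // if TRACE_ITER_STOP_ON_FREE is set, also turns
         *	                    // tracing off
         */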
   7073
   7074static ssize_t
   7075tracing_mark_write(struct file *filp, const char __user *ubuf,
   7076					size_t cnt, loff_t *fpos)
   7077{
   7078	struct trace_array *tr = filp->private_data;
   7079	struct ring_buffer_event *event;
   7080	enum event_trigger_type tt = ETT_NONE;
   7081	struct trace_buffer *buffer;
   7082	struct print_entry *entry;
   7083	ssize_t written;
   7084	int size;
   7085	int len;
   7086
   7087/* Used in tracing_mark_raw_write() as well */
   7088#define FAULTED_STR "<faulted>"
   7089#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
   7090
   7091	if (tracing_disabled)
   7092		return -EINVAL;
   7093
   7094	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
   7095		return -EINVAL;
   7096
   7097	if (cnt > TRACE_BUF_SIZE)
   7098		cnt = TRACE_BUF_SIZE;
   7099
   7100	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
   7101
   7102	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
   7103
   7104	/* If less than "<faulted>", then make sure we can still add that */
   7105	if (cnt < FAULTED_SIZE)
   7106		size += FAULTED_SIZE - cnt;
   7107
   7108	buffer = tr->array_buffer.buffer;
   7109	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
   7110					    tracing_gen_ctx());
   7111	if (unlikely(!event))
   7112		/* Ring buffer disabled, return as if not open for write */
   7113		return -EBADF;
   7114
   7115	entry = ring_buffer_event_data(event);
   7116	entry->ip = _THIS_IP_;
   7117
   7118	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
   7119	if (len) {
   7120		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
   7121		cnt = FAULTED_SIZE;
   7122		written = -EFAULT;
   7123	} else
   7124		written = cnt;
   7125
   7126	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
   7127		/* do not add \n before testing triggers, but add \0 */
   7128		entry->buf[cnt] = '\0';
   7129		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
   7130	}
   7131
   7132	if (entry->buf[cnt - 1] != '\n') {
   7133		entry->buf[cnt] = '\n';
   7134		entry->buf[cnt + 1] = '\0';
   7135	} else
   7136		entry->buf[cnt] = '\0';
   7137
   7138	if (static_branch_unlikely(&trace_marker_exports_enabled))
   7139		ftrace_exports(event, TRACE_EXPORT_MARKER);
   7140	__buffer_unlock_commit(buffer, event);
   7141
   7142	if (tt)
   7143		event_triggers_post_call(tr->trace_marker_file, tt);
   7144
   7145	return written;
   7146}
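
        /*
         * Sketch of writing the trace_marker file served by this handler
         * (user-space fragment; the path and message are illustrative):
         *
         *	const char msg[] = "hello from user space";
         *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
         *
         *	write(fd, msg, sizeof(msg) - 1);  // logged as a print event; a '\n'
         *	                                  // is appended when one is missing
         */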
   7147
   7148/* Limit it for now to 3K (including tag) */
   7149#define RAW_DATA_MAX_SIZE (1024*3)
   7150
   7151static ssize_t
   7152tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
   7153					size_t cnt, loff_t *fpos)
   7154{
   7155	struct trace_array *tr = filp->private_data;
   7156	struct ring_buffer_event *event;
   7157	struct trace_buffer *buffer;
   7158	struct raw_data_entry *entry;
   7159	ssize_t written;
   7160	int size;
   7161	int len;
   7162
   7163#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
   7164
   7165	if (tracing_disabled)
   7166		return -EINVAL;
   7167
   7168	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
   7169		return -EINVAL;
   7170
   7171	/* The marker must at least have a tag id */
   7172	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
   7173		return -EINVAL;
   7174
   7175	if (cnt > TRACE_BUF_SIZE)
   7176		cnt = TRACE_BUF_SIZE;
   7177
   7178	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
   7179
   7180	size = sizeof(*entry) + cnt;
   7181	if (cnt < FAULT_SIZE_ID)
   7182		size += FAULT_SIZE_ID - cnt;
   7183
   7184	buffer = tr->array_buffer.buffer;
   7185	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
   7186					    tracing_gen_ctx());
   7187	if (!event)
   7188		/* Ring buffer disabled, return as if not open for write */
   7189		return -EBADF;
   7190
   7191	entry = ring_buffer_event_data(event);
   7192
   7193	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
   7194	if (len) {
   7195		entry->id = -1;
   7196		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
   7197		written = -EFAULT;
   7198	} else
   7199		written = cnt;
   7200
   7201	__buffer_unlock_commit(buffer, event);
   7202
   7203	return written;
   7204}
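
        /*
         * Sketch of the binary layout this handler expects on trace_marker_raw:
         * the first sizeof(unsigned int) bytes become entry->id, the rest land
         * in entry->buf (user-space fragment; the tag value and payload are
         * illustrative):
         *
         *	struct {
         *		unsigned int id;    // consumed into entry->id above
         *		char payload[8];    // opaque bytes for whoever reads the event
         *	} rec = { .id = 42, .payload = "rawdata" };
         *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
         *
         *	write(fd, &rec, sizeof(rec));  // must be >= sizeof(unsigned int)
         *	                               // and <= RAW_DATA_MAX_SIZE
         */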
   7205
   7206static int tracing_clock_show(struct seq_file *m, void *v)
   7207{
   7208	struct trace_array *tr = m->private;
   7209	int i;
   7210
   7211	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
   7212		seq_printf(m,
   7213			"%s%s%s%s", i ? " " : "",
   7214			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
   7215			i == tr->clock_id ? "]" : "");
   7216	seq_putc(m, '\n');
   7217
   7218	return 0;
   7219}
   7220
   7221int tracing_set_clock(struct trace_array *tr, const char *clockstr)
   7222{
   7223	int i;
   7224
   7225	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
   7226		if (strcmp(trace_clocks[i].name, clockstr) == 0)
   7227			break;
   7228	}
   7229	if (i == ARRAY_SIZE(trace_clocks))
   7230		return -EINVAL;
   7231
   7232	mutex_lock(&trace_types_lock);
   7233
   7234	tr->clock_id = i;
   7235
   7236	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
   7237
   7238	/*
   7239	 * New clock may not be consistent with the previous clock.
   7240	 * Reset the buffer so that it doesn't have incomparable timestamps.
   7241	 */
   7242	tracing_reset_online_cpus(&tr->array_buffer);
   7243
   7244#ifdef CONFIG_TRACER_MAX_TRACE
   7245	if (tr->max_buffer.buffer)
   7246		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
   7247	tracing_reset_online_cpus(&tr->max_buffer);
   7248#endif
   7249
   7250	mutex_unlock(&trace_types_lock);
   7251
   7252	return 0;
   7253}
   7254
   7255static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
   7256				   size_t cnt, loff_t *fpos)
   7257{
   7258	struct seq_file *m = filp->private_data;
   7259	struct trace_array *tr = m->private;
   7260	char buf[64];
   7261	const char *clockstr;
   7262	int ret;
   7263
   7264	if (cnt >= sizeof(buf))
   7265		return -EINVAL;
   7266
   7267	if (copy_from_user(buf, ubuf, cnt))
   7268		return -EFAULT;
   7269
   7270	buf[cnt] = 0;
   7271
   7272	clockstr = strstrip(buf);
   7273
   7274	ret = tracing_set_clock(tr, clockstr);
   7275	if (ret)
   7276		return ret;
   7277
   7278	*fpos += cnt;
   7279
   7280	return cnt;
   7281}
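
        /*
         * Sketch of switching clocks through this write handler (user-space
         * fragment; the path is assumed, and "mono" is one of the names listed
         * when the file is read):
         *
         *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
         *
         *	write(fd, "mono", 4);  // select the monotonic clock; as noted in
         *	                       // tracing_set_clock(), the buffers are reset
         *	                       // so old and new timestamps never mix
         */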
   7282
   7283static int tracing_clock_open(struct inode *inode, struct file *file)
   7284{
   7285	struct trace_array *tr = inode->i_private;
   7286	int ret;
   7287
   7288	ret = tracing_check_open_get_tr(tr);
   7289	if (ret)
   7290		return ret;
   7291
   7292	ret = single_open(file, tracing_clock_show, inode->i_private);
   7293	if (ret < 0)
   7294		trace_array_put(tr);
   7295
   7296	return ret;
   7297}
   7298
   7299static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
   7300{
   7301	struct trace_array *tr = m->private;
   7302
   7303	mutex_lock(&trace_types_lock);
   7304
   7305	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
   7306		seq_puts(m, "delta [absolute]\n");
   7307	else
   7308		seq_puts(m, "[delta] absolute\n");
   7309
   7310	mutex_unlock(&trace_types_lock);
   7311
   7312	return 0;
   7313}
   7314
   7315static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
   7316{
   7317	struct trace_array *tr = inode->i_private;
   7318	int ret;
   7319
   7320	ret = tracing_check_open_get_tr(tr);
   7321	if (ret)
   7322		return ret;
   7323
   7324	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
   7325	if (ret < 0)
   7326		trace_array_put(tr);
   7327
   7328	return ret;
   7329}
   7330
   7331u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
   7332{
   7333	if (rbe == this_cpu_read(trace_buffered_event))
   7334		return ring_buffer_time_stamp(buffer);
   7335
   7336	return ring_buffer_event_time_stamp(buffer, rbe);
   7337}
   7338
   7339/*
    7340 * Set or disable using the per CPU trace_buffered_event when possible.
   7341 */
   7342int tracing_set_filter_buffering(struct trace_array *tr, bool set)
   7343{
   7344	int ret = 0;
   7345
   7346	mutex_lock(&trace_types_lock);
   7347
   7348	if (set && tr->no_filter_buffering_ref++)
   7349		goto out;
   7350
   7351	if (!set) {
   7352		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
   7353			ret = -EINVAL;
   7354			goto out;
   7355		}
   7356
   7357		--tr->no_filter_buffering_ref;
   7358	}
   7359 out:
   7360	mutex_unlock(&trace_types_lock);
   7361
   7362	return ret;
   7363}
   7364
   7365struct ftrace_buffer_info {
   7366	struct trace_iterator	iter;
   7367	void			*spare;
   7368	unsigned int		spare_cpu;
   7369	unsigned int		read;
   7370};
   7371
   7372#ifdef CONFIG_TRACER_SNAPSHOT
   7373static int tracing_snapshot_open(struct inode *inode, struct file *file)
   7374{
   7375	struct trace_array *tr = inode->i_private;
   7376	struct trace_iterator *iter;
   7377	struct seq_file *m;
   7378	int ret;
   7379
   7380	ret = tracing_check_open_get_tr(tr);
   7381	if (ret)
   7382		return ret;
   7383
   7384	if (file->f_mode & FMODE_READ) {
   7385		iter = __tracing_open(inode, file, true);
   7386		if (IS_ERR(iter))
   7387			ret = PTR_ERR(iter);
   7388	} else {
   7389		/* Writes still need the seq_file to hold the private data */
   7390		ret = -ENOMEM;
   7391		m = kzalloc(sizeof(*m), GFP_KERNEL);
   7392		if (!m)
   7393			goto out;
   7394		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
   7395		if (!iter) {
   7396			kfree(m);
   7397			goto out;
   7398		}
   7399		ret = 0;
   7400
   7401		iter->tr = tr;
   7402		iter->array_buffer = &tr->max_buffer;
   7403		iter->cpu_file = tracing_get_cpu(inode);
   7404		m->private = iter;
   7405		file->private_data = m;
   7406	}
   7407out:
   7408	if (ret < 0)
   7409		trace_array_put(tr);
   7410
   7411	return ret;
   7412}
   7413
   7414static ssize_t
   7415tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
   7416		       loff_t *ppos)
   7417{
   7418	struct seq_file *m = filp->private_data;
   7419	struct trace_iterator *iter = m->private;
   7420	struct trace_array *tr = iter->tr;
   7421	unsigned long val;
   7422	int ret;
   7423
   7424	ret = tracing_update_buffers();
   7425	if (ret < 0)
   7426		return ret;
   7427
   7428	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   7429	if (ret)
   7430		return ret;
   7431
   7432	mutex_lock(&trace_types_lock);
   7433
   7434	if (tr->current_trace->use_max_tr) {
   7435		ret = -EBUSY;
   7436		goto out;
   7437	}
   7438
   7439	arch_spin_lock(&tr->max_lock);
   7440	if (tr->cond_snapshot)
   7441		ret = -EBUSY;
   7442	arch_spin_unlock(&tr->max_lock);
   7443	if (ret)
   7444		goto out;
   7445
   7446	switch (val) {
   7447	case 0:
   7448		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
   7449			ret = -EINVAL;
   7450			break;
   7451		}
   7452		if (tr->allocated_snapshot)
   7453			free_snapshot(tr);
   7454		break;
   7455	case 1:
   7456/* Only allow per-cpu swap if the ring buffer supports it */
   7457#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
   7458		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
   7459			ret = -EINVAL;
   7460			break;
   7461		}
   7462#endif
   7463		if (tr->allocated_snapshot)
   7464			ret = resize_buffer_duplicate_size(&tr->max_buffer,
   7465					&tr->array_buffer, iter->cpu_file);
   7466		else
   7467			ret = tracing_alloc_snapshot_instance(tr);
   7468		if (ret < 0)
   7469			break;
   7470		local_irq_disable();
   7471		/* Now, we're going to swap */
   7472		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
   7473			update_max_tr(tr, current, smp_processor_id(), NULL);
   7474		else
   7475			update_max_tr_single(tr, current, iter->cpu_file);
   7476		local_irq_enable();
   7477		break;
   7478	default:
   7479		if (tr->allocated_snapshot) {
   7480			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
   7481				tracing_reset_online_cpus(&tr->max_buffer);
   7482			else
   7483				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
   7484		}
   7485		break;
   7486	}
   7487
   7488	if (ret >= 0) {
   7489		*ppos += cnt;
   7490		ret = cnt;
   7491	}
   7492out:
   7493	mutex_unlock(&trace_types_lock);
   7494	return ret;
   7495}
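
        /*
         * Sketch of the snapshot control values handled by the switch above
         * (user-space fragment; the path is assumed):
         *
         *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
         *
         *	write(fd, "1", 1);  // allocate the max buffer if needed and swap:
         *	                    // this takes the snapshot
         *	write(fd, "2", 1);  // any other value just clears the snapshot
         *	write(fd, "0", 1);  // free the snapshot buffer again
         */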
   7496
   7497static int tracing_snapshot_release(struct inode *inode, struct file *file)
   7498{
   7499	struct seq_file *m = file->private_data;
   7500	int ret;
   7501
   7502	ret = tracing_release(inode, file);
   7503
   7504	if (file->f_mode & FMODE_READ)
   7505		return ret;
   7506
   7507	/* If write only, the seq_file is just a stub */
   7508	if (m)
   7509		kfree(m->private);
   7510	kfree(m);
   7511
   7512	return 0;
   7513}
   7514
   7515static int tracing_buffers_open(struct inode *inode, struct file *filp);
   7516static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
   7517				    size_t count, loff_t *ppos);
   7518static int tracing_buffers_release(struct inode *inode, struct file *file);
   7519static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
   7520		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
   7521
   7522static int snapshot_raw_open(struct inode *inode, struct file *filp)
   7523{
   7524	struct ftrace_buffer_info *info;
   7525	int ret;
   7526
   7527	/* The following checks for tracefs lockdown */
   7528	ret = tracing_buffers_open(inode, filp);
   7529	if (ret < 0)
   7530		return ret;
   7531
   7532	info = filp->private_data;
   7533
   7534	if (info->iter.trace->use_max_tr) {
   7535		tracing_buffers_release(inode, filp);
   7536		return -EBUSY;
   7537	}
   7538
   7539	info->iter.snapshot = true;
   7540	info->iter.array_buffer = &info->iter.tr->max_buffer;
   7541
   7542	return ret;
   7543}
   7544
   7545#endif /* CONFIG_TRACER_SNAPSHOT */
   7546
   7547
   7548static const struct file_operations tracing_thresh_fops = {
   7549	.open		= tracing_open_generic,
   7550	.read		= tracing_thresh_read,
   7551	.write		= tracing_thresh_write,
   7552	.llseek		= generic_file_llseek,
   7553};
   7554
   7555#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
   7556static const struct file_operations tracing_max_lat_fops = {
   7557	.open		= tracing_open_generic,
   7558	.read		= tracing_max_lat_read,
   7559	.write		= tracing_max_lat_write,
   7560	.llseek		= generic_file_llseek,
   7561};
   7562#endif
   7563
   7564static const struct file_operations set_tracer_fops = {
   7565	.open		= tracing_open_generic,
   7566	.read		= tracing_set_trace_read,
   7567	.write		= tracing_set_trace_write,
   7568	.llseek		= generic_file_llseek,
   7569};
   7570
   7571static const struct file_operations tracing_pipe_fops = {
   7572	.open		= tracing_open_pipe,
   7573	.poll		= tracing_poll_pipe,
   7574	.read		= tracing_read_pipe,
   7575	.splice_read	= tracing_splice_read_pipe,
   7576	.release	= tracing_release_pipe,
   7577	.llseek		= no_llseek,
   7578};
   7579
   7580static const struct file_operations tracing_entries_fops = {
   7581	.open		= tracing_open_generic_tr,
   7582	.read		= tracing_entries_read,
   7583	.write		= tracing_entries_write,
   7584	.llseek		= generic_file_llseek,
   7585	.release	= tracing_release_generic_tr,
   7586};
   7587
   7588static const struct file_operations tracing_total_entries_fops = {
   7589	.open		= tracing_open_generic_tr,
   7590	.read		= tracing_total_entries_read,
   7591	.llseek		= generic_file_llseek,
   7592	.release	= tracing_release_generic_tr,
   7593};
   7594
   7595static const struct file_operations tracing_free_buffer_fops = {
   7596	.open		= tracing_open_generic_tr,
   7597	.write		= tracing_free_buffer_write,
   7598	.release	= tracing_free_buffer_release,
   7599};
   7600
   7601static const struct file_operations tracing_mark_fops = {
   7602	.open		= tracing_mark_open,
   7603	.write		= tracing_mark_write,
   7604	.release	= tracing_release_generic_tr,
   7605};
   7606
   7607static const struct file_operations tracing_mark_raw_fops = {
   7608	.open		= tracing_mark_open,
   7609	.write		= tracing_mark_raw_write,
   7610	.release	= tracing_release_generic_tr,
   7611};
   7612
   7613static const struct file_operations trace_clock_fops = {
   7614	.open		= tracing_clock_open,
   7615	.read		= seq_read,
   7616	.llseek		= seq_lseek,
   7617	.release	= tracing_single_release_tr,
   7618	.write		= tracing_clock_write,
   7619};
   7620
   7621static const struct file_operations trace_time_stamp_mode_fops = {
   7622	.open		= tracing_time_stamp_mode_open,
   7623	.read		= seq_read,
   7624	.llseek		= seq_lseek,
   7625	.release	= tracing_single_release_tr,
   7626};
   7627
   7628#ifdef CONFIG_TRACER_SNAPSHOT
   7629static const struct file_operations snapshot_fops = {
   7630	.open		= tracing_snapshot_open,
   7631	.read		= seq_read,
   7632	.write		= tracing_snapshot_write,
   7633	.llseek		= tracing_lseek,
   7634	.release	= tracing_snapshot_release,
   7635};
   7636
   7637static const struct file_operations snapshot_raw_fops = {
   7638	.open		= snapshot_raw_open,
   7639	.read		= tracing_buffers_read,
   7640	.release	= tracing_buffers_release,
   7641	.splice_read	= tracing_buffers_splice_read,
   7642	.llseek		= no_llseek,
   7643};
   7644
   7645#endif /* CONFIG_TRACER_SNAPSHOT */
   7646
   7647/*
   7648 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
   7649 * @filp: The active open file structure
    7650 * @ubuf: The user space buffer containing the value to write
    7651 * @cnt: The maximum number of bytes to read from @ubuf
   7652 * @ppos: The current "file" position
   7653 *
   7654 * This function implements the write interface for a struct trace_min_max_param.
   7655 * The filp->private_data must point to a trace_min_max_param structure that
   7656 * defines where to write the value, the min and the max acceptable values,
   7657 * and a lock to protect the write.
   7658 */
   7659static ssize_t
   7660trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
   7661{
   7662	struct trace_min_max_param *param = filp->private_data;
   7663	u64 val;
   7664	int err;
   7665
   7666	if (!param)
   7667		return -EFAULT;
   7668
   7669	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
   7670	if (err)
   7671		return err;
   7672
   7673	if (param->lock)
   7674		mutex_lock(param->lock);
   7675
   7676	if (param->min && val < *param->min)
   7677		err = -EINVAL;
   7678
   7679	if (param->max && val > *param->max)
   7680		err = -EINVAL;
   7681
   7682	if (!err)
   7683		*param->val = val;
   7684
   7685	if (param->lock)
   7686		mutex_unlock(param->lock);
   7687
   7688	if (err)
   7689		return err;
   7690
   7691	return cnt;
   7692}
   7693
   7694/*
   7695 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
   7696 * @filp: The active open file structure
   7697 * @ubuf: The userspace provided buffer to read value into
   7698 * @cnt: The maximum number of bytes to read
   7699 * @ppos: The current "file" position
   7700 *
   7701 * This function implements the read interface for a struct trace_min_max_param.
   7702 * The filp->private_data must point to a trace_min_max_param struct with valid
   7703 * data.
   7704 */
   7705static ssize_t
   7706trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
   7707{
   7708	struct trace_min_max_param *param = filp->private_data;
   7709	char buf[U64_STR_SIZE];
   7710	int len;
   7711	u64 val;
   7712
   7713	if (!param)
   7714		return -EFAULT;
   7715
   7716	val = *param->val;
   7717
   7718	if (cnt > sizeof(buf))
   7719		cnt = sizeof(buf);
   7720
   7721	len = snprintf(buf, sizeof(buf), "%llu\n", val);
   7722
   7723	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
   7724}
   7725
   7726const struct file_operations trace_min_max_fops = {
   7727	.open		= tracing_open_generic,
   7728	.read		= trace_min_max_read,
   7729	.write		= trace_min_max_write,
   7730};
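
        /*
         * Sketch of wiring a value to trace_min_max_fops.  The names below
         * (my_val, my_knob, the parent dentry and the [1, 100] range) are
         * hypothetical; only the trace_min_max_param fields and
         * trace_create_file() come from the tracing code itself:
         *
         *	static u64 my_val = 50, my_min = 1, my_max = 100;
         *	static DEFINE_MUTEX(my_lock);
         *
         *	static struct trace_min_max_param my_knob = {
         *		.lock	= &my_lock,
         *		.val	= &my_val,
         *		.min	= &my_min,
         *		.max	= &my_max,
         *	};
         *
         *	// Writes outside [1, 100] are rejected with -EINVAL by
         *	// trace_min_max_write(); reads print the current value.
         *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
         *			  &my_knob, &trace_min_max_fops);
         */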
   7731
   7732#define TRACING_LOG_ERRS_MAX	8
   7733#define TRACING_LOG_LOC_MAX	128
   7734
   7735#define CMD_PREFIX "  Command: "
   7736
   7737struct err_info {
   7738	const char	**errs;	/* ptr to loc-specific array of err strings */
   7739	u8		type;	/* index into errs -> specific err string */
   7740	u16		pos;	/* caret position */
   7741	u64		ts;
   7742};
   7743
   7744struct tracing_log_err {
   7745	struct list_head	list;
   7746	struct err_info		info;
   7747	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
   7748	char			*cmd;                     /* what caused err */
   7749};
   7750
   7751static DEFINE_MUTEX(tracing_err_log_lock);
   7752
   7753static struct tracing_log_err *alloc_tracing_log_err(int len)
   7754{
   7755	struct tracing_log_err *err;
   7756
   7757	err = kzalloc(sizeof(*err), GFP_KERNEL);
   7758	if (!err)
   7759		return ERR_PTR(-ENOMEM);
   7760
   7761	err->cmd = kzalloc(len, GFP_KERNEL);
   7762	if (!err->cmd) {
   7763		kfree(err);
   7764		return ERR_PTR(-ENOMEM);
   7765	}
   7766
   7767	return err;
   7768}
   7769
   7770static void free_tracing_log_err(struct tracing_log_err *err)
   7771{
   7772	kfree(err->cmd);
   7773	kfree(err);
   7774}
   7775
   7776static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
   7777						   int len)
   7778{
   7779	struct tracing_log_err *err;
   7780
   7781	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
   7782		err = alloc_tracing_log_err(len);
   7783		if (PTR_ERR(err) != -ENOMEM)
   7784			tr->n_err_log_entries++;
   7785
   7786		return err;
   7787	}
   7788
   7789	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
   7790	kfree(err->cmd);
   7791	err->cmd = kzalloc(len, GFP_KERNEL);
   7792	if (!err->cmd)
   7793		return ERR_PTR(-ENOMEM);
   7794	list_del(&err->list);
   7795
   7796	return err;
   7797}
   7798
   7799/**
   7800 * err_pos - find the position of a string within a command for error careting
   7801 * @cmd: The tracing command that caused the error
   7802 * @str: The string to position the caret at within @cmd
   7803 *
   7804 * Finds the position of the first occurrence of @str within @cmd.  The
   7805 * return value can be passed to tracing_log_err() for caret placement
   7806 * within @cmd.
   7807 *
   7808 * Returns the index within @cmd of the first occurrence of @str or 0
   7809 * if @str was not found.
   7810 */
   7811unsigned int err_pos(char *cmd, const char *str)
   7812{
   7813	char *found;
   7814
   7815	if (WARN_ON(!strlen(cmd)))
   7816		return 0;
   7817
   7818	found = strstr(cmd, str);
   7819	if (found)
   7820		return found - cmd;
   7821
   7822	return 0;
   7823}
   7824
   7825/**
   7826 * tracing_log_err - write an error to the tracing error log
   7827 * @tr: The associated trace array for the error (NULL for top level array)
   7828 * @loc: A string describing where the error occurred
   7829 * @cmd: The tracing command that caused the error
   7830 * @errs: The array of loc-specific static error strings
   7831 * @type: The index into errs[], which produces the specific static err string
   7832 * @pos: The position the caret should be placed in the cmd
   7833 *
   7834 * Writes an error into tracing/error_log of the form:
   7835 *
   7836 * <loc>: error: <text>
   7837 *   Command: <cmd>
   7838 *              ^
   7839 *
   7840 * tracing/error_log is a small log file containing the last
   7841 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
   7842 * unless there has been a tracing error, and the error log can be
    7843 * cleared, and its memory freed, by writing the empty string to it in
    7844 * truncation mode, i.e. echo > tracing/error_log.
   7845 *
   7846 * NOTE: the @errs array along with the @type param are used to
   7847 * produce a static error string - this string is not copied and saved
   7848 * when the error is logged - only a pointer to it is saved.  See
   7849 * existing callers for examples of how static strings are typically
   7850 * defined for use with tracing_log_err().
   7851 */
   7852void tracing_log_err(struct trace_array *tr,
   7853		     const char *loc, const char *cmd,
   7854		     const char **errs, u8 type, u16 pos)
   7855{
   7856	struct tracing_log_err *err;
   7857	int len = 0;
   7858
   7859	if (!tr)
   7860		tr = &global_trace;
   7861
   7862	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
   7863
   7864	mutex_lock(&tracing_err_log_lock);
   7865	err = get_tracing_log_err(tr, len);
   7866	if (PTR_ERR(err) == -ENOMEM) {
   7867		mutex_unlock(&tracing_err_log_lock);
   7868		return;
   7869	}
   7870
   7871	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
   7872	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
   7873
   7874	err->info.errs = errs;
   7875	err->info.type = type;
   7876	err->info.pos = pos;
   7877	err->info.ts = local_clock();
   7878
   7879	list_add_tail(&err->list, &tr->err_log);
   7880	mutex_unlock(&tracing_err_log_lock);
   7881}
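
        /*
         * Sketch of a caller, following the NOTE above: the error strings are
         * static and only a pointer to them is kept.  The my_errs array, the
         * "my_cmd" location string and the "val=" token are hypothetical, and
         * tr/cmd are assumed to be in the caller's hands:
         *
         *	static const char *my_errs[] = {
         *		"Duplicate key",	// type 0
         *		"Value out of range",	// type 1
         *	};
         *
         *	// Log error type 1 for @cmd, placing the caret under "val=":
         *	tracing_log_err(tr, "my_cmd", cmd, my_errs, 1,
         *			err_pos(cmd, "val="));
         */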
   7882
   7883static void clear_tracing_err_log(struct trace_array *tr)
   7884{
   7885	struct tracing_log_err *err, *next;
   7886
   7887	mutex_lock(&tracing_err_log_lock);
   7888	list_for_each_entry_safe(err, next, &tr->err_log, list) {
   7889		list_del(&err->list);
   7890		free_tracing_log_err(err);
   7891	}
   7892
   7893	tr->n_err_log_entries = 0;
   7894	mutex_unlock(&tracing_err_log_lock);
   7895}
   7896
   7897static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
   7898{
   7899	struct trace_array *tr = m->private;
   7900
   7901	mutex_lock(&tracing_err_log_lock);
   7902
   7903	return seq_list_start(&tr->err_log, *pos);
   7904}
   7905
   7906static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
   7907{
   7908	struct trace_array *tr = m->private;
   7909
   7910	return seq_list_next(v, &tr->err_log, pos);
   7911}
   7912
   7913static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
   7914{
   7915	mutex_unlock(&tracing_err_log_lock);
   7916}
   7917
   7918static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
   7919{
   7920	u16 i;
   7921
   7922	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
   7923		seq_putc(m, ' ');
   7924	for (i = 0; i < pos; i++)
   7925		seq_putc(m, ' ');
   7926	seq_puts(m, "^\n");
   7927}
   7928
   7929static int tracing_err_log_seq_show(struct seq_file *m, void *v)
   7930{
   7931	struct tracing_log_err *err = v;
   7932
   7933	if (err) {
   7934		const char *err_text = err->info.errs[err->info.type];
   7935		u64 sec = err->info.ts;
   7936		u32 nsec;
   7937
   7938		nsec = do_div(sec, NSEC_PER_SEC);
   7939		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
   7940			   err->loc, err_text);
   7941		seq_printf(m, "%s", err->cmd);
   7942		tracing_err_log_show_pos(m, err->info.pos);
   7943	}
   7944
   7945	return 0;
   7946}
   7947
   7948static const struct seq_operations tracing_err_log_seq_ops = {
   7949	.start  = tracing_err_log_seq_start,
   7950	.next   = tracing_err_log_seq_next,
   7951	.stop   = tracing_err_log_seq_stop,
   7952	.show   = tracing_err_log_seq_show
   7953};
   7954
   7955static int tracing_err_log_open(struct inode *inode, struct file *file)
   7956{
   7957	struct trace_array *tr = inode->i_private;
   7958	int ret = 0;
   7959
   7960	ret = tracing_check_open_get_tr(tr);
   7961	if (ret)
   7962		return ret;
   7963
   7964	/* If this file was opened for write, then erase contents */
   7965	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
   7966		clear_tracing_err_log(tr);
   7967
   7968	if (file->f_mode & FMODE_READ) {
   7969		ret = seq_open(file, &tracing_err_log_seq_ops);
   7970		if (!ret) {
   7971			struct seq_file *m = file->private_data;
   7972			m->private = tr;
   7973		} else {
   7974			trace_array_put(tr);
   7975		}
   7976	}
   7977	return ret;
   7978}
   7979
   7980static ssize_t tracing_err_log_write(struct file *file,
   7981				     const char __user *buffer,
   7982				     size_t count, loff_t *ppos)
   7983{
   7984	return count;
   7985}
   7986
   7987static int tracing_err_log_release(struct inode *inode, struct file *file)
   7988{
   7989	struct trace_array *tr = inode->i_private;
   7990
   7991	trace_array_put(tr);
   7992
   7993	if (file->f_mode & FMODE_READ)
   7994		seq_release(inode, file);
   7995
   7996	return 0;
   7997}
   7998
   7999static const struct file_operations tracing_err_log_fops = {
   8000	.open           = tracing_err_log_open,
   8001	.write		= tracing_err_log_write,
   8002	.read           = seq_read,
   8003	.llseek         = seq_lseek,
   8004	.release        = tracing_err_log_release,
   8005};
   8006
   8007static int tracing_buffers_open(struct inode *inode, struct file *filp)
   8008{
   8009	struct trace_array *tr = inode->i_private;
   8010	struct ftrace_buffer_info *info;
   8011	int ret;
   8012
   8013	ret = tracing_check_open_get_tr(tr);
   8014	if (ret)
   8015		return ret;
   8016
   8017	info = kvzalloc(sizeof(*info), GFP_KERNEL);
   8018	if (!info) {
   8019		trace_array_put(tr);
   8020		return -ENOMEM;
   8021	}
   8022
   8023	mutex_lock(&trace_types_lock);
   8024
   8025	info->iter.tr		= tr;
   8026	info->iter.cpu_file	= tracing_get_cpu(inode);
   8027	info->iter.trace	= tr->current_trace;
   8028	info->iter.array_buffer = &tr->array_buffer;
   8029	info->spare		= NULL;
   8030	/* Force reading ring buffer for first read */
   8031	info->read		= (unsigned int)-1;
   8032
   8033	filp->private_data = info;
   8034
   8035	tr->trace_ref++;
   8036
   8037	mutex_unlock(&trace_types_lock);
   8038
   8039	ret = nonseekable_open(inode, filp);
   8040	if (ret < 0)
   8041		trace_array_put(tr);
   8042
   8043	return ret;
   8044}
   8045
   8046static __poll_t
   8047tracing_buffers_poll(struct file *filp, poll_table *poll_table)
   8048{
   8049	struct ftrace_buffer_info *info = filp->private_data;
   8050	struct trace_iterator *iter = &info->iter;
   8051
   8052	return trace_poll(iter, filp, poll_table);
   8053}
   8054
   8055static ssize_t
   8056tracing_buffers_read(struct file *filp, char __user *ubuf,
   8057		     size_t count, loff_t *ppos)
   8058{
   8059	struct ftrace_buffer_info *info = filp->private_data;
   8060	struct trace_iterator *iter = &info->iter;
   8061	ssize_t ret = 0;
   8062	ssize_t size;
   8063
   8064	if (!count)
   8065		return 0;
   8066
   8067#ifdef CONFIG_TRACER_MAX_TRACE
   8068	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
   8069		return -EBUSY;
   8070#endif
   8071
   8072	if (!info->spare) {
   8073		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
   8074							  iter->cpu_file);
   8075		if (IS_ERR(info->spare)) {
   8076			ret = PTR_ERR(info->spare);
   8077			info->spare = NULL;
   8078		} else {
   8079			info->spare_cpu = iter->cpu_file;
   8080		}
   8081	}
   8082	if (!info->spare)
   8083		return ret;
   8084
   8085	/* Do we have previous read data to read? */
   8086	if (info->read < PAGE_SIZE)
   8087		goto read;
   8088
   8089 again:
   8090	trace_access_lock(iter->cpu_file);
   8091	ret = ring_buffer_read_page(iter->array_buffer->buffer,
   8092				    &info->spare,
   8093				    count,
   8094				    iter->cpu_file, 0);
   8095	trace_access_unlock(iter->cpu_file);
   8096
   8097	if (ret < 0) {
   8098		if (trace_empty(iter)) {
   8099			if ((filp->f_flags & O_NONBLOCK))
   8100				return -EAGAIN;
   8101
   8102			ret = wait_on_pipe(iter, 0);
   8103			if (ret)
   8104				return ret;
   8105
   8106			goto again;
   8107		}
   8108		return 0;
   8109	}
   8110
   8111	info->read = 0;
   8112 read:
   8113	size = PAGE_SIZE - info->read;
   8114	if (size > count)
   8115		size = count;
   8116
   8117	ret = copy_to_user(ubuf, info->spare + info->read, size);
   8118	if (ret == size)
   8119		return -EFAULT;
   8120
   8121	size -= ret;
   8122
   8123	*ppos += size;
   8124	info->read += size;
   8125
   8126	return size;
   8127}
   8128
   8129static int tracing_buffers_release(struct inode *inode, struct file *file)
   8130{
   8131	struct ftrace_buffer_info *info = file->private_data;
   8132	struct trace_iterator *iter = &info->iter;
   8133
   8134	mutex_lock(&trace_types_lock);
   8135
   8136	iter->tr->trace_ref--;
   8137
   8138	__trace_array_put(iter->tr);
   8139
   8140	if (info->spare)
   8141		ring_buffer_free_read_page(iter->array_buffer->buffer,
   8142					   info->spare_cpu, info->spare);
   8143	kvfree(info);
   8144
   8145	mutex_unlock(&trace_types_lock);
   8146
   8147	return 0;
   8148}
   8149
   8150struct buffer_ref {
   8151	struct trace_buffer	*buffer;
   8152	void			*page;
   8153	int			cpu;
   8154	refcount_t		refcount;
   8155};
   8156
   8157static void buffer_ref_release(struct buffer_ref *ref)
   8158{
   8159	if (!refcount_dec_and_test(&ref->refcount))
   8160		return;
   8161	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
   8162	kfree(ref);
   8163}
   8164
   8165static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
   8166				    struct pipe_buffer *buf)
   8167{
   8168	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
   8169
   8170	buffer_ref_release(ref);
   8171	buf->private = 0;
   8172}
   8173
   8174static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
   8175				struct pipe_buffer *buf)
   8176{
   8177	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
   8178
   8179	if (refcount_read(&ref->refcount) > INT_MAX/2)
   8180		return false;
   8181
   8182	refcount_inc(&ref->refcount);
   8183	return true;
   8184}
   8185
   8186/* Pipe buffer operations for a buffer. */
   8187static const struct pipe_buf_operations buffer_pipe_buf_ops = {
   8188	.release		= buffer_pipe_buf_release,
   8189	.get			= buffer_pipe_buf_get,
   8190};
   8191
   8192/*
    8193 * Callback from splice_to_pipe(); releases any pages still held in the
    8194 * spd in case we errored out while filling the pipe.
   8195 */
   8196static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
   8197{
   8198	struct buffer_ref *ref =
   8199		(struct buffer_ref *)spd->partial[i].private;
   8200
   8201	buffer_ref_release(ref);
   8202	spd->partial[i].private = 0;
   8203}
   8204
   8205static ssize_t
   8206tracing_buffers_splice_read(struct file *file, loff_t *ppos,
   8207			    struct pipe_inode_info *pipe, size_t len,
   8208			    unsigned int flags)
   8209{
   8210	struct ftrace_buffer_info *info = file->private_data;
   8211	struct trace_iterator *iter = &info->iter;
   8212	struct partial_page partial_def[PIPE_DEF_BUFFERS];
   8213	struct page *pages_def[PIPE_DEF_BUFFERS];
   8214	struct splice_pipe_desc spd = {
   8215		.pages		= pages_def,
   8216		.partial	= partial_def,
   8217		.nr_pages_max	= PIPE_DEF_BUFFERS,
   8218		.ops		= &buffer_pipe_buf_ops,
   8219		.spd_release	= buffer_spd_release,
   8220	};
   8221	struct buffer_ref *ref;
   8222	int entries, i;
   8223	ssize_t ret = 0;
   8224
   8225#ifdef CONFIG_TRACER_MAX_TRACE
   8226	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
   8227		return -EBUSY;
   8228#endif
   8229
   8230	if (*ppos & (PAGE_SIZE - 1))
   8231		return -EINVAL;
   8232
   8233	if (len & (PAGE_SIZE - 1)) {
   8234		if (len < PAGE_SIZE)
   8235			return -EINVAL;
   8236		len &= PAGE_MASK;
   8237	}
   8238
   8239	if (splice_grow_spd(pipe, &spd))
   8240		return -ENOMEM;
   8241
   8242 again:
   8243	trace_access_lock(iter->cpu_file);
   8244	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
   8245
   8246	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
   8247		struct page *page;
   8248		int r;
   8249
   8250		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
   8251		if (!ref) {
   8252			ret = -ENOMEM;
   8253			break;
   8254		}
   8255
   8256		refcount_set(&ref->refcount, 1);
   8257		ref->buffer = iter->array_buffer->buffer;
   8258		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
   8259		if (IS_ERR(ref->page)) {
   8260			ret = PTR_ERR(ref->page);
   8261			ref->page = NULL;
   8262			kfree(ref);
   8263			break;
   8264		}
   8265		ref->cpu = iter->cpu_file;
   8266
   8267		r = ring_buffer_read_page(ref->buffer, &ref->page,
   8268					  len, iter->cpu_file, 1);
   8269		if (r < 0) {
   8270			ring_buffer_free_read_page(ref->buffer, ref->cpu,
   8271						   ref->page);
   8272			kfree(ref);
   8273			break;
   8274		}
   8275
   8276		page = virt_to_page(ref->page);
   8277
   8278		spd.pages[i] = page;
   8279		spd.partial[i].len = PAGE_SIZE;
   8280		spd.partial[i].offset = 0;
   8281		spd.partial[i].private = (unsigned long)ref;
   8282		spd.nr_pages++;
   8283		*ppos += PAGE_SIZE;
   8284
   8285		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
   8286	}
   8287
   8288	trace_access_unlock(iter->cpu_file);
   8289	spd.nr_pages = i;
   8290
   8291	/* did we read anything? */
   8292	if (!spd.nr_pages) {
   8293		if (ret)
   8294			goto out;
   8295
   8296		ret = -EAGAIN;
   8297		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
   8298			goto out;
   8299
   8300		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
   8301		if (ret)
   8302			goto out;
   8303
   8304		goto again;
   8305	}
   8306
   8307	ret = splice_to_pipe(pipe, &spd);
   8308out:
   8309	splice_shrink_spd(&spd);
   8310
   8311	return ret;
   8312}
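
        /*
         * Sketch of draining raw, page-sized ring-buffer data the way tools
         * such as trace-cmd do, using the splice path above (user-space
         * fragment; the paths, cpu0 and the 64 KiB request are illustrative,
         * and per the checks above the length must be a multiple of the page
         * size):
         *
         *	int raw = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
         *		       O_RDONLY);
         *	int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
         *	int pfd[2];
         *	ssize_t n;
         *
         *	pipe(pfd);
         *	n = splice(raw, NULL, pfd[1], NULL, 65536, SPLICE_F_NONBLOCK);
         *	if (n > 0)
         *		splice(pfd[0], NULL, out, NULL, n, 0);
         */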
   8313
   8314static const struct file_operations tracing_buffers_fops = {
   8315	.open		= tracing_buffers_open,
   8316	.read		= tracing_buffers_read,
   8317	.poll		= tracing_buffers_poll,
   8318	.release	= tracing_buffers_release,
   8319	.splice_read	= tracing_buffers_splice_read,
   8320	.llseek		= no_llseek,
   8321};
   8322
   8323static ssize_t
   8324tracing_stats_read(struct file *filp, char __user *ubuf,
   8325		   size_t count, loff_t *ppos)
   8326{
   8327	struct inode *inode = file_inode(filp);
   8328	struct trace_array *tr = inode->i_private;
   8329	struct array_buffer *trace_buf = &tr->array_buffer;
   8330	int cpu = tracing_get_cpu(inode);
   8331	struct trace_seq *s;
   8332	unsigned long cnt;
   8333	unsigned long long t;
   8334	unsigned long usec_rem;
   8335
   8336	s = kmalloc(sizeof(*s), GFP_KERNEL);
   8337	if (!s)
   8338		return -ENOMEM;
   8339
   8340	trace_seq_init(s);
   8341
   8342	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
   8343	trace_seq_printf(s, "entries: %ld\n", cnt);
   8344
   8345	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
   8346	trace_seq_printf(s, "overrun: %ld\n", cnt);
   8347
   8348	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
   8349	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
   8350
   8351	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
   8352	trace_seq_printf(s, "bytes: %ld\n", cnt);
   8353
   8354	if (trace_clocks[tr->clock_id].in_ns) {
   8355		/* local or global for trace_clock */
   8356		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
   8357		usec_rem = do_div(t, USEC_PER_SEC);
   8358		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
   8359								t, usec_rem);
   8360
   8361		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
   8362		usec_rem = do_div(t, USEC_PER_SEC);
   8363		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
   8364	} else {
   8365		/* counter or tsc mode for trace_clock */
   8366		trace_seq_printf(s, "oldest event ts: %llu\n",
   8367				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
   8368
   8369		trace_seq_printf(s, "now ts: %llu\n",
   8370				ring_buffer_time_stamp(trace_buf->buffer));
   8371	}
   8372
   8373	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
   8374	trace_seq_printf(s, "dropped events: %ld\n", cnt);
   8375
   8376	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
   8377	trace_seq_printf(s, "read events: %ld\n", cnt);
   8378
   8379	count = simple_read_from_buffer(ubuf, count, ppos,
   8380					s->buffer, trace_seq_used(s));
   8381
   8382	kfree(s);
   8383
   8384	return count;
   8385}
   8386
   8387static const struct file_operations tracing_stats_fops = {
   8388	.open		= tracing_open_generic_tr,
   8389	.read		= tracing_stats_read,
   8390	.llseek		= generic_file_llseek,
   8391	.release	= tracing_release_generic_tr,
   8392};
   8393
   8394#ifdef CONFIG_DYNAMIC_FTRACE
   8395
   8396static ssize_t
   8397tracing_read_dyn_info(struct file *filp, char __user *ubuf,
   8398		  size_t cnt, loff_t *ppos)
   8399{
   8400	ssize_t ret;
   8401	char *buf;
   8402	int r;
   8403
   8404	/* 256 should be plenty to hold the amount needed */
   8405	buf = kmalloc(256, GFP_KERNEL);
   8406	if (!buf)
   8407		return -ENOMEM;
   8408
   8409	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
   8410		      ftrace_update_tot_cnt,
   8411		      ftrace_number_of_pages,
   8412		      ftrace_number_of_groups);
   8413
   8414	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   8415	kfree(buf);
   8416	return ret;
   8417}
   8418
   8419static const struct file_operations tracing_dyn_info_fops = {
   8420	.open		= tracing_open_generic,
   8421	.read		= tracing_read_dyn_info,
   8422	.llseek		= generic_file_llseek,
   8423};
   8424#endif /* CONFIG_DYNAMIC_FTRACE */
   8425
   8426#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
   8427static void
   8428ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
   8429		struct trace_array *tr, struct ftrace_probe_ops *ops,
   8430		void *data)
   8431{
   8432	tracing_snapshot_instance(tr);
   8433}
   8434
   8435static void
   8436ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
   8437		      struct trace_array *tr, struct ftrace_probe_ops *ops,
   8438		      void *data)
   8439{
   8440	struct ftrace_func_mapper *mapper = data;
   8441	long *count = NULL;
   8442
   8443	if (mapper)
   8444		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
   8445
   8446	if (count) {
   8447
   8448		if (*count <= 0)
   8449			return;
   8450
   8451		(*count)--;
   8452	}
   8453
   8454	tracing_snapshot_instance(tr);
   8455}
   8456
   8457static int
   8458ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
   8459		      struct ftrace_probe_ops *ops, void *data)
   8460{
   8461	struct ftrace_func_mapper *mapper = data;
   8462	long *count = NULL;
   8463
   8464	seq_printf(m, "%ps:", (void *)ip);
   8465
   8466	seq_puts(m, "snapshot");
   8467
   8468	if (mapper)
   8469		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
   8470
   8471	if (count)
   8472		seq_printf(m, ":count=%ld\n", *count);
   8473	else
   8474		seq_puts(m, ":unlimited\n");
   8475
   8476	return 0;
   8477}
   8478
   8479static int
   8480ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
   8481		     unsigned long ip, void *init_data, void **data)
   8482{
   8483	struct ftrace_func_mapper *mapper = *data;
   8484
   8485	if (!mapper) {
   8486		mapper = allocate_ftrace_func_mapper();
   8487		if (!mapper)
   8488			return -ENOMEM;
   8489		*data = mapper;
   8490	}
   8491
   8492	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
   8493}
   8494
   8495static void
   8496ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
   8497		     unsigned long ip, void *data)
   8498{
   8499	struct ftrace_func_mapper *mapper = data;
   8500
   8501	if (!ip) {
   8502		if (!mapper)
   8503			return;
   8504		free_ftrace_func_mapper(mapper, NULL);
   8505		return;
   8506	}
   8507
   8508	ftrace_func_mapper_remove_ip(mapper, ip);
   8509}
   8510
   8511static struct ftrace_probe_ops snapshot_probe_ops = {
   8512	.func			= ftrace_snapshot,
   8513	.print			= ftrace_snapshot_print,
   8514};
   8515
   8516static struct ftrace_probe_ops snapshot_count_probe_ops = {
   8517	.func			= ftrace_count_snapshot,
   8518	.print			= ftrace_snapshot_print,
   8519	.init			= ftrace_snapshot_init,
   8520	.free			= ftrace_snapshot_free,
   8521};
   8522
   8523static int
   8524ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
   8525			       char *glob, char *cmd, char *param, int enable)
   8526{
   8527	struct ftrace_probe_ops *ops;
   8528	void *count = (void *)-1;
   8529	char *number;
   8530	int ret;
   8531
   8532	if (!tr)
   8533		return -ENODEV;
   8534
   8535	/* hash funcs only work with set_ftrace_filter */
   8536	if (!enable)
   8537		return -EINVAL;
   8538
   8539	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
   8540
   8541	if (glob[0] == '!')
   8542		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
   8543
   8544	if (!param)
   8545		goto out_reg;
   8546
   8547	number = strsep(&param, ":");
   8548
   8549	if (!strlen(number))
   8550		goto out_reg;
   8551
   8552	/*
   8553	 * We use the callback data field (which is a pointer)
   8554	 * as our counter.
   8555	 */
   8556	ret = kstrtoul(number, 0, (unsigned long *)&count);
   8557	if (ret)
   8558		return ret;
   8559
   8560 out_reg:
   8561	ret = tracing_alloc_snapshot_instance(tr);
   8562	if (ret < 0)
   8563		goto out;
   8564
   8565	ret = register_ftrace_function_probe(glob, tr, ops, count);
   8566
   8567 out:
   8568	return ret < 0 ? ret : 0;
   8569}
   8570
   8571static struct ftrace_func_command ftrace_snapshot_cmd = {
   8572	.name			= "snapshot",
   8573	.func			= ftrace_trace_snapshot_callback,
   8574};
   8575
   8576static __init int register_snapshot_cmd(void)
   8577{
   8578	return register_ftrace_command(&ftrace_snapshot_cmd);
   8579}
   8580#else
   8581static inline __init int register_snapshot_cmd(void) { return 0; }
   8582#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
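
        /*
         * Sketch of driving the "snapshot" command parsed by
         * ftrace_trace_snapshot_callback() above (user-space fragment; the
         * kfree function name and the count of 3 are illustrative; the
         * trailing newline lets the filter parser treat the command as
         * complete):
         *
         *	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
         *
         *	// Take a snapshot on each of the first 3 hits of kfree().
         *	write(fd, "kfree:snapshot:3\n", 17);
         *
         *	// A '!' prefix (keeping the :count suffix, so the counted probe
         *	// ops are selected) removes the probe again.
         *	write(fd, "!kfree:snapshot:3\n", 18);
         */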
   8583
   8584static struct dentry *tracing_get_dentry(struct trace_array *tr)
   8585{
   8586	if (WARN_ON(!tr->dir))
   8587		return ERR_PTR(-ENODEV);
   8588
   8589	/* Top directory uses NULL as the parent */
   8590	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
   8591		return NULL;
   8592
   8593	/* All sub buffers have a descriptor */
   8594	return tr->dir;
   8595}
   8596
   8597static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
   8598{
   8599	struct dentry *d_tracer;
   8600
   8601	if (tr->percpu_dir)
   8602		return tr->percpu_dir;
   8603
   8604	d_tracer = tracing_get_dentry(tr);
   8605	if (IS_ERR(d_tracer))
   8606		return NULL;
   8607
   8608	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
   8609
   8610	MEM_FAIL(!tr->percpu_dir,
   8611		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
   8612
   8613	return tr->percpu_dir;
   8614}
   8615
   8616static struct dentry *
   8617trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
   8618		      void *data, long cpu, const struct file_operations *fops)
   8619{
   8620	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
   8621
   8622	if (ret) /* See tracing_get_cpu() */
   8623		d_inode(ret)->i_cdev = (void *)(cpu + 1);
   8624	return ret;
   8625}
   8626
   8627static void
   8628tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
   8629{
   8630	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
   8631	struct dentry *d_cpu;
   8632	char cpu_dir[30]; /* 30 characters should be more than enough */
   8633
   8634	if (!d_percpu)
   8635		return;
   8636
   8637	snprintf(cpu_dir, 30, "cpu%ld", cpu);
   8638	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
   8639	if (!d_cpu) {
   8640		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
   8641		return;
   8642	}
   8643
   8644	/* per cpu trace_pipe */
   8645	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
   8646				tr, cpu, &tracing_pipe_fops);
   8647
   8648	/* per cpu trace */
   8649	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
   8650				tr, cpu, &tracing_fops);
   8651
   8652	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
   8653				tr, cpu, &tracing_buffers_fops);
   8654
   8655	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
   8656				tr, cpu, &tracing_stats_fops);
   8657
   8658	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
   8659				tr, cpu, &tracing_entries_fops);
   8660
   8661#ifdef CONFIG_TRACER_SNAPSHOT
   8662	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
   8663				tr, cpu, &snapshot_fops);
   8664
   8665	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
   8666				tr, cpu, &snapshot_raw_fops);
   8667#endif
   8668}
   8669
   8670#ifdef CONFIG_FTRACE_SELFTEST
   8671/* Let selftest have access to static functions in this file */
   8672#include "trace_selftest.c"
   8673#endif
   8674
   8675static ssize_t
   8676trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
   8677			loff_t *ppos)
   8678{
   8679	struct trace_option_dentry *topt = filp->private_data;
   8680	char *buf;
   8681
   8682	if (topt->flags->val & topt->opt->bit)
   8683		buf = "1\n";
   8684	else
   8685		buf = "0\n";
   8686
   8687	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
   8688}
   8689
   8690static ssize_t
   8691trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
   8692			 loff_t *ppos)
   8693{
   8694	struct trace_option_dentry *topt = filp->private_data;
   8695	unsigned long val;
   8696	int ret;
   8697
   8698	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   8699	if (ret)
   8700		return ret;
   8701
   8702	if (val != 0 && val != 1)
   8703		return -EINVAL;
   8704
   8705	if (!!(topt->flags->val & topt->opt->bit) != val) {
   8706		mutex_lock(&trace_types_lock);
   8707		ret = __set_tracer_option(topt->tr, topt->flags,
   8708					  topt->opt, !val);
   8709		mutex_unlock(&trace_types_lock);
   8710		if (ret)
   8711			return ret;
   8712	}
   8713
   8714	*ppos += cnt;
   8715
   8716	return cnt;
   8717}
   8718
   8719
   8720static const struct file_operations trace_options_fops = {
   8721	.open = tracing_open_generic,
   8722	.read = trace_options_read,
   8723	.write = trace_options_write,
   8724	.llseek	= generic_file_llseek,
   8725};
   8726
   8727/*
   8728 * In order to pass in both the trace_array descriptor as well as the index
   8729 * to the flag that the trace option file represents, the trace_array
   8730 * has a character array of trace_flags_index[], which holds the index
   8731 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
   8732 * The address of this character array is passed to the flag option file
   8733 * read/write callbacks.
   8734 *
   8735 * In order to extract both the index and the trace_array descriptor,
   8736 * get_tr_index() uses the following algorithm.
   8737 *
   8738 *   idx = *ptr;
   8739 *
    8740 * Because each slot of the array stores its own index (remember
    8741 * index[1] == 1), dereferencing the pointer yields the flag's index.
    8742 *
    8743 * Subtracting that index from the pointer then lands back on the start
    8744 * of the array:
   8745 *
   8746 *   ptr - idx == &index[0]
   8747 *
   8748 * Then a simple container_of() from that pointer gets us to the
   8749 * trace_array descriptor.
   8750 */
   8751static void get_tr_index(void *data, struct trace_array **ptr,
   8752			 unsigned int *pindex)
   8753{
   8754	*pindex = *(unsigned char *)data;
   8755
   8756	*ptr = container_of(data - *pindex, struct trace_array,
   8757			    trace_flags_index);
   8758}
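
        /*
         * A worked example of the lookup above, assuming this option file backs
         * flag bit 3 of some trace_array *tr, so data == &tr->trace_flags_index[3]:
         *
         *	*pindex        == tr->trace_flags_index[3] == 3
         *	data - *pindex == &tr->trace_flags_index[0]
         *	container_of() == tr
         */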
   8759
   8760static ssize_t
   8761trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
   8762			loff_t *ppos)
   8763{
   8764	void *tr_index = filp->private_data;
   8765	struct trace_array *tr;
   8766	unsigned int index;
   8767	char *buf;
   8768
   8769	get_tr_index(tr_index, &tr, &index);
   8770
   8771	if (tr->trace_flags & (1 << index))
   8772		buf = "1\n";
   8773	else
   8774		buf = "0\n";
   8775
   8776	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
   8777}
   8778
   8779static ssize_t
   8780trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
   8781			 loff_t *ppos)
   8782{
   8783	void *tr_index = filp->private_data;
   8784	struct trace_array *tr;
   8785	unsigned int index;
   8786	unsigned long val;
   8787	int ret;
   8788
   8789	get_tr_index(tr_index, &tr, &index);
   8790
   8791	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   8792	if (ret)
   8793		return ret;
   8794
   8795	if (val != 0 && val != 1)
   8796		return -EINVAL;
   8797
   8798	mutex_lock(&event_mutex);
   8799	mutex_lock(&trace_types_lock);
   8800	ret = set_tracer_flag(tr, 1 << index, val);
   8801	mutex_unlock(&trace_types_lock);
   8802	mutex_unlock(&event_mutex);
   8803
   8804	if (ret < 0)
   8805		return ret;
   8806
   8807	*ppos += cnt;
   8808
   8809	return cnt;
   8810}
   8811
   8812static const struct file_operations trace_options_core_fops = {
   8813	.open = tracing_open_generic,
   8814	.read = trace_options_core_read,
   8815	.write = trace_options_core_write,
   8816	.llseek = generic_file_llseek,
   8817};
   8818
   8819struct dentry *trace_create_file(const char *name,
   8820				 umode_t mode,
   8821				 struct dentry *parent,
   8822				 void *data,
   8823				 const struct file_operations *fops)
   8824{
   8825	struct dentry *ret;
   8826
   8827	ret = tracefs_create_file(name, mode, parent, data, fops);
   8828	if (!ret)
   8829		pr_warn("Could not create tracefs '%s' entry\n", name);
   8830
   8831	return ret;
   8832}
   8833
   8834
   8835static struct dentry *trace_options_init_dentry(struct trace_array *tr)
   8836{
   8837	struct dentry *d_tracer;
   8838
   8839	if (tr->options)
   8840		return tr->options;
   8841
   8842	d_tracer = tracing_get_dentry(tr);
   8843	if (IS_ERR(d_tracer))
   8844		return NULL;
   8845
   8846	tr->options = tracefs_create_dir("options", d_tracer);
   8847	if (!tr->options) {
   8848		pr_warn("Could not create tracefs directory 'options'\n");
   8849		return NULL;
   8850	}
   8851
   8852	return tr->options;
   8853}
   8854
   8855static void
   8856create_trace_option_file(struct trace_array *tr,
   8857			 struct trace_option_dentry *topt,
   8858			 struct tracer_flags *flags,
   8859			 struct tracer_opt *opt)
   8860{
   8861	struct dentry *t_options;
   8862
   8863	t_options = trace_options_init_dentry(tr);
   8864	if (!t_options)
   8865		return;
   8866
   8867	topt->flags = flags;
   8868	topt->opt = opt;
   8869	topt->tr = tr;
   8870
   8871	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
   8872					t_options, topt, &trace_options_fops);
   8873
   8874}
   8875
   8876static void
   8877create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
   8878{
   8879	struct trace_option_dentry *topts;
   8880	struct trace_options *tr_topts;
   8881	struct tracer_flags *flags;
   8882	struct tracer_opt *opts;
   8883	int cnt;
   8884	int i;
   8885
   8886	if (!tracer)
   8887		return;
   8888
   8889	flags = tracer->flags;
   8890
   8891	if (!flags || !flags->opts)
   8892		return;
   8893
   8894	/*
   8895	 * If this is an instance, only create flags for tracers
   8896	 * the instance may have.
   8897	 */
   8898	if (!trace_ok_for_array(tracer, tr))
   8899		return;
   8900
   8901	for (i = 0; i < tr->nr_topts; i++) {
   8902		/* Make sure there are no duplicate flags. */
   8903		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
   8904			return;
   8905	}
   8906
   8907	opts = flags->opts;
   8908
   8909	for (cnt = 0; opts[cnt].name; cnt++)
   8910		;
   8911
   8912	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
   8913	if (!topts)
   8914		return;
   8915
   8916	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
   8917			    GFP_KERNEL);
   8918	if (!tr_topts) {
   8919		kfree(topts);
   8920		return;
   8921	}
   8922
   8923	tr->topts = tr_topts;
   8924	tr->topts[tr->nr_topts].tracer = tracer;
   8925	tr->topts[tr->nr_topts].topts = topts;
   8926	tr->nr_topts++;
   8927
   8928	for (cnt = 0; opts[cnt].name; cnt++) {
   8929		create_trace_option_file(tr, &topts[cnt], flags,
   8930					 &opts[cnt]);
   8931		MEM_FAIL(topts[cnt].entry == NULL,
   8932			  "Failed to create trace option: %s",
   8933			  opts[cnt].name);
   8934	}
   8935}
   8936
   8937static struct dentry *
   8938create_trace_option_core_file(struct trace_array *tr,
   8939			      const char *option, long index)
   8940{
   8941	struct dentry *t_options;
   8942
   8943	t_options = trace_options_init_dentry(tr);
   8944	if (!t_options)
   8945		return NULL;
   8946
   8947	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
   8948				 (void *)&tr->trace_flags_index[index],
   8949				 &trace_options_core_fops);
   8950}
   8951
   8952static void create_trace_options_dir(struct trace_array *tr)
   8953{
   8954	struct dentry *t_options;
   8955	bool top_level = tr == &global_trace;
   8956	int i;
   8957
   8958	t_options = trace_options_init_dentry(tr);
   8959	if (!t_options)
   8960		return;
   8961
   8962	for (i = 0; trace_options[i]; i++) {
   8963		if (top_level ||
   8964		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
   8965			create_trace_option_core_file(tr, trace_options[i], i);
   8966	}
   8967}
   8968
   8969static ssize_t
   8970rb_simple_read(struct file *filp, char __user *ubuf,
   8971	       size_t cnt, loff_t *ppos)
   8972{
   8973	struct trace_array *tr = filp->private_data;
   8974	char buf[64];
   8975	int r;
   8976
   8977	r = tracer_tracing_is_on(tr);
   8978	r = sprintf(buf, "%d\n", r);
   8979
   8980	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   8981}
   8982
   8983static ssize_t
   8984rb_simple_write(struct file *filp, const char __user *ubuf,
   8985		size_t cnt, loff_t *ppos)
   8986{
   8987	struct trace_array *tr = filp->private_data;
   8988	struct trace_buffer *buffer = tr->array_buffer.buffer;
   8989	unsigned long val;
   8990	int ret;
   8991
   8992	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   8993	if (ret)
   8994		return ret;
   8995
   8996	if (buffer) {
   8997		mutex_lock(&trace_types_lock);
   8998		if (!!val == tracer_tracing_is_on(tr)) {
   8999			val = 0; /* do nothing */
   9000		} else if (val) {
   9001			tracer_tracing_on(tr);
   9002			if (tr->current_trace->start)
   9003				tr->current_trace->start(tr);
   9004		} else {
   9005			tracer_tracing_off(tr);
   9006			if (tr->current_trace->stop)
   9007				tr->current_trace->stop(tr);
   9008		}
   9009		mutex_unlock(&trace_types_lock);
   9010	}
   9011
   9012	(*ppos)++;
   9013
   9014	return cnt;
   9015}
   9016
   9017static const struct file_operations rb_simple_fops = {
   9018	.open		= tracing_open_generic_tr,
   9019	.read		= rb_simple_read,
   9020	.write		= rb_simple_write,
   9021	.release	= tracing_release_generic_tr,
   9022	.llseek		= default_llseek,
   9023};
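
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * The "tracing_on" file backed by rb_simple_fops above toggles
 * tracer_tracing_on()/off() for one instance; kernel code that wants the
 * same effect on the top-level buffer can call tracing_on()/tracing_off()
 * directly, e.g. to freeze the trace right after a suspected bug fires:
 */
static inline void example_capture_window(void)
{
	tracing_on();
	/* ... the code being debugged runs here ... */
	tracing_off();		/* same effect as: echo 0 > tracing_on */
}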
   9024
   9025static ssize_t
   9026buffer_percent_read(struct file *filp, char __user *ubuf,
   9027		    size_t cnt, loff_t *ppos)
   9028{
   9029	struct trace_array *tr = filp->private_data;
   9030	char buf[64];
   9031	int r;
   9032
   9033	r = tr->buffer_percent;
   9034	r = sprintf(buf, "%d\n", r);
   9035
   9036	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
   9037}
   9038
   9039static ssize_t
   9040buffer_percent_write(struct file *filp, const char __user *ubuf,
   9041		     size_t cnt, loff_t *ppos)
   9042{
   9043	struct trace_array *tr = filp->private_data;
   9044	unsigned long val;
   9045	int ret;
   9046
   9047	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
   9048	if (ret)
   9049		return ret;
   9050
   9051	if (val > 100)
   9052		return -EINVAL;
   9053
   9054	if (!val)
   9055		val = 1;
   9056
   9057	tr->buffer_percent = val;
   9058
   9059	(*ppos)++;
   9060
   9061	return cnt;
   9062}
   9063
   9064static const struct file_operations buffer_percent_fops = {
   9065	.open		= tracing_open_generic_tr,
   9066	.read		= buffer_percent_read,
   9067	.write		= buffer_percent_write,
   9068	.release	= tracing_release_generic_tr,
   9069	.llseek		= default_llseek,
   9070};
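
/*
 * Editor's note: descriptive comment, not part of the original file.
 * "buffer_percent" is the watermark for how full the ring buffer must be
 * before waiters on the per-CPU trace_pipe_raw files are woken up; the
 * write handler above clamps 0 to 1 and rejects values above 100.
 */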
   9071
   9072static struct dentry *trace_instance_dir;
   9073
   9074static void
   9075init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
   9076
   9077static int
   9078allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
   9079{
   9080	enum ring_buffer_flags rb_flags;
   9081
   9082	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
   9083
   9084	buf->tr = tr;
   9085
   9086	buf->buffer = ring_buffer_alloc(size, rb_flags);
   9087	if (!buf->buffer)
   9088		return -ENOMEM;
   9089
   9090	buf->data = alloc_percpu(struct trace_array_cpu);
   9091	if (!buf->data) {
   9092		ring_buffer_free(buf->buffer);
   9093		buf->buffer = NULL;
   9094		return -ENOMEM;
   9095	}
   9096
   9097	/* Allocate the first page for all buffers */
   9098	set_buffer_entries(&tr->array_buffer,
   9099			   ring_buffer_size(tr->array_buffer.buffer, 0));
   9100
   9101	return 0;
   9102}
   9103
   9104static int allocate_trace_buffers(struct trace_array *tr, int size)
   9105{
   9106	int ret;
   9107
   9108	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
   9109	if (ret)
   9110		return ret;
   9111
   9112#ifdef CONFIG_TRACER_MAX_TRACE
   9113	ret = allocate_trace_buffer(tr, &tr->max_buffer,
   9114				    allocate_snapshot ? size : 1);
   9115	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
   9116		ring_buffer_free(tr->array_buffer.buffer);
   9117		tr->array_buffer.buffer = NULL;
   9118		free_percpu(tr->array_buffer.data);
   9119		tr->array_buffer.data = NULL;
   9120		return -ENOMEM;
   9121	}
   9122	tr->allocated_snapshot = allocate_snapshot;
   9123
   9124	/*
   9125	 * Only the top level trace array gets its snapshot allocated
   9126	 * from the kernel command line.
   9127	 */
   9128	allocate_snapshot = false;
   9129#endif
   9130
   9131	return 0;
   9132}
   9133
   9134static void free_trace_buffer(struct array_buffer *buf)
   9135{
   9136	if (buf->buffer) {
   9137		ring_buffer_free(buf->buffer);
   9138		buf->buffer = NULL;
   9139		free_percpu(buf->data);
   9140		buf->data = NULL;
   9141	}
   9142}
   9143
   9144static void free_trace_buffers(struct trace_array *tr)
   9145{
   9146	if (!tr)
   9147		return;
   9148
   9149	free_trace_buffer(&tr->array_buffer);
   9150
   9151#ifdef CONFIG_TRACER_MAX_TRACE
   9152	free_trace_buffer(&tr->max_buffer);
   9153#endif
   9154}
   9155
   9156static void init_trace_flags_index(struct trace_array *tr)
   9157{
   9158	int i;
   9159
   9160	/* Used by the trace options files */
   9161	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
   9162		tr->trace_flags_index[i] = i;
   9163}
   9164
   9165static void __update_tracer_options(struct trace_array *tr)
   9166{
   9167	struct tracer *t;
   9168
   9169	for (t = trace_types; t; t = t->next)
   9170		add_tracer_options(tr, t);
   9171}
   9172
   9173static void update_tracer_options(struct trace_array *tr)
   9174{
   9175	mutex_lock(&trace_types_lock);
   9176	tracer_options_updated = true;
   9177	__update_tracer_options(tr);
   9178	mutex_unlock(&trace_types_lock);
   9179}
   9180
   9181/* Must have trace_types_lock held */
   9182struct trace_array *trace_array_find(const char *instance)
   9183{
   9184	struct trace_array *tr, *found = NULL;
   9185
   9186	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
   9187		if (tr->name && strcmp(tr->name, instance) == 0) {
   9188			found = tr;
   9189			break;
   9190		}
   9191	}
   9192
   9193	return found;
   9194}
   9195
   9196struct trace_array *trace_array_find_get(const char *instance)
   9197{
   9198	struct trace_array *tr;
   9199
   9200	mutex_lock(&trace_types_lock);
   9201	tr = trace_array_find(instance);
   9202	if (tr)
   9203		tr->ref++;
   9204	mutex_unlock(&trace_types_lock);
   9205
   9206	return tr;
   9207}
   9208
   9209static int trace_array_create_dir(struct trace_array *tr)
   9210{
   9211	int ret;
   9212
   9213	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
   9214	if (!tr->dir)
   9215		return -EINVAL;
   9216
   9217	ret = event_trace_add_tracer(tr->dir, tr);
   9218	if (ret) {
   9219		tracefs_remove(tr->dir);
   9220		return ret;
   9221	}
   9222
   9223	init_tracer_tracefs(tr, tr->dir);
   9224	__update_tracer_options(tr);
   9225
   9226	return ret;
   9227}
   9228
   9229static struct trace_array *trace_array_create(const char *name)
   9230{
   9231	struct trace_array *tr;
   9232	int ret;
   9233
   9234	ret = -ENOMEM;
   9235	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
   9236	if (!tr)
   9237		return ERR_PTR(ret);
   9238
   9239	tr->name = kstrdup(name, GFP_KERNEL);
   9240	if (!tr->name)
   9241		goto out_free_tr;
   9242
   9243	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
   9244		goto out_free_tr;
   9245
   9246	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
   9247
   9248	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
   9249
   9250	raw_spin_lock_init(&tr->start_lock);
   9251
   9252	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
   9253
   9254	tr->current_trace = &nop_trace;
   9255
   9256	INIT_LIST_HEAD(&tr->systems);
   9257	INIT_LIST_HEAD(&tr->events);
   9258	INIT_LIST_HEAD(&tr->hist_vars);
   9259	INIT_LIST_HEAD(&tr->err_log);
   9260
   9261	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
   9262		goto out_free_tr;
   9263
   9264	if (ftrace_allocate_ftrace_ops(tr) < 0)
   9265		goto out_free_tr;
   9266
   9267	ftrace_init_trace_array(tr);
   9268
   9269	init_trace_flags_index(tr);
   9270
   9271	if (trace_instance_dir) {
   9272		ret = trace_array_create_dir(tr);
   9273		if (ret)
   9274			goto out_free_tr;
   9275	} else
   9276		__trace_early_add_events(tr);
   9277
   9278	list_add(&tr->list, &ftrace_trace_arrays);
   9279
   9280	tr->ref++;
   9281
   9282	return tr;
   9283
   9284 out_free_tr:
   9285	ftrace_free_ftrace_ops(tr);
   9286	free_trace_buffers(tr);
   9287	free_cpumask_var(tr->tracing_cpumask);
   9288	kfree(tr->name);
   9289	kfree(tr);
   9290
   9291	return ERR_PTR(ret);
   9292}
   9293
   9294static int instance_mkdir(const char *name)
   9295{
   9296	struct trace_array *tr;
   9297	int ret;
   9298
   9299	mutex_lock(&event_mutex);
   9300	mutex_lock(&trace_types_lock);
   9301
   9302	ret = -EEXIST;
   9303	if (trace_array_find(name))
   9304		goto out_unlock;
   9305
   9306	tr = trace_array_create(name);
   9307
   9308	ret = PTR_ERR_OR_ZERO(tr);
   9309
   9310out_unlock:
   9311	mutex_unlock(&trace_types_lock);
   9312	mutex_unlock(&event_mutex);
   9313	return ret;
   9314}
   9315
   9316/**
   9317 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
   9318 * @name: The name of the trace array to be looked up/created.
   9319 *
   9320 * Returns a pointer to the trace array with the given name, or
   9321 * NULL if it cannot be created.
   9322 *
   9323 * NOTE: This function increments the reference counter associated with the
   9324 * trace array returned. This makes sure it cannot be freed while in use.
   9325 * Use trace_array_put() once the trace array is no longer needed.
   9326 * If the trace_array is to be freed, trace_array_destroy() needs to
   9327 * be called after the trace_array_put(), or simply let user space delete
   9328 * it from the tracefs instances directory. But until the
   9329 * trace_array_put() is called, user space cannot delete it.
   9330 *
   9331 */
   9332struct trace_array *trace_array_get_by_name(const char *name)
   9333{
   9334	struct trace_array *tr;
   9335
   9336	mutex_lock(&event_mutex);
   9337	mutex_lock(&trace_types_lock);
   9338
   9339	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
   9340		if (tr->name && strcmp(tr->name, name) == 0)
   9341			goto out_unlock;
   9342	}
   9343
   9344	tr = trace_array_create(name);
   9345
   9346	if (IS_ERR(tr))
   9347		tr = NULL;
   9348out_unlock:
   9349	if (tr)
   9350		tr->ref++;
   9351
   9352	mutex_unlock(&trace_types_lock);
   9353	mutex_unlock(&event_mutex);
   9354	return tr;
   9355}
   9356EXPORT_SYMBOL_GPL(trace_array_get_by_name);
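
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * A rough usage pattern for the API documented above, as a hypothetical
 * module might use it: look up (or create) an instance, use it, drop the
 * reference, and optionally destroy the instance afterwards.
 */
static int example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example");	/* takes a reference */
	if (!tr)
		return -ENOMEM;

	/* ... record into the instance here ... */

	trace_array_put(tr);			/* drop our reference */
	return trace_array_destroy(tr);		/* or let user space rmdir it */
}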
   9357
   9358static int __remove_instance(struct trace_array *tr)
   9359{
   9360	int i;
   9361
   9362	/* Reference counter for a newly created trace array = 1. */
   9363	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
   9364		return -EBUSY;
   9365
   9366	list_del(&tr->list);
   9367
   9368	/* Disable all the flags that were enabled coming in */
   9369	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
   9370		if ((1 << i) & ZEROED_TRACE_FLAGS)
   9371			set_tracer_flag(tr, 1 << i, 0);
   9372	}
   9373
   9374	tracing_set_nop(tr);
   9375	clear_ftrace_function_probes(tr);
   9376	event_trace_del_tracer(tr);
   9377	ftrace_clear_pids(tr);
   9378	ftrace_destroy_function_files(tr);
   9379	tracefs_remove(tr->dir);
   9380	free_percpu(tr->last_func_repeats);
   9381	free_trace_buffers(tr);
   9382
   9383	for (i = 0; i < tr->nr_topts; i++) {
   9384		kfree(tr->topts[i].topts);
   9385	}
   9386	kfree(tr->topts);
   9387
   9388	free_cpumask_var(tr->tracing_cpumask);
   9389	kfree(tr->name);
   9390	kfree(tr);
   9391
   9392	return 0;
   9393}
   9394
   9395int trace_array_destroy(struct trace_array *this_tr)
   9396{
   9397	struct trace_array *tr;
   9398	int ret;
   9399
   9400	if (!this_tr)
   9401		return -EINVAL;
   9402
   9403	mutex_lock(&event_mutex);
   9404	mutex_lock(&trace_types_lock);
   9405
   9406	ret = -ENODEV;
   9407
   9408	/* Making sure trace array exists before destroying it. */
   9409	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
   9410		if (tr == this_tr) {
   9411			ret = __remove_instance(tr);
   9412			break;
   9413		}
   9414	}
   9415
   9416	mutex_unlock(&trace_types_lock);
   9417	mutex_unlock(&event_mutex);
   9418
   9419	return ret;
   9420}
   9421EXPORT_SYMBOL_GPL(trace_array_destroy);
   9422
   9423static int instance_rmdir(const char *name)
   9424{
   9425	struct trace_array *tr;
   9426	int ret;
   9427
   9428	mutex_lock(&event_mutex);
   9429	mutex_lock(&trace_types_lock);
   9430
   9431	ret = -ENODEV;
   9432	tr = trace_array_find(name);
   9433	if (tr)
   9434		ret = __remove_instance(tr);
   9435
   9436	mutex_unlock(&trace_types_lock);
   9437	mutex_unlock(&event_mutex);
   9438
   9439	return ret;
   9440}
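
/*
 * Editor's note: descriptive comment, not part of the original file.
 * instance_mkdir()/instance_rmdir() above are the callbacks handed to
 * tracefs_create_instance_dir() below, so a user-space
 * "mkdir /sys/kernel/tracing/instances/foo" creates a trace_array named
 * "foo" and "rmdir" tears it down again (returning -EBUSY while the
 * instance is still referenced or actively tracing).
 */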
   9441
   9442static __init void create_trace_instances(struct dentry *d_tracer)
   9443{
   9444	struct trace_array *tr;
   9445
   9446	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
   9447							 instance_mkdir,
   9448							 instance_rmdir);
   9449	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
   9450		return;
   9451
   9452	mutex_lock(&event_mutex);
   9453	mutex_lock(&trace_types_lock);
   9454
   9455	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
   9456		if (!tr->name)
   9457			continue;
   9458		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
   9459			     "Failed to create instance directory\n"))
   9460			break;
   9461	}
   9462
   9463	mutex_unlock(&trace_types_lock);
   9464	mutex_unlock(&event_mutex);
   9465}
   9466
   9467static void
   9468init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
   9469{
   9470	struct trace_event_file *file;
   9471	int cpu;
   9472
   9473	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
   9474			tr, &show_traces_fops);
   9475
   9476	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
   9477			tr, &set_tracer_fops);
   9478
   9479	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
   9480			  tr, &tracing_cpumask_fops);
   9481
   9482	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
   9483			  tr, &tracing_iter_fops);
   9484
   9485	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
   9486			  tr, &tracing_fops);
   9487
   9488	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
   9489			  tr, &tracing_pipe_fops);
   9490
   9491	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
   9492			  tr, &tracing_entries_fops);
   9493
   9494	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
   9495			  tr, &tracing_total_entries_fops);
   9496
   9497	trace_create_file("free_buffer", 0200, d_tracer,
   9498			  tr, &tracing_free_buffer_fops);
   9499
   9500	trace_create_file("trace_marker", 0220, d_tracer,
   9501			  tr, &tracing_mark_fops);
   9502
   9503	file = __find_event_file(tr, "ftrace", "print");
   9504	if (file && file->dir)
   9505		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
   9506				  file, &event_trigger_fops);
   9507	tr->trace_marker_file = file;
   9508
   9509	trace_create_file("trace_marker_raw", 0220, d_tracer,
   9510			  tr, &tracing_mark_raw_fops);
   9511
   9512	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
   9513			  &trace_clock_fops);
   9514
   9515	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
   9516			  tr, &rb_simple_fops);
   9517
   9518	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
   9519			  &trace_time_stamp_mode_fops);
   9520
   9521	tr->buffer_percent = 50;
   9522
   9523	trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
   9524			tr, &buffer_percent_fops);
   9525
   9526	create_trace_options_dir(tr);
   9527
   9528	trace_create_maxlat_file(tr, d_tracer);
   9529
   9530	if (ftrace_create_function_files(tr, d_tracer))
   9531		MEM_FAIL(1, "Could not allocate function filter files");
   9532
   9533#ifdef CONFIG_TRACER_SNAPSHOT
   9534	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
   9535			  tr, &snapshot_fops);
   9536#endif
   9537
   9538	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
   9539			  tr, &tracing_err_log_fops);
   9540
   9541	for_each_tracing_cpu(cpu)
   9542		tracing_init_tracefs_percpu(tr, cpu);
   9543
   9544	ftrace_init_tracefs(tr, d_tracer);
   9545}
   9546
   9547static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
   9548{
   9549	struct vfsmount *mnt;
   9550	struct file_system_type *type;
   9551
   9552	/*
   9553	 * To maintain backward compatibility for tools that mount
   9554	 * debugfs to get to the tracing facility, tracefs is automatically
   9555	 * mounted to the debugfs/tracing directory.
   9556	 */
   9557	type = get_fs_type("tracefs");
   9558	if (!type)
   9559		return NULL;
   9560	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
   9561	put_filesystem(type);
   9562	if (IS_ERR(mnt))
   9563		return NULL;
   9564	mntget(mnt);
   9565
   9566	return mnt;
   9567}
   9568
   9569/**
   9570 * tracing_init_dentry - initialize top level trace array
   9571 *
   9572 * This is called when creating files or directories in the tracing
   9573 * directory. It is called via fs_initcall() by any of the boot up code,
   9574 * and returns 0 on success or a negative error if tracing is unavailable.
   9575 */
   9576int tracing_init_dentry(void)
   9577{
   9578	struct trace_array *tr = &global_trace;
   9579
   9580	if (security_locked_down(LOCKDOWN_TRACEFS)) {
   9581		pr_warn("Tracing disabled due to lockdown\n");
   9582		return -EPERM;
   9583	}
   9584
   9585	/* The top level trace array uses NULL as parent */
   9586	if (tr->dir)
   9587		return 0;
   9588
   9589	if (WARN_ON(!tracefs_initialized()))
   9590		return -ENODEV;
   9591
   9592	/*
   9593	 * As there may still be users that expect the tracing
   9594	 * files to exist in debugfs/tracing, we must automount
   9595	 * the tracefs file system there, so older tools still
   9596	 * work with the newer kernel.
   9597	 */
   9598	tr->dir = debugfs_create_automount("tracing", NULL,
   9599					   trace_automount, NULL);
   9600
   9601	return 0;
   9602}
   9603
   9604extern struct trace_eval_map *__start_ftrace_eval_maps[];
   9605extern struct trace_eval_map *__stop_ftrace_eval_maps[];
   9606
   9607static struct workqueue_struct *eval_map_wq __initdata;
   9608static struct work_struct eval_map_work __initdata;
   9609static struct work_struct tracerfs_init_work __initdata;
   9610
   9611static void __init eval_map_work_func(struct work_struct *work)
   9612{
   9613	int len;
   9614
   9615	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
   9616	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
   9617}
   9618
   9619static int __init trace_eval_init(void)
   9620{
   9621	INIT_WORK(&eval_map_work, eval_map_work_func);
   9622
   9623	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
   9624	if (!eval_map_wq) {
   9625		pr_err("Unable to allocate eval_map_wq\n");
   9626		/* Do work here */
   9627		eval_map_work_func(&eval_map_work);
   9628		return -ENOMEM;
   9629	}
   9630
   9631	queue_work(eval_map_wq, &eval_map_work);
   9632	return 0;
   9633}
   9634
   9635subsys_initcall(trace_eval_init);
   9636
   9637static int __init trace_eval_sync(void)
   9638{
   9639	/* Make sure the eval map updates are finished */
   9640	if (eval_map_wq)
   9641		destroy_workqueue(eval_map_wq);
   9642	return 0;
   9643}
   9644
   9645late_initcall_sync(trace_eval_sync);
   9646
   9647
   9648#ifdef CONFIG_MODULES
   9649static void trace_module_add_evals(struct module *mod)
   9650{
   9651	if (!mod->num_trace_evals)
   9652		return;
   9653
   9654	/*
   9655	 * Modules with bad taint do not have events created, so do
   9656	 * not bother with their eval maps (enums) either.
   9657	 */
   9658	if (trace_module_has_bad_taint(mod))
   9659		return;
   9660
   9661	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
   9662}
   9663
   9664#ifdef CONFIG_TRACE_EVAL_MAP_FILE
   9665static void trace_module_remove_evals(struct module *mod)
   9666{
   9667	union trace_eval_map_item *map;
   9668	union trace_eval_map_item **last = &trace_eval_maps;
   9669
   9670	if (!mod->num_trace_evals)
   9671		return;
   9672
   9673	mutex_lock(&trace_eval_mutex);
   9674
   9675	map = trace_eval_maps;
   9676
   9677	while (map) {
   9678		if (map->head.mod == mod)
   9679			break;
   9680		map = trace_eval_jmp_to_tail(map);
   9681		last = &map->tail.next;
   9682		map = map->tail.next;
   9683	}
   9684	if (!map)
   9685		goto out;
   9686
   9687	*last = trace_eval_jmp_to_tail(map)->tail.next;
   9688	kfree(map);
   9689 out:
   9690	mutex_unlock(&trace_eval_mutex);
   9691}
   9692#else
   9693static inline void trace_module_remove_evals(struct module *mod) { }
   9694#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
   9695
   9696static int trace_module_notify(struct notifier_block *self,
   9697			       unsigned long val, void *data)
   9698{
   9699	struct module *mod = data;
   9700
   9701	switch (val) {
   9702	case MODULE_STATE_COMING:
   9703		trace_module_add_evals(mod);
   9704		break;
   9705	case MODULE_STATE_GOING:
   9706		trace_module_remove_evals(mod);
   9707		break;
   9708	}
   9709
   9710	return NOTIFY_OK;
   9711}
   9712
   9713static struct notifier_block trace_module_nb = {
   9714	.notifier_call = trace_module_notify,
   9715	.priority = 0,
   9716};
   9717#endif /* CONFIG_MODULES */
   9718
   9719static __init void tracer_init_tracefs_work_func(struct work_struct *work)
   9720{
   9721
   9722	event_trace_init();
   9723
   9724	init_tracer_tracefs(&global_trace, NULL);
   9725	ftrace_init_tracefs_toplevel(&global_trace, NULL);
   9726
   9727	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
   9728			&global_trace, &tracing_thresh_fops);
   9729
   9730	trace_create_file("README", TRACE_MODE_READ, NULL,
   9731			NULL, &tracing_readme_fops);
   9732
   9733	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
   9734			NULL, &tracing_saved_cmdlines_fops);
   9735
   9736	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
   9737			  NULL, &tracing_saved_cmdlines_size_fops);
   9738
   9739	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
   9740			NULL, &tracing_saved_tgids_fops);
   9741
   9742	trace_create_eval_file(NULL);
   9743
   9744#ifdef CONFIG_MODULES
   9745	register_module_notifier(&trace_module_nb);
   9746#endif
   9747
   9748#ifdef CONFIG_DYNAMIC_FTRACE
   9749	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
   9750			NULL, &tracing_dyn_info_fops);
   9751#endif
   9752
   9753	create_trace_instances(NULL);
   9754
   9755	update_tracer_options(&global_trace);
   9756}
   9757
   9758static __init int tracer_init_tracefs(void)
   9759{
   9760	int ret;
   9761
   9762	trace_access_lock_init();
   9763
   9764	ret = tracing_init_dentry();
   9765	if (ret)
   9766		return 0;
   9767
   9768	if (eval_map_wq) {
   9769		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
   9770		queue_work(eval_map_wq, &tracerfs_init_work);
   9771	} else {
   9772		tracer_init_tracefs_work_func(NULL);
   9773	}
   9774
   9775	return 0;
   9776}
   9777
   9778fs_initcall(tracer_init_tracefs);
   9779
   9780static int trace_panic_handler(struct notifier_block *this,
   9781			       unsigned long event, void *unused)
   9782{
   9783	if (ftrace_dump_on_oops)
   9784		ftrace_dump(ftrace_dump_on_oops);
   9785	return NOTIFY_OK;
   9786}
   9787
   9788static struct notifier_block trace_panic_notifier = {
   9789	.notifier_call  = trace_panic_handler,
   9790	.next           = NULL,
   9791	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
   9792};
   9793
   9794static int trace_die_handler(struct notifier_block *self,
   9795			     unsigned long val,
   9796			     void *data)
   9797{
   9798	switch (val) {
   9799	case DIE_OOPS:
   9800		if (ftrace_dump_on_oops)
   9801			ftrace_dump(ftrace_dump_on_oops);
   9802		break;
   9803	default:
   9804		break;
   9805	}
   9806	return NOTIFY_OK;
   9807}
   9808
   9809static struct notifier_block trace_die_notifier = {
   9810	.notifier_call = trace_die_handler,
   9811	.priority = 200
   9812};
   9813
   9814/*
   9815 * printk is set to a max of 1024, but we really don't need it that big.
   9816 * Nothing should be printing 1000 characters anyway.
   9817 */
   9818#define TRACE_MAX_PRINT		1000
   9819
   9820/*
   9821 * Define here KERN_TRACE so that we have one place to modify
   9822 * it if we decide to change what log level the ftrace dump
   9823 * should be at.
   9824 */
   9825#define KERN_TRACE		KERN_EMERG
   9826
   9827void
   9828trace_printk_seq(struct trace_seq *s)
   9829{
   9830	/* Probably should print a warning here. */
   9831	if (s->seq.len >= TRACE_MAX_PRINT)
   9832		s->seq.len = TRACE_MAX_PRINT;
   9833
   9834	/*
   9835	 * More paranoid code. Although the buffer size is set to
   9836	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
   9837	 * an extra layer of protection.
   9838	 */
   9839	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
   9840		s->seq.len = s->seq.size - 1;
   9841
   9842	/* Should be zero terminated, but we are paranoid. */
   9843	s->buffer[s->seq.len] = 0;
   9844
   9845	printk(KERN_TRACE "%s", s->buffer);
   9846
   9847	trace_seq_init(s);
   9848}
   9849
   9850void trace_init_global_iter(struct trace_iterator *iter)
   9851{
   9852	iter->tr = &global_trace;
   9853	iter->trace = iter->tr->current_trace;
   9854	iter->cpu_file = RING_BUFFER_ALL_CPUS;
   9855	iter->array_buffer = &global_trace.array_buffer;
   9856
   9857	if (iter->trace && iter->trace->open)
   9858		iter->trace->open(iter);
   9859
   9860	/* Annotate start of buffers if we had overruns */
   9861	if (ring_buffer_overruns(iter->array_buffer->buffer))
   9862		iter->iter_flags |= TRACE_FILE_ANNOTATE;
   9863
   9864	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
   9865	if (trace_clocks[iter->tr->clock_id].in_ns)
   9866		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
   9867}
   9868
   9869void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
   9870{
   9871	/* use static because iter can be a bit big for the stack */
   9872	static struct trace_iterator iter;
   9873	static atomic_t dump_running;
   9874	struct trace_array *tr = &global_trace;
   9875	unsigned int old_userobj;
   9876	unsigned long flags;
   9877	int cnt = 0, cpu;
   9878
   9879	/* Only allow one dump user at a time. */
   9880	if (atomic_inc_return(&dump_running) != 1) {
   9881		atomic_dec(&dump_running);
   9882		return;
   9883	}
   9884
   9885	/*
   9886	 * Always turn off tracing when we dump.
   9887	 * We don't need to show trace output of what happens
   9888	 * between multiple crashes.
   9889	 *
   9890	 * If the user does a sysrq-z, then they can re-enable
   9891	 * tracing with echo 1 > tracing_on.
   9892	 */
   9893	tracing_off();
   9894
   9895	local_irq_save(flags);
   9896
   9897	/* Simulate the iterator */
   9898	trace_init_global_iter(&iter);
   9899	/* Can not use kmalloc for iter.temp and iter.fmt */
   9900	iter.temp = static_temp_buf;
   9901	iter.temp_size = STATIC_TEMP_BUF_SIZE;
   9902	iter.fmt = static_fmt_buf;
   9903	iter.fmt_size = STATIC_FMT_BUF_SIZE;
   9904
   9905	for_each_tracing_cpu(cpu) {
   9906		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
   9907	}
   9908
   9909	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
   9910
   9911	/* don't look at user memory in panic mode */
   9912	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
   9913
   9914	switch (oops_dump_mode) {
   9915	case DUMP_ALL:
   9916		iter.cpu_file = RING_BUFFER_ALL_CPUS;
   9917		break;
   9918	case DUMP_ORIG:
   9919		iter.cpu_file = raw_smp_processor_id();
   9920		break;
   9921	case DUMP_NONE:
   9922		goto out_enable;
   9923	default:
   9924		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
   9925		iter.cpu_file = RING_BUFFER_ALL_CPUS;
   9926	}
   9927
   9928	printk(KERN_TRACE "Dumping ftrace buffer:\n");
   9929
   9930	/* Did function tracer already get disabled? */
   9931	if (ftrace_is_dead()) {
   9932		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
   9933		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
   9934	}
   9935
   9936	/*
   9937	 * We need to stop all tracing on all CPUs to read
   9938	 * the next buffer. This is a bit expensive, but is
   9939	 * not done often. We read everything we can,
   9940	 * and then release the locks again.
   9941	 */
   9942
   9943	while (!trace_empty(&iter)) {
   9944
   9945		if (!cnt)
   9946			printk(KERN_TRACE "---------------------------------\n");
   9947
   9948		cnt++;
   9949
   9950		trace_iterator_reset(&iter);
   9951		iter.iter_flags |= TRACE_FILE_LAT_FMT;
   9952
   9953		if (trace_find_next_entry_inc(&iter) != NULL) {
   9954			int ret;
   9955
   9956			ret = print_trace_line(&iter);
   9957			if (ret != TRACE_TYPE_NO_CONSUME)
   9958				trace_consume(&iter);
   9959		}
   9960		touch_nmi_watchdog();
   9961
   9962		trace_printk_seq(&iter.seq);
   9963	}
   9964
   9965	if (!cnt)
   9966		printk(KERN_TRACE "   (ftrace buffer empty)\n");
   9967	else
   9968		printk(KERN_TRACE "---------------------------------\n");
   9969
   9970 out_enable:
   9971	tr->trace_flags |= old_userobj;
   9972
   9973	for_each_tracing_cpu(cpu) {
   9974		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
   9975	}
   9976	atomic_dec(&dump_running);
   9977	local_irq_restore(flags);
   9978}
   9979EXPORT_SYMBOL_GPL(ftrace_dump);
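
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * ftrace_dump() is normally reached through the panic/die notifiers above;
 * a hypothetical caller that wants the ring buffer spilled to the console
 * on a fatal error could do:
 */
static inline void example_dump_on_fatal_error(void)
{
	/* DUMP_ALL prints every CPU's buffer, DUMP_ORIG only this CPU's. */
	ftrace_dump(DUMP_ALL);
}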
   9980
   9981#define WRITE_BUFSIZE  4096
   9982
   9983ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
   9984				size_t count, loff_t *ppos,
   9985				int (*createfn)(const char *))
   9986{
   9987	char *kbuf, *buf, *tmp;
   9988	int ret = 0;
   9989	size_t done = 0;
   9990	size_t size;
   9991
   9992	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
   9993	if (!kbuf)
   9994		return -ENOMEM;
   9995
   9996	while (done < count) {
   9997		size = count - done;
   9998
   9999		if (size >= WRITE_BUFSIZE)
  10000			size = WRITE_BUFSIZE - 1;
  10001
  10002		if (copy_from_user(kbuf, buffer + done, size)) {
  10003			ret = -EFAULT;
  10004			goto out;
  10005		}
  10006		kbuf[size] = '\0';
  10007		buf = kbuf;
  10008		do {
  10009			tmp = strchr(buf, '\n');
  10010			if (tmp) {
  10011				*tmp = '\0';
  10012				size = tmp - buf + 1;
  10013			} else {
  10014				size = strlen(buf);
  10015				if (done + size < count) {
  10016					if (buf != kbuf)
  10017						break;
  10018					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
  10019					pr_warn("Line length is too long: Should be less than %d\n",
  10020						WRITE_BUFSIZE - 2);
  10021					ret = -EINVAL;
  10022					goto out;
  10023				}
  10024			}
  10025			done += size;
  10026
  10027			/* Remove comments */
  10028			tmp = strchr(buf, '#');
  10029
  10030			if (tmp)
  10031				*tmp = '\0';
  10032
  10033			ret = createfn(buf);
  10034			if (ret)
  10035				goto out;
  10036			buf += size;
  10037
  10038		} while (done < count);
  10039	}
  10040	ret = done;
  10041
  10042out:
  10043	kfree(kbuf);
  10044
  10045	return ret;
  10046}
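
/*
 * Editor's note: illustrative sketch only, not part of the original file.
 * trace_parse_run_command() splits a user buffer into newline-terminated,
 * '#'-comment-stripped commands and hands each one to createfn(); a
 * hypothetical write handler built on top of it could look like this:
 */
static int example_create_cmd(const char *raw_command)
{
	pr_debug("parsed command: %s\n", raw_command);
	return 0;
}

static ssize_t example_cmd_write(struct file *file, const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       example_create_cmd);
}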
  10047
  10048__init static int tracer_alloc_buffers(void)
  10049{
  10050	int ring_buf_size;
  10051	int ret = -ENOMEM;
  10052
  10053
  10054	if (security_locked_down(LOCKDOWN_TRACEFS)) {
  10055		pr_warn("Tracing disabled due to lockdown\n");
  10056		return -EPERM;
  10057	}
  10058
  10059	/*
  10060	 * Make sure we don't accidentally add more trace options
  10061	 * than we have bits for.
  10062	 */
  10063	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
  10064
  10065	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
  10066		goto out;
  10067
  10068	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
  10069		goto out_free_buffer_mask;
  10070
  10071	/* Only allocate trace_printk buffers if a trace_printk exists */
  10072	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
  10073		/* Must be called before global_trace.buffer is allocated */
  10074		trace_printk_init_buffers();
  10075
  10076	/* To save memory, keep the ring buffer size to its minimum */
  10077	if (ring_buffer_expanded)
  10078		ring_buf_size = trace_buf_size;
  10079	else
  10080		ring_buf_size = 1;
  10081
  10082	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
  10083	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
  10084
  10085	raw_spin_lock_init(&global_trace.start_lock);
  10086
  10087	/*
  10088	 * The prepare callback allocates some memory for the ring buffer. We
  10089	 * don't free the buffer if the CPU goes down. If we were to free
  10090	 * the buffer, then the user would lose any trace that was in the
  10091	 * buffer. The memory will be removed once the "instance" is removed.
  10092	 */
  10093	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
  10094				      "trace/RB:preapre", trace_rb_cpu_prepare,
  10095				      NULL);
  10096	if (ret < 0)
  10097		goto out_free_cpumask;
  10098	/* Used for event triggers */
  10099	ret = -ENOMEM;
  10100	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
  10101	if (!temp_buffer)
  10102		goto out_rm_hp_state;
  10103
  10104	if (trace_create_savedcmd() < 0)
  10105		goto out_free_temp_buffer;
  10106
  10107	/* TODO: make the number of buffers hot pluggable with CPUs */
  10108	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
  10109		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
  10110		goto out_free_savedcmd;
  10111	}
  10112
  10113	if (global_trace.buffer_disabled)
  10114		tracing_off();
  10115
  10116	if (trace_boot_clock) {
  10117		ret = tracing_set_clock(&global_trace, trace_boot_clock);
  10118		if (ret < 0)
  10119			pr_warn("Trace clock %s not defined, going back to default\n",
  10120				trace_boot_clock);
  10121	}
  10122
  10123	/*
  10124	 * register_tracer() might reference current_trace, so it
  10125	 * needs to be set before we register anything. This is
  10126	 * just a bootstrap of current_trace anyway.
  10127	 */
  10128	global_trace.current_trace = &nop_trace;
  10129
  10130	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
  10131
  10132	ftrace_init_global_array_ops(&global_trace);
  10133
  10134	init_trace_flags_index(&global_trace);
  10135
  10136	register_tracer(&nop_trace);
  10137
  10138	/* Function tracing may start here (via kernel command line) */
  10139	init_function_trace();
  10140
  10141	/* All seems OK, enable tracing */
  10142	tracing_disabled = 0;
  10143
  10144	atomic_notifier_chain_register(&panic_notifier_list,
  10145				       &trace_panic_notifier);
  10146
  10147	register_die_notifier(&trace_die_notifier);
  10148
  10149	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
  10150
  10151	INIT_LIST_HEAD(&global_trace.systems);
  10152	INIT_LIST_HEAD(&global_trace.events);
  10153	INIT_LIST_HEAD(&global_trace.hist_vars);
  10154	INIT_LIST_HEAD(&global_trace.err_log);
  10155	list_add(&global_trace.list, &ftrace_trace_arrays);
  10156
  10157	apply_trace_boot_options();
  10158
  10159	register_snapshot_cmd();
  10160
  10161	test_can_verify();
  10162
  10163	return 0;
  10164
  10165out_free_savedcmd:
  10166	free_saved_cmdlines_buffer(savedcmd);
  10167out_free_temp_buffer:
  10168	ring_buffer_free(temp_buffer);
  10169out_rm_hp_state:
  10170	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
  10171out_free_cpumask:
  10172	free_cpumask_var(global_trace.tracing_cpumask);
  10173out_free_buffer_mask:
  10174	free_cpumask_var(tracing_buffer_mask);
  10175out:
  10176	return ret;
  10177}
  10178
  10179void __init ftrace_boot_snapshot(void)
  10180{
  10181	if (snapshot_at_boot) {
  10182		tracing_snapshot();
  10183		internal_trace_puts("** Boot snapshot taken **\n");
  10184	}
  10185}
  10186
  10187void __init early_trace_init(void)
  10188{
  10189	if (tracepoint_printk) {
  10190		tracepoint_print_iter =
  10191			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
  10192		if (MEM_FAIL(!tracepoint_print_iter,
  10193			     "Failed to allocate trace iterator\n"))
  10194			tracepoint_printk = 0;
  10195		else
  10196			static_key_enable(&tracepoint_printk_key.key);
  10197	}
  10198	tracer_alloc_buffers();
  10199}
  10200
  10201void __init trace_init(void)
  10202{
  10203	trace_event_init();
  10204}
  10205
  10206__init static void clear_boot_tracer(void)
  10207{
  10208	/*
  10209	 * The default bootup tracer name points into an init-section
  10210	 * buffer. This function is called at late_initcall time. If the
  10211	 * boot tracer was never registered by now, clear the pointer out
  10212	 * to prevent a later registration from accessing the buffer that
  10213	 * is about to be freed.
  10214	 */
  10215	if (!default_bootup_tracer)
  10216		return;
  10217
  10218	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
  10219	       default_bootup_tracer);
  10220	default_bootup_tracer = NULL;
  10221}
  10222
  10223#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
  10224__init static void tracing_set_default_clock(void)
  10225{
  10226	/* sched_clock_stable() is determined in late_initcall */
  10227	if (!trace_boot_clock && !sched_clock_stable()) {
  10228		if (security_locked_down(LOCKDOWN_TRACEFS)) {
  10229			pr_warn("Can not set tracing clock due to lockdown\n");
  10230			return;
  10231		}
  10232
  10233		printk(KERN_WARNING
  10234		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
  10235		       "If you want to keep using the local clock, then add:\n"
  10236		       "  \"trace_clock=local\"\n"
  10237		       "on the kernel command line\n");
  10238		tracing_set_clock(&global_trace, "global");
  10239	}
  10240}
  10241#else
  10242static inline void tracing_set_default_clock(void) { }
  10243#endif
  10244
  10245__init static int late_trace_init(void)
  10246{
  10247	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
  10248		static_key_disable(&tracepoint_printk_key.key);
  10249		tracepoint_printk = 0;
  10250	}
  10251
  10252	tracing_set_default_clock();
  10253	clear_boot_tracer();
  10254	return 0;
  10255}
  10256
  10257late_initcall_sync(late_trace_init);