cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

trace_osnoise.c (60506B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
      4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
      5 *
      6 * Based on "hwlat_detector" tracer by:
      7 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
      8 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
      9 *   With feedback from Clark Williams <williams@redhat.com>
     10 *
     11 * And also based on the rtsl tracer presented on:
     12 *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
     13 *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
     14 *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
     15 *
     16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
     17 */
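/*
 * A minimal usage sketch (not part of this file; assuming the tracefs
 * interface described in Documentation/trace/osnoise-tracer.rst and a
 * tracefs mount at /sys/kernel/tracing):
 *
 *   # cd /sys/kernel/tracing
 *   # echo osnoise > current_tracer
 *   # cat trace
 *
 * The timerlat mode is selected the same way, with
 * "echo timerlat > current_tracer".
 */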
     18
     19#include <linux/kthread.h>
     20#include <linux/tracefs.h>
     21#include <linux/uaccess.h>
     22#include <linux/cpumask.h>
     23#include <linux/delay.h>
     24#include <linux/sched/clock.h>
     25#include <uapi/linux/sched/types.h>
     26#include <linux/sched.h>
     27#include "trace.h"
     28
     29#ifdef CONFIG_X86_LOCAL_APIC
     30#include <asm/trace/irq_vectors.h>
     31#undef TRACE_INCLUDE_PATH
     32#undef TRACE_INCLUDE_FILE
     33#endif /* CONFIG_X86_LOCAL_APIC */
     34
     35#include <trace/events/irq.h>
     36#include <trace/events/sched.h>
     37
     38#define CREATE_TRACE_POINTS
     39#include <trace/events/osnoise.h>
     40
     41/*
     42 * Default values.
     43 */
     44#define BANNER			"osnoise: "
     45#define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
     46#define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */
     47
     48#define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
     49#define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
     50
     51/*
     52 * trace_array of the enabled osnoise/timerlat instances.
     53 */
     54struct osnoise_instance {
     55	struct list_head	list;
     56	struct trace_array	*tr;
     57};
     58
     59static struct list_head osnoise_instances;
     60
     61static bool osnoise_has_registered_instances(void)
     62{
     63	return !!list_first_or_null_rcu(&osnoise_instances,
     64					struct osnoise_instance,
     65					list);
     66}
     67
     68/*
     69 * osnoise_instance_registered - check if a tr is already registered
     70 */
     71static int osnoise_instance_registered(struct trace_array *tr)
     72{
     73	struct osnoise_instance *inst;
     74	int found = 0;
     75
     76	rcu_read_lock();
     77	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
     78		if (inst->tr == tr)
     79			found = 1;
     80	}
     81	rcu_read_unlock();
     82
     83	return found;
     84}
     85
     86/*
     87 * osnoise_register_instance - register a new trace instance
     88 *
     89 * Register a trace_array *tr in the list of instances running
     90 * osnoise/timerlat tracers.
     91 */
     92static int osnoise_register_instance(struct trace_array *tr)
     93{
     94	struct osnoise_instance *inst;
     95
     96	/*
     97	 * register/unregister serialization is provided by trace's
     98	 * trace_types_lock.
     99	 */
    100	lockdep_assert_held(&trace_types_lock);
    101
    102	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
    103	if (!inst)
    104		return -ENOMEM;
    105
    106	INIT_LIST_HEAD_RCU(&inst->list);
    107	inst->tr = tr;
    108	list_add_tail_rcu(&inst->list, &osnoise_instances);
    109
    110	return 0;
    111}
    112
    113/*
    114 *  osnoise_unregister_instance - unregister a registered trace instance
    115 *
    116 * Remove the trace_array *tr from the list of instances running
    117 * osnoise/timerlat tracers.
    118 */
    119static void osnoise_unregister_instance(struct trace_array *tr)
    120{
    121	struct osnoise_instance *inst;
    122	int found = 0;
    123
    124	/*
    125	 * register/unregister serialization is provided by trace's
    126	 * trace_types_lock.
    127	 */
    128	lockdep_assert_held(&trace_types_lock);
    129
    130	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
    131		if (inst->tr == tr) {
    132			list_del_rcu(&inst->list);
    133			found = 1;
    134			break;
    135		}
    136	}
    137
    138	if (!found)
    139		return;
    140
    141	kvfree_rcu(inst);
    142}
    143
    144/*
    145 * NMI runtime info.
    146 */
    147struct osn_nmi {
    148	u64	count;
    149	u64	delta_start;
    150};
    151
    152/*
    153 * IRQ runtime info.
    154 */
    155struct osn_irq {
    156	u64	count;
    157	u64	arrival_time;
    158	u64	delta_start;
    159};
    160
    161#define IRQ_CONTEXT	0
    162#define THREAD_CONTEXT	1
    163/*
     164 * softirq runtime info.
    165 */
    166struct osn_softirq {
    167	u64	count;
    168	u64	arrival_time;
    169	u64	delta_start;
    170};
    171
    172/*
    173 * thread runtime info.
    174 */
    175struct osn_thread {
    176	u64	count;
    177	u64	arrival_time;
    178	u64	delta_start;
    179};
    180
    181/*
    182 * Runtime information: this structure saves the runtime information used by
    183 * one sampling thread.
    184 */
    185struct osnoise_variables {
    186	struct task_struct	*kthread;
    187	bool			sampling;
    188	pid_t			pid;
    189	struct osn_nmi		nmi;
    190	struct osn_irq		irq;
    191	struct osn_softirq	softirq;
    192	struct osn_thread	thread;
    193	local_t			int_counter;
    194};
    195
    196/*
    197 * Per-cpu runtime information.
    198 */
    199DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
    200
    201/*
    202 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
    203 */
    204static inline struct osnoise_variables *this_cpu_osn_var(void)
    205{
    206	return this_cpu_ptr(&per_cpu_osnoise_var);
    207}
    208
    209#ifdef CONFIG_TIMERLAT_TRACER
    210/*
    211 * Runtime information for the timer mode.
    212 */
    213struct timerlat_variables {
    214	struct task_struct	*kthread;
    215	struct hrtimer		timer;
    216	u64			rel_period;
    217	u64			abs_period;
    218	bool			tracing_thread;
    219	u64			count;
    220};
    221
    222DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
    223
    224/*
    225 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
    226 */
    227static inline struct timerlat_variables *this_cpu_tmr_var(void)
    228{
    229	return this_cpu_ptr(&per_cpu_timerlat_var);
    230}
    231
    232/*
    233 * tlat_var_reset - Reset the values of the given timerlat_variables
    234 */
    235static inline void tlat_var_reset(void)
    236{
    237	struct timerlat_variables *tlat_var;
    238	int cpu;
    239	/*
    240	 * So far, all the values are initialized as 0, so
    241	 * zeroing the structure is perfect.
    242	 */
    243	for_each_cpu(cpu, cpu_online_mask) {
    244		tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
    245		memset(tlat_var, 0, sizeof(*tlat_var));
    246	}
    247}
    248#else /* CONFIG_TIMERLAT_TRACER */
    249#define tlat_var_reset()	do {} while (0)
    250#endif /* CONFIG_TIMERLAT_TRACER */
    251
    252/*
    253 * osn_var_reset - Reset the values of the given osnoise_variables
    254 */
    255static inline void osn_var_reset(void)
    256{
    257	struct osnoise_variables *osn_var;
    258	int cpu;
    259
    260	/*
    261	 * So far, all the values are initialized as 0, so
    262	 * zeroing the structure is perfect.
    263	 */
    264	for_each_cpu(cpu, cpu_online_mask) {
    265		osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
    266		memset(osn_var, 0, sizeof(*osn_var));
    267	}
    268}
    269
    270/*
    271 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
    272 */
    273static inline void osn_var_reset_all(void)
    274{
    275	osn_var_reset();
    276	tlat_var_reset();
    277}
    278
    279/*
    280 * Tells NMIs to call back to the osnoise tracer to record timestamps.
    281 */
    282bool trace_osnoise_callback_enabled;
    283
    284/*
    285 * osnoise sample structure definition. Used to store the statistics of a
    286 * sample run.
    287 */
    288struct osnoise_sample {
    289	u64			runtime;	/* runtime */
    290	u64			noise;		/* noise */
    291	u64			max_sample;	/* max single noise sample */
    292	int			hw_count;	/* # HW (incl. hypervisor) interference */
    293	int			nmi_count;	/* # NMIs during this sample */
    294	int			irq_count;	/* # IRQs during this sample */
    295	int			softirq_count;	/* # softirqs during this sample */
    296	int			thread_count;	/* # threads during this sample */
    297};
    298
    299#ifdef CONFIG_TIMERLAT_TRACER
    300/*
    301 * timerlat sample structure definition. Used to store the statistics of
    302 * a sample run.
    303 */
    304struct timerlat_sample {
    305	u64			timer_latency;	/* timer_latency */
    306	unsigned int		seqnum;		/* unique sequence */
    307	int			context;	/* timer context */
    308};
    309#endif
    310
    311/*
    312 * Protect the interface.
    313 */
    314struct mutex interface_lock;
    315
    316/*
    317 * Tracer data.
    318 */
    319static struct osnoise_data {
    320	u64	sample_period;		/* total sampling period */
    321	u64	sample_runtime;		/* active sampling portion of period */
    322	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
    323	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
    324#ifdef CONFIG_TIMERLAT_TRACER
    325	u64	timerlat_period;	/* timerlat period */
    326	u64	print_stack;		/* print IRQ stack if total > */
    327	int	timerlat_tracer;	/* timerlat tracer */
    328#endif
     329	bool	tainted;		/* inform users and developers about a problem */
    330} osnoise_data = {
    331	.sample_period			= DEFAULT_SAMPLE_PERIOD,
    332	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
    333	.stop_tracing			= 0,
    334	.stop_tracing_total		= 0,
    335#ifdef CONFIG_TIMERLAT_TRACER
    336	.print_stack			= 0,
    337	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
    338	.timerlat_tracer		= 0,
    339#endif
    340};
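/*
 * A sketch of how the defaults above map to the tracefs knobs, assuming
 * the files documented in Documentation/trace/osnoise-tracer.rst:
 *
 *   # echo 1000000 > osnoise/period_us            # osnoise_data.sample_period
 *   # echo 500000  > osnoise/runtime_us           # osnoise_data.sample_runtime
 *   # echo 100     > osnoise/stop_tracing_us      # osnoise_data.stop_tracing
 *   # echo 500 > osnoise/stop_tracing_total_us    # osnoise_data.stop_tracing_total
 */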
    341
    342#ifdef CONFIG_TIMERLAT_TRACER
    343static inline bool timerlat_enabled(void)
    344{
    345	return osnoise_data.timerlat_tracer;
    346}
    347
    348static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
    349{
    350	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
    351	/*
     352	 * If timerlat is enabled, but the irq handler has not
     353	 * run yet to set tracing_thread, do not trace.
    354	 */
    355	if (!tlat_var->tracing_thread) {
    356		osn_var->softirq.arrival_time = 0;
    357		osn_var->softirq.delta_start = 0;
    358		return 0;
    359	}
    360	return 1;
    361}
    362
    363static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
    364{
    365	struct timerlat_variables *tlat_var = this_cpu_tmr_var();
    366	/*
     367	 * If timerlat is enabled, but the irq handler has not
     368	 * run yet to set tracing_thread, do not trace.
    369	 */
    370	if (!tlat_var->tracing_thread) {
    371		osn_var->thread.delta_start = 0;
    372		osn_var->thread.arrival_time = 0;
    373		return 0;
    374	}
    375	return 1;
    376}
    377#else /* CONFIG_TIMERLAT_TRACER */
    378static inline bool timerlat_enabled(void)
    379{
    380	return false;
    381}
    382
    383static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
    384{
    385	return 1;
    386}
    387static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
    388{
    389	return 1;
    390}
    391#endif
    392
    393#ifdef CONFIG_PREEMPT_RT
    394/*
    395 * Print the osnoise header info.
    396 */
    397static void print_osnoise_headers(struct seq_file *s)
    398{
    399	if (osnoise_data.tainted)
    400		seq_puts(s, "# osnoise is tainted!\n");
    401
    402	seq_puts(s, "#                                _-------=> irqs-off\n");
    403	seq_puts(s, "#                               / _------=> need-resched\n");
    404	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
    405	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
    406	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
    407	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
    408	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
    409
    410	seq_puts(s, "#                              |||||| /          ");
    411	seq_puts(s, "                                     MAX\n");
    412
    413	seq_puts(s, "#                              ||||| /                         ");
    414	seq_puts(s, "                    SINGLE      Interference counters:\n");
    415
    416	seq_puts(s, "#                              |||||||               RUNTIME   ");
    417	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
    418
    419	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
    420	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
    421
    422	seq_puts(s, "#              | |         |   |||||||      |           |      ");
    423	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
    424}
    425#else /* CONFIG_PREEMPT_RT */
    426static void print_osnoise_headers(struct seq_file *s)
    427{
    428	if (osnoise_data.tainted)
    429		seq_puts(s, "# osnoise is tainted!\n");
    430
    431	seq_puts(s, "#                                _-----=> irqs-off\n");
    432	seq_puts(s, "#                               / _----=> need-resched\n");
    433	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
    434	seq_puts(s, "#                              || / _--=> preempt-depth\n");
    435	seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
    436	seq_puts(s, "                    MAX\n");
    437	seq_puts(s, "#                              |||| /     delay               ");
    438	seq_puts(s, "                    SINGLE      Interference counters:\n");
    439
    440	seq_puts(s, "#                              |||||               RUNTIME   ");
    441	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
    442
    443	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
    444	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
    445
    446	seq_puts(s, "#              | |         |   |||||      |           |      ");
    447	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
    448}
    449#endif /* CONFIG_PREEMPT_RT */
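/*
 * Under the non-RT header above, a sample line looks roughly like the
 * following (illustrative values, not captured output):
 *
 *   osnoise/1-757 [001] ....  81.637220: 1000000 190 99.98100 9 0 0 1007 18 1
 *
 * i.e., a 1s runtime with 190us of total noise (99.98100% of the CPU
 * available), a 9us max single noise, followed by the HW, NMI, IRQ,
 * softirq, and thread interference counters.
 */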
    450
    451/*
    452 * osnoise_taint - report an osnoise error.
    453 */
    454#define osnoise_taint(msg) ({							\
    455	struct osnoise_instance *inst;						\
    456	struct trace_buffer *buffer;						\
    457										\
    458	rcu_read_lock();							\
    459	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
    460		buffer = inst->tr->array_buffer.buffer;				\
    461		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
    462	}									\
    463	rcu_read_unlock();							\
    464	osnoise_data.tainted = true;						\
    465})
    466
    467/*
    468 * Record an osnoise_sample into the tracer buffer.
    469 */
    470static void
    471__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
    472{
    473	struct trace_event_call *call = &event_osnoise;
    474	struct ring_buffer_event *event;
    475	struct osnoise_entry *entry;
    476
    477	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
    478					  tracing_gen_ctx());
    479	if (!event)
    480		return;
    481	entry	= ring_buffer_event_data(event);
    482	entry->runtime		= sample->runtime;
    483	entry->noise		= sample->noise;
    484	entry->max_sample	= sample->max_sample;
    485	entry->hw_count		= sample->hw_count;
    486	entry->nmi_count	= sample->nmi_count;
    487	entry->irq_count	= sample->irq_count;
    488	entry->softirq_count	= sample->softirq_count;
    489	entry->thread_count	= sample->thread_count;
    490
    491	if (!call_filter_check_discard(call, entry, buffer, event))
    492		trace_buffer_unlock_commit_nostack(buffer, event);
    493}
    494
    495/*
    496 * Record an osnoise_sample on all osnoise instances.
    497 */
    498static void trace_osnoise_sample(struct osnoise_sample *sample)
    499{
    500	struct osnoise_instance *inst;
    501	struct trace_buffer *buffer;
    502
    503	rcu_read_lock();
    504	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
    505		buffer = inst->tr->array_buffer.buffer;
    506		__trace_osnoise_sample(sample, buffer);
    507	}
    508	rcu_read_unlock();
    509}
    510
    511#ifdef CONFIG_TIMERLAT_TRACER
    512/*
    513 * Print the timerlat header info.
    514 */
    515#ifdef CONFIG_PREEMPT_RT
    516static void print_timerlat_headers(struct seq_file *s)
    517{
    518	seq_puts(s, "#                                _-------=> irqs-off\n");
    519	seq_puts(s, "#                               / _------=> need-resched\n");
    520	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
    521	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
    522	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
    523	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
    524	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
    525	seq_puts(s, "#                              |||||| /\n");
    526	seq_puts(s, "#                              |||||||             ACTIVATION\n");
    527	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
    528	seq_puts(s, "       CONTEXT                LATENCY\n");
    529	seq_puts(s, "#              | |         |   |||||||      |         |      ");
    530	seq_puts(s, "            |                       |\n");
    531}
    532#else /* CONFIG_PREEMPT_RT */
    533static void print_timerlat_headers(struct seq_file *s)
    534{
    535	seq_puts(s, "#                                _-----=> irqs-off\n");
    536	seq_puts(s, "#                               / _----=> need-resched\n");
    537	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
    538	seq_puts(s, "#                              || / _--=> preempt-depth\n");
    539	seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
    540	seq_puts(s, "#                              |||| /     delay\n");
    541	seq_puts(s, "#                              |||||            ACTIVATION\n");
    542	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
    543	seq_puts(s, "      CONTEXT                 LATENCY\n");
    544	seq_puts(s, "#              | |         |   |||||      |         |      ");
    545	seq_puts(s, "            |                       |\n");
    546}
    547#endif /* CONFIG_PREEMPT_RT */
    548
    549static void
    550__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
    551{
    552	struct trace_event_call *call = &event_osnoise;
    553	struct ring_buffer_event *event;
    554	struct timerlat_entry *entry;
    555
    556	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
    557					  tracing_gen_ctx());
    558	if (!event)
    559		return;
    560	entry	= ring_buffer_event_data(event);
    561	entry->seqnum			= sample->seqnum;
    562	entry->context			= sample->context;
    563	entry->timer_latency		= sample->timer_latency;
    564
    565	if (!call_filter_check_discard(call, entry, buffer, event))
    566		trace_buffer_unlock_commit_nostack(buffer, event);
    567}
    568
    569/*
     570 * Record a timerlat_sample into the tracer buffer.
    571 */
    572static void trace_timerlat_sample(struct timerlat_sample *sample)
    573{
    574	struct osnoise_instance *inst;
    575	struct trace_buffer *buffer;
    576
    577	rcu_read_lock();
    578	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
    579		buffer = inst->tr->array_buffer.buffer;
    580		__trace_timerlat_sample(sample, buffer);
    581	}
    582	rcu_read_unlock();
    583}
    584
    585#ifdef CONFIG_STACKTRACE
    586
    587#define	MAX_CALLS	256
    588
    589/*
     590 * Stack trace will take place only at IRQ level, so no need
    591 * to control nesting here.
    592 */
    593struct trace_stack {
    594	int		stack_size;
    595	int		nr_entries;
    596	unsigned long	calls[MAX_CALLS];
    597};
    598
    599static DEFINE_PER_CPU(struct trace_stack, trace_stack);
    600
    601/*
    602 * timerlat_save_stack - save a stack trace without printing
    603 *
    604 * Save the current stack trace without printing. The
    605 * stack will be printed later, after the end of the measurement.
    606 */
    607static void timerlat_save_stack(int skip)
    608{
    609	unsigned int size, nr_entries;
    610	struct trace_stack *fstack;
    611
    612	fstack = this_cpu_ptr(&trace_stack);
    613
    614	size = ARRAY_SIZE(fstack->calls);
    615
    616	nr_entries = stack_trace_save(fstack->calls, size, skip);
    617
    618	fstack->stack_size = nr_entries * sizeof(unsigned long);
    619	fstack->nr_entries = nr_entries;
    620
    621	return;
    622
    623}
    624
    625static void
    626__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
    627{
    628	struct trace_event_call *call = &event_osnoise;
    629	struct ring_buffer_event *event;
    630	struct stack_entry *entry;
    631
    632	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
    633					  tracing_gen_ctx());
    634	if (!event)
    635		return;
    636
    637	entry = ring_buffer_event_data(event);
    638
    639	memcpy(&entry->caller, fstack->calls, size);
    640	entry->size = fstack->nr_entries;
    641
    642	if (!call_filter_check_discard(call, entry, buffer, event))
    643		trace_buffer_unlock_commit_nostack(buffer, event);
    644}
    645
    646/*
    647 * timerlat_dump_stack - dump a stack trace previously saved
    648 */
    649static void timerlat_dump_stack(u64 latency)
    650{
    651	struct osnoise_instance *inst;
    652	struct trace_buffer *buffer;
    653	struct trace_stack *fstack;
    654	unsigned int size;
    655
    656	/*
    657	 * trace only if latency > print_stack config, if enabled.
    658	 */
    659	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
    660		return;
    661
    662	preempt_disable_notrace();
    663	fstack = this_cpu_ptr(&trace_stack);
    664	size = fstack->stack_size;
    665
    666	rcu_read_lock();
    667	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
    668		buffer = inst->tr->array_buffer.buffer;
    669		__timerlat_dump_stack(buffer, fstack, size);
    670
    671	}
    672	rcu_read_unlock();
    673	preempt_enable_notrace();
    674}
    675#else /* CONFIG_STACKTRACE */
     676#define timerlat_dump_stack(latency) do {} while (0)
    677#define timerlat_save_stack(a) do {} while (0)
    678#endif /* CONFIG_STACKTRACE */
    679#endif /* CONFIG_TIMERLAT_TRACER */
    680
    681/*
    682 * Macros to encapsulate the time capturing infrastructure.
    683 */
    684#define time_get()	trace_clock_local()
    685#define time_to_us(x)	div_u64(x, 1000)
    686#define time_sub(a, b)	((a) - (b))
    687
    688/*
    689 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
    690 *
    691 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
    692 * to discount the NMI interference.
    693 *
    694 * See get_int_safe_duration().
    695 */
    696static inline void
    697cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
    698{
    699	if (osn_var->irq.delta_start)
    700		osn_var->irq.delta_start += duration;
    701}
    702
    703#ifndef CONFIG_PREEMPT_RT
    704/*
    705 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
    706 *
    707 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
    708 * forward to discount the interference.
    709 *
    710 * See get_int_safe_duration().
    711 */
    712static inline void
    713cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
    714{
    715	if (osn_var->softirq.delta_start)
    716		osn_var->softirq.delta_start += duration;
    717}
    718#else /* CONFIG_PREEMPT_RT */
    719#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
    720#endif
    721
    722/*
    723 * cond_move_thread_delta_start - Forward the delta_start of a running thread
    724 *
     725 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
    726 * is pushed forward to discount the interference.
    727 *
    728 * See get_int_safe_duration().
    729 */
    730static inline void
    731cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
    732{
    733	if (osn_var->thread.delta_start)
    734		osn_var->thread.delta_start += duration;
    735}
    736
    737/*
    738 * get_int_safe_duration - Get the duration of a window
    739 *
     740 * The irq, softirq and thread variables need to have their duration without
     741 * the interference from higher priority interrupts. Instead of keeping a
     742 * variable to discount the interrupt interference from these variables, the
     743 * starting time of these variables is pushed forward with the interrupt's
    744 * duration. In this way, a single variable is used to:
    745 *
    746 *   - Know if a given window is being measured.
    747 *   - Account its duration.
    748 *   - Discount the interference.
    749 *
    750 * To avoid getting inconsistent values, e.g.,:
    751 *
    752 *	now = time_get()
    753 *		--->	interrupt!
    754 *			delta_start -= int duration;
    755 *		<---
    756 *	duration = now - delta_start;
    757 *
    758 *	result: negative duration if the variable duration before the
    759 *	interrupt was smaller than the interrupt execution.
    760 *
    761 * A counter of interrupts is used. If the counter increased, try
    762 * to capture an interference safe duration.
    763 */
    764static inline s64
    765get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
    766{
    767	u64 int_counter, now;
    768	s64 duration;
    769
    770	do {
    771		int_counter = local_read(&osn_var->int_counter);
    772		/* synchronize with interrupts */
    773		barrier();
    774
    775		now = time_get();
    776		duration = (now - *delta_start);
    777
    778		/* synchronize with interrupts */
    779		barrier();
    780	} while (int_counter != local_read(&osn_var->int_counter));
    781
    782	/*
     783	 * This is evidence of a race condition that causes
     784	 * a value to be "discounted" too much.
    785	 */
    786	if (duration < 0)
    787		osnoise_taint("Negative duration!\n");
    788
    789	*delta_start = 0;
    790
    791	return duration;
    792}
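/*
 * A worked example of the accounting above (illustrative numbers): a
 * thread window opens with delta_start = 100us. An IRQ then runs for
 * 10us, and cond_move_thread_delta_start() pushes delta_start forward to
 * 110us. When the window closes at now = 150us, get_int_safe_duration()
 * reports 150 - 110 = 40us of thread noise: the 10us is accounted to the
 * IRQ and is not double-counted in the thread.
 */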
    793
    794/*
    795 *
    796 * set_int_safe_time - Save the current time on *time, aware of interference
    797 *
    798 * Get the time, taking into consideration a possible interference from
    799 * higher priority interrupts.
    800 *
    801 * See get_int_safe_duration() for an explanation.
    802 */
    803static u64
    804set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
    805{
    806	u64 int_counter;
    807
    808	do {
    809		int_counter = local_read(&osn_var->int_counter);
    810		/* synchronize with interrupts */
    811		barrier();
    812
    813		*time = time_get();
    814
    815		/* synchronize with interrupts */
    816		barrier();
    817	} while (int_counter != local_read(&osn_var->int_counter));
    818
    819	return int_counter;
    820}
    821
    822#ifdef CONFIG_TIMERLAT_TRACER
    823/*
     824 * copy_int_safe_time - Copy *src into *dst aware of interference
    825 */
    826static u64
    827copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
    828{
    829	u64 int_counter;
    830
    831	do {
    832		int_counter = local_read(&osn_var->int_counter);
    833		/* synchronize with interrupts */
    834		barrier();
    835
    836		*dst = *src;
    837
    838		/* synchronize with interrupts */
    839		barrier();
    840	} while (int_counter != local_read(&osn_var->int_counter));
    841
    842	return int_counter;
    843}
    844#endif /* CONFIG_TIMERLAT_TRACER */
    845
    846/*
    847 * trace_osnoise_callback - NMI entry/exit callback
    848 *
     849 * This function is called by the NMI entry and exit code. The bool enter
     850 * distinguishes between the two cases. This function is used to note an NMI
     851 * occurrence, compute the noise caused by the NMI, and to remove the noise
    852 * it is potentially causing on other interference variables.
    853 */
    854void trace_osnoise_callback(bool enter)
    855{
    856	struct osnoise_variables *osn_var = this_cpu_osn_var();
    857	u64 duration;
    858
    859	if (!osn_var->sampling)
    860		return;
    861
    862	/*
    863	 * Currently trace_clock_local() calls sched_clock() and the
    864	 * generic version is not NMI safe.
    865	 */
    866	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
    867		if (enter) {
    868			osn_var->nmi.delta_start = time_get();
    869			local_inc(&osn_var->int_counter);
    870		} else {
    871			duration = time_get() - osn_var->nmi.delta_start;
    872
    873			trace_nmi_noise(osn_var->nmi.delta_start, duration);
    874
    875			cond_move_irq_delta_start(osn_var, duration);
    876			cond_move_softirq_delta_start(osn_var, duration);
    877			cond_move_thread_delta_start(osn_var, duration);
    878		}
    879	}
    880
    881	if (enter)
    882		osn_var->nmi.count++;
    883}
    884
    885/*
    886 * osnoise_trace_irq_entry - Note the starting of an IRQ
    887 *
    888 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
     889 * it is safe to use a single variable (osn_var->irq) to save the statistics.
    890 * The arrival_time is used to report... the arrival time. The delta_start
    891 * is used to compute the duration at the IRQ exit handler. See
    892 * cond_move_irq_delta_start().
    893 */
    894void osnoise_trace_irq_entry(int id)
    895{
    896	struct osnoise_variables *osn_var = this_cpu_osn_var();
    897
    898	if (!osn_var->sampling)
    899		return;
    900	/*
    901	 * This value will be used in the report, but not to compute
    902	 * the execution time, so it is safe to get it unsafe.
    903	 */
    904	osn_var->irq.arrival_time = time_get();
    905	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
    906	osn_var->irq.count++;
    907
    908	local_inc(&osn_var->int_counter);
    909}
    910
    911/*
     912 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
    913 *
     914 * Computes the duration of the IRQ noise and traces it. It also discounts the
     915 * interference from other sources of noise that could currently be accounted.
    916 */
    917void osnoise_trace_irq_exit(int id, const char *desc)
    918{
    919	struct osnoise_variables *osn_var = this_cpu_osn_var();
    920	int duration;
    921
    922	if (!osn_var->sampling)
    923		return;
    924
    925	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
    926	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
    927	osn_var->irq.arrival_time = 0;
    928	cond_move_softirq_delta_start(osn_var, duration);
    929	cond_move_thread_delta_start(osn_var, duration);
    930}
    931
    932/*
     933 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
     934 *
     935 * Used to note the starting of an IRQ occurrence.
    936 */
    937static void trace_irqentry_callback(void *data, int irq,
    938				    struct irqaction *action)
    939{
    940	osnoise_trace_irq_entry(irq);
    941}
    942
    943/*
     944 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
     945 *
     946 * Used to note the end of an IRQ occurrence.
    947 */
    948static void trace_irqexit_callback(void *data, int irq,
    949				   struct irqaction *action, int ret)
    950{
    951	osnoise_trace_irq_exit(irq, action->name);
    952}
    953
    954/*
    955 * arch specific register function.
    956 */
    957int __weak osnoise_arch_register(void)
    958{
    959	return 0;
    960}
    961
    962/*
    963 * arch specific unregister function.
    964 */
    965void __weak osnoise_arch_unregister(void)
    966{
    967	return;
    968}
    969
    970/*
    971 * hook_irq_events - Hook IRQ handling events
    972 *
    973 * This function hooks the IRQ related callbacks to the respective trace
    974 * events.
    975 */
    976static int hook_irq_events(void)
    977{
    978	int ret;
    979
    980	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
    981	if (ret)
    982		goto out_err;
    983
    984	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
    985	if (ret)
    986		goto out_unregister_entry;
    987
    988	ret = osnoise_arch_register();
    989	if (ret)
    990		goto out_irq_exit;
    991
    992	return 0;
    993
    994out_irq_exit:
    995	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
    996out_unregister_entry:
    997	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
    998out_err:
    999	return -EINVAL;
   1000}
   1001
   1002/*
   1003 * unhook_irq_events - Unhook IRQ handling events
   1004 *
    1005 * This function unhooks the IRQ related callbacks from the respective trace
   1006 * events.
   1007 */
   1008static void unhook_irq_events(void)
   1009{
   1010	osnoise_arch_unregister();
   1011	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
   1012	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
   1013}
   1014
   1015#ifndef CONFIG_PREEMPT_RT
   1016/*
   1017 * trace_softirq_entry_callback - Note the starting of a softirq
   1018 *
   1019 * Save the starting time of a softirq. As softirqs are non-preemptive to
    1020 * other softirqs, it is safe to use a single variable (osn_var->softirq)
   1021 * to save the statistics. The arrival_time is used to report... the
   1022 * arrival time. The delta_start is used to compute the duration at the
   1023 * softirq exit handler. See cond_move_softirq_delta_start().
   1024 */
   1025static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
   1026{
   1027	struct osnoise_variables *osn_var = this_cpu_osn_var();
   1028
   1029	if (!osn_var->sampling)
   1030		return;
   1031	/*
   1032	 * This value will be used in the report, but not to compute
   1033	 * the execution time, so it is safe to get it unsafe.
   1034	 */
   1035	osn_var->softirq.arrival_time = time_get();
   1036	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
   1037	osn_var->softirq.count++;
   1038
   1039	local_inc(&osn_var->int_counter);
   1040}
   1041
   1042/*
    1043 * trace_softirq_exit_callback - Note the end of a softirq
    1044 *
    1045 * Computes the duration of the softirq noise and traces it. It also discounts
    1046 * the interference from other sources of noise that could currently be accounted.
   1047 */
   1048static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
   1049{
   1050	struct osnoise_variables *osn_var = this_cpu_osn_var();
   1051	int duration;
   1052
   1053	if (!osn_var->sampling)
   1054		return;
   1055
   1056	if (unlikely(timerlat_enabled()))
   1057		if (!timerlat_softirq_exit(osn_var))
   1058			return;
   1059
   1060	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
   1061	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
   1062	cond_move_thread_delta_start(osn_var, duration);
   1063	osn_var->softirq.arrival_time = 0;
   1064}
   1065
   1066/*
   1067 * hook_softirq_events - Hook softirq handling events
   1068 *
   1069 * This function hooks the softirq related callbacks to the respective trace
   1070 * events.
   1071 */
   1072static int hook_softirq_events(void)
   1073{
   1074	int ret;
   1075
   1076	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
   1077	if (ret)
   1078		goto out_err;
   1079
   1080	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
   1081	if (ret)
   1082		goto out_unreg_entry;
   1083
   1084	return 0;
   1085
   1086out_unreg_entry:
   1087	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
   1088out_err:
   1089	return -EINVAL;
   1090}
   1091
   1092/*
   1093 * unhook_softirq_events - Unhook softirq handling events
   1094 *
    1095 * This function unhooks the softirq related callbacks from the respective trace
   1096 * events.
   1097 */
   1098static void unhook_softirq_events(void)
   1099{
   1100	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
   1101	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
   1102}
   1103#else /* CONFIG_PREEMPT_RT */
   1104/*
    1105 * softirqs run as threads on PREEMPT_RT.
   1106 */
   1107static int hook_softirq_events(void)
   1108{
   1109	return 0;
   1110}
   1111static void unhook_softirq_events(void)
   1112{
   1113}
   1114#endif
   1115
   1116/*
   1117 * thread_entry - Record the starting of a thread noise window
   1118 *
   1119 * It saves the context switch time for a noisy thread, and increments
   1120 * the interference counters.
   1121 */
   1122static void
   1123thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
   1124{
   1125	if (!osn_var->sampling)
   1126		return;
   1127	/*
   1128	 * The arrival time will be used in the report, but not to compute
   1129	 * the execution time, so it is safe to get it unsafe.
   1130	 */
   1131	osn_var->thread.arrival_time = time_get();
   1132
   1133	set_int_safe_time(osn_var, &osn_var->thread.delta_start);
   1134
   1135	osn_var->thread.count++;
   1136	local_inc(&osn_var->int_counter);
   1137}
   1138
   1139/*
   1140 * thread_exit - Report the end of a thread noise window
   1141 *
   1142 * It computes the total noise from a thread, tracing if needed.
   1143 */
   1144static void
   1145thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
   1146{
   1147	int duration;
   1148
   1149	if (!osn_var->sampling)
   1150		return;
   1151
   1152	if (unlikely(timerlat_enabled()))
   1153		if (!timerlat_thread_exit(osn_var))
   1154			return;
   1155
   1156	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);
   1157
   1158	trace_thread_noise(t, osn_var->thread.arrival_time, duration);
   1159
   1160	osn_var->thread.arrival_time = 0;
   1161}
   1162
   1163/*
   1164 * trace_sched_switch - sched:sched_switch trace event handler
   1165 *
   1166 * This function is hooked to the sched:sched_switch trace event, and it is
   1167 * used to record the beginning and to report the end of a thread noise window.
   1168 */
   1169static void
   1170trace_sched_switch_callback(void *data, bool preempt,
   1171			    struct task_struct *p,
   1172			    struct task_struct *n,
   1173			    unsigned int prev_state)
   1174{
   1175	struct osnoise_variables *osn_var = this_cpu_osn_var();
   1176
   1177	if (p->pid != osn_var->pid)
   1178		thread_exit(osn_var, p);
   1179
   1180	if (n->pid != osn_var->pid)
   1181		thread_entry(osn_var, n);
   1182}
   1183
   1184/*
    1185 * hook_thread_events - Hook the instrumentation for thread noise
   1186 *
   1187 * Hook the osnoise tracer callbacks to handle the noise from other
   1188 * threads on the necessary kernel events.
   1189 */
   1190static int hook_thread_events(void)
   1191{
   1192	int ret;
   1193
   1194	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
   1195	if (ret)
   1196		return -EINVAL;
   1197
   1198	return 0;
   1199}
   1200
   1201/*
    1202 * unhook_thread_events - Unhook the instrumentation for thread noise
    1203 *
    1204 * Unhook the osnoise tracer callbacks that handle the noise from other
    1205 * threads on the necessary kernel events.
   1206 */
   1207static void unhook_thread_events(void)
   1208{
   1209	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
   1210}
   1211
   1212/*
   1213 * save_osn_sample_stats - Save the osnoise_sample statistics
   1214 *
   1215 * Save the osnoise_sample statistics before the sampling phase. These
    1216 * values will be used later to compute the diff between the statistics
   1217 * before and after the osnoise sampling.
   1218 */
   1219static void
   1220save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
   1221{
   1222	s->nmi_count = osn_var->nmi.count;
   1223	s->irq_count = osn_var->irq.count;
   1224	s->softirq_count = osn_var->softirq.count;
   1225	s->thread_count = osn_var->thread.count;
   1226}
   1227
   1228/*
   1229 * diff_osn_sample_stats - Compute the osnoise_sample statistics
   1230 *
   1231 * After a sample period, compute the difference on the osnoise_sample
   1232 * statistics. The struct osnoise_sample *s contains the statistics saved via
   1233 * save_osn_sample_stats() before the osnoise sampling.
   1234 */
   1235static void
   1236diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
   1237{
   1238	s->nmi_count = osn_var->nmi.count - s->nmi_count;
   1239	s->irq_count = osn_var->irq.count - s->irq_count;
   1240	s->softirq_count = osn_var->softirq.count - s->softirq_count;
   1241	s->thread_count = osn_var->thread.count - s->thread_count;
   1242}
   1243
   1244/*
   1245 * osnoise_stop_tracing - Stop tracing and the tracer.
   1246 */
   1247static __always_inline void osnoise_stop_tracing(void)
   1248{
   1249	struct osnoise_instance *inst;
   1250	struct trace_array *tr;
   1251
   1252	rcu_read_lock();
   1253	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
   1254		tr = inst->tr;
   1255		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
   1256				"stop tracing hit on cpu %d\n", smp_processor_id());
   1257
   1258		tracer_tracing_off(tr);
   1259	}
   1260	rcu_read_unlock();
   1261}
   1262
   1263/*
   1264 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
   1265 */
   1266static void notify_new_max_latency(u64 latency)
   1267{
   1268	struct osnoise_instance *inst;
   1269	struct trace_array *tr;
   1270
   1271	rcu_read_lock();
   1272	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
   1273		tr = inst->tr;
   1274		if (tr->max_latency < latency) {
   1275			tr->max_latency = latency;
   1276			latency_fsnotify(tr);
   1277		}
   1278	}
   1279	rcu_read_unlock();
   1280}
   1281
   1282/*
   1283 * run_osnoise - Sample the time and look for osnoise
   1284 *
   1285 * Used to capture the time, looking for potential osnoise latency repeatedly.
   1286 * Different from hwlat_detector, it is called with preemption and interrupts
    1287 * enabled. This allows irqs, softirqs and threads to run, interfering with the
   1288 * osnoise sampling thread, as they would do with a regular thread.
   1289 */
   1290static int run_osnoise(void)
   1291{
   1292	struct osnoise_variables *osn_var = this_cpu_osn_var();
   1293	u64 start, sample, last_sample;
   1294	u64 last_int_count, int_count;
   1295	s64 noise = 0, max_noise = 0;
   1296	s64 total, last_total = 0;
   1297	struct osnoise_sample s;
   1298	unsigned int threshold;
   1299	u64 runtime, stop_in;
   1300	u64 sum_noise = 0;
   1301	int hw_count = 0;
   1302	int ret = -1;
   1303
   1304	/*
   1305	 * Considers the current thread as the workload.
   1306	 */
   1307	osn_var->pid = current->pid;
   1308
   1309	/*
   1310	 * Save the current stats for the diff
   1311	 */
   1312	save_osn_sample_stats(osn_var, &s);
   1313
   1314	/*
   1315	 * if threshold is 0, use the default value of 5 us.
   1316	 */
   1317	threshold = tracing_thresh ? : 5000;
   1318
   1319	/*
   1320	 * Make sure NMIs see sampling first
   1321	 */
   1322	osn_var->sampling = true;
   1323	barrier();
   1324
   1325	/*
   1326	 * Transform the *_us config to nanoseconds to avoid the
   1327	 * division on the main loop.
   1328	 */
   1329	runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
   1330	stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
   1331
   1332	/*
    1333	 * Start timestamp
   1334	 */
   1335	start = time_get();
   1336
   1337	/*
   1338	 * "previous" loop.
   1339	 */
   1340	last_int_count = set_int_safe_time(osn_var, &last_sample);
   1341
   1342	do {
   1343		/*
   1344		 * Get sample!
   1345		 */
   1346		int_count = set_int_safe_time(osn_var, &sample);
   1347
   1348		noise = time_sub(sample, last_sample);
   1349
   1350		/*
   1351		 * This shouldn't happen.
   1352		 */
   1353		if (noise < 0) {
   1354			osnoise_taint("negative noise!");
   1355			goto out;
   1356		}
   1357
   1358		/*
   1359		 * Sample runtime.
   1360		 */
   1361		total = time_sub(sample, start);
   1362
   1363		/*
   1364		 * Check for possible overflows.
   1365		 */
   1366		if (total < last_total) {
   1367			osnoise_taint("total overflow!");
   1368			break;
   1369		}
   1370
   1371		last_total = total;
   1372
   1373		if (noise >= threshold) {
   1374			int interference = int_count - last_int_count;
   1375
   1376			if (noise > max_noise)
   1377				max_noise = noise;
   1378
   1379			if (!interference)
   1380				hw_count++;
   1381
   1382			sum_noise += noise;
   1383
   1384			trace_sample_threshold(last_sample, noise, interference);
   1385
   1386			if (osnoise_data.stop_tracing)
   1387				if (noise > stop_in)
   1388					osnoise_stop_tracing();
   1389		}
   1390
   1391		/*
   1392		 * In some cases, notably when running on a nohz_full CPU with
    1393		 * a stopped tick, PREEMPT_RCU has no way to account for QSs.
   1394		 * This will eventually cause unwarranted noise as PREEMPT_RCU
   1395		 * will force preemption as the means of ending the current
   1396		 * grace period. We avoid this problem by calling
   1397		 * rcu_momentary_dyntick_idle(), which performs a zero duration
   1398		 * EQS allowing PREEMPT_RCU to end the current grace period.
   1399		 * This call shouldn't be wrapped inside an RCU critical
   1400		 * section.
   1401		 *
    1402		 * Note that in non-PREEMPT_RCU kernels, QSs are handled through
    1403		 * cond_resched().
   1404		 */
   1405		if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
   1406			local_irq_disable();
   1407			rcu_momentary_dyntick_idle();
   1408			local_irq_enable();
   1409		}
   1410
   1411		/*
    1412		 * For the non-preemptive kernel config: let threads run, if
   1413		 * they so wish.
   1414		 */
   1415		cond_resched();
   1416
   1417		last_sample = sample;
   1418		last_int_count = int_count;
   1419
   1420	} while (total < runtime && !kthread_should_stop());
   1421
   1422	/*
    1423	 * Finish the above in the view of interrupts.
   1424	 */
   1425	barrier();
   1426
   1427	osn_var->sampling = false;
   1428
   1429	/*
   1430	 * Make sure sampling data is no longer updated.
   1431	 */
   1432	barrier();
   1433
   1434	/*
   1435	 * Save noise info.
   1436	 */
   1437	s.noise = time_to_us(sum_noise);
   1438	s.runtime = time_to_us(total);
   1439	s.max_sample = time_to_us(max_noise);
   1440	s.hw_count = hw_count;
   1441
   1442	/* Save interference stats info */
   1443	diff_osn_sample_stats(osn_var, &s);
   1444
   1445	trace_osnoise_sample(&s);
   1446
   1447	notify_new_max_latency(max_noise);
   1448
   1449	if (osnoise_data.stop_tracing_total)
   1450		if (s.noise > osnoise_data.stop_tracing_total)
   1451			osnoise_stop_tracing();
   1452
   1453	return 0;
   1454out:
   1455	return ret;
   1456}
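/*
 * A numeric sketch of the loop above (illustrative values): with the
 * default 5us threshold, consecutive samples at t = 1000us and
 * t = 1008us yield noise = 8us >= threshold, so it is added to
 * sum_noise. If int_counter did not change between the two reads
 * (interference == 0), no NMI/IRQ/softirq/thread was observed, and the
 * gap is attributed to hardware or hypervisor noise via hw_count.
 */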
   1457
   1458static struct cpumask osnoise_cpumask;
   1459static struct cpumask save_cpumask;
   1460
   1461/*
   1462 * osnoise_sleep - sleep until the next period
   1463 */
   1464static void osnoise_sleep(void)
   1465{
   1466	u64 interval;
   1467	ktime_t wake_time;
   1468
   1469	mutex_lock(&interface_lock);
   1470	interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
   1471	mutex_unlock(&interface_lock);
   1472
   1473	/*
    1474	 * Unlike hwlat_detector, the osnoise tracer can run
    1475	 * without a pause because preemption is on.
   1476	 */
   1477	if (!interval) {
   1478		/* Let synchronize_rcu_tasks() make progress */
   1479		cond_resched_tasks_rcu_qs();
   1480		return;
   1481	}
   1482
   1483	wake_time = ktime_add_us(ktime_get(), interval);
   1484	__set_current_state(TASK_INTERRUPTIBLE);
   1485
   1486	while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) {
   1487		if (kthread_should_stop())
   1488			break;
   1489	}
   1490}
   1491
   1492/*
   1493 * osnoise_main - The osnoise detection kernel thread
   1494 *
   1495 * Calls run_osnoise() function to measure the osnoise for the configured runtime,
   1496 * every period.
   1497 */
   1498static int osnoise_main(void *data)
   1499{
   1500
   1501	while (!kthread_should_stop()) {
   1502		run_osnoise();
   1503		osnoise_sleep();
   1504	}
   1505
   1506	return 0;
   1507}
   1508
   1509#ifdef CONFIG_TIMERLAT_TRACER
   1510/*
   1511 * timerlat_irq - hrtimer handler for timerlat.
   1512 */
   1513static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
   1514{
   1515	struct osnoise_variables *osn_var = this_cpu_osn_var();
   1516	struct timerlat_variables *tlat;
   1517	struct timerlat_sample s;
   1518	u64 now;
   1519	u64 diff;
   1520
   1521	/*
   1522	 * I am not sure if the timer was armed for this CPU. So, get
   1523	 * the timerlat struct from the timer itself, not from this
   1524	 * CPU.
   1525	 */
   1526	tlat = container_of(timer, struct timerlat_variables, timer);
   1527
   1528	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
   1529
   1530	/*
    1531	 * Enable the osnoise: events for thread and softirq.
   1532	 */
   1533	tlat->tracing_thread = true;
   1534
   1535	osn_var->thread.arrival_time = time_get();
   1536
   1537	/*
   1538	 * A hardirq is running: the timer IRQ. It is for sure preempting
   1539	 * a thread, and potentially preempting a softirq.
   1540	 *
   1541	 * At this point, it is not interesting to know the duration of the
   1542	 * preempted thread (and maybe softirq), but how much time they will
   1543	 * delay the beginning of the execution of the timer thread.
   1544	 *
   1545	 * To get the correct (net) delay added by the softirq, its delta_start
   1546	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
    1547	 * start of the softirq will be zeroed, accounting only the time
   1548	 * after that.
   1549	 *
   1550	 * The thread follows the same principle. However, if a softirq is
   1551	 * running, the thread needs to receive the softirq delta_start. The
   1552	 * reason being is that the softirq will be the last to be unfolded,
   1553	 * resseting the thread delay to zero.
   1554	 *
   1555	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
   1556	 * on RT, moving the thread is enough.
   1557	 */
   1558	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
   1559		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
   1560				   &osn_var->softirq.delta_start);
   1561
   1562		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
   1563				    &osn_var->irq.delta_start);
   1564	} else {
   1565		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
   1566				    &osn_var->irq.delta_start);
   1567	}
   1568
   1569	/*
    1570	 * Compare the current time with the expected time.
   1571	 */
   1572	diff = now - tlat->abs_period;
   1573
   1574	tlat->count++;
   1575	s.seqnum = tlat->count;
   1576	s.timer_latency = diff;
   1577	s.context = IRQ_CONTEXT;
   1578
   1579	trace_timerlat_sample(&s);
   1580
   1581	if (osnoise_data.stop_tracing) {
   1582		if (time_to_us(diff) >= osnoise_data.stop_tracing) {
   1583
   1584			/*
   1585			 * At this point, if stop_tracing is set and <= print_stack,
   1586			 * print_stack is set and would be printed in the thread handler.
   1587			 *
   1588			 * Thus, print the stack trace as it is helpful to define the
   1589			 * root cause of an IRQ latency.
   1590			 */
   1591			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
   1592				timerlat_save_stack(0);
   1593				timerlat_dump_stack(time_to_us(diff));
   1594			}
   1595
   1596			osnoise_stop_tracing();
   1597			notify_new_max_latency(diff);
   1598
   1599			return HRTIMER_NORESTART;
   1600		}
   1601	}
   1602
   1603	wake_up_process(tlat->kthread);
   1604
   1605	if (osnoise_data.print_stack)
   1606		timerlat_save_stack(0);
   1607
   1608	return HRTIMER_NORESTART;
   1609}
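/*
 * An illustrative timeline for the handler above: with abs_period = T,
 * the timer IRQ actually fires at T + 13us. The handler records an
 * IRQ_CONTEXT sample with timer_latency = 13us and wakes the timerlat
 * thread, which later reports a THREAD_CONTEXT sample computed against
 * the same abs_period (see timerlat_main() below).
 */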
   1610
   1611/*
   1612 * wait_next_period - Wait for the next period for timerlat
   1613 */
   1614static int wait_next_period(struct timerlat_variables *tlat)
   1615{
   1616	ktime_t next_abs_period, now;
   1617	u64 rel_period = osnoise_data.timerlat_period * 1000;
   1618
   1619	now = hrtimer_cb_get_time(&tlat->timer);
   1620	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
   1621
   1622	/*
   1623	 * Save the next abs_period.
   1624	 */
   1625	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
   1626
   1627	/*
   1628	 * If the new abs_period is in the past, skip the activation.
   1629	 */
   1630	while (ktime_compare(now, next_abs_period) > 0) {
   1631		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
   1632		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
   1633	}
   1634
   1635	set_current_state(TASK_INTERRUPTIBLE);
   1636
   1637	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
   1638	schedule();
   1639	return 1;
   1640}
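/*
 * Example of the period arithmetic above (illustrative): with the
 * default timerlat_period of 1000us, rel_period is 1000000ns, so each
 * activation arms the hrtimer at abs_period + 1ms. If the thread overran
 * and that instant is already in the past, the while loop above advances
 * abs_period in whole periods until the next activation is in the future.
 */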
   1641
   1642/*
    1643 * timerlat_main - Timerlat main
   1644 */
   1645static int timerlat_main(void *data)
   1646{
   1647	struct osnoise_variables *osn_var = this_cpu_osn_var();
   1648	struct timerlat_variables *tlat = this_cpu_tmr_var();
   1649	struct timerlat_sample s;
   1650	struct sched_param sp;
   1651	u64 now, diff;
   1652
   1653	/*
    1654	 * Make the thread RT; that is how cyclictest is usually used.
   1655	 */
   1656	sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
   1657	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
   1658
   1659	tlat->count = 0;
   1660	tlat->tracing_thread = false;
   1661
   1662	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
   1663	tlat->timer.function = timerlat_irq;
   1664	tlat->kthread = current;
   1665	osn_var->pid = current->pid;
   1666	/*
    1667	 * Annotate the arrival time.
   1668	 */
   1669	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
   1670
   1671	wait_next_period(tlat);
   1672
   1673	osn_var->sampling = 1;
   1674
   1675	while (!kthread_should_stop()) {
   1676		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
   1677		diff = now - tlat->abs_period;
   1678
   1679		s.seqnum = tlat->count;
   1680		s.timer_latency = diff;
   1681		s.context = THREAD_CONTEXT;
   1682
   1683		trace_timerlat_sample(&s);
   1684
   1685		timerlat_dump_stack(time_to_us(diff));
   1686
   1687		tlat->tracing_thread = false;
   1688		if (osnoise_data.stop_tracing_total)
   1689			if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
   1690				osnoise_stop_tracing();
   1691
   1692		wait_next_period(tlat);
   1693	}
   1694
   1695	hrtimer_cancel(&tlat->timer);
   1696	return 0;
   1697}
   1698#else /* CONFIG_TIMERLAT_TRACER */
   1699static int timerlat_main(void *data)
   1700{
   1701	return 0;
   1702}
   1703#endif /* CONFIG_TIMERLAT_TRACER */
   1704
   1705/*
   1706 * stop_kthread - stop a workload thread
   1707 */
   1708static void stop_kthread(unsigned int cpu)
   1709{
   1710	struct task_struct *kthread;
   1711
   1712	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
   1713	if (kthread)
   1714		kthread_stop(kthread);
   1715	per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
   1716}
   1717
   1718/*
    1719 * stop_per_cpu_kthreads - Stop the per-cpu threads
    1720 *
    1721 * Stop the osnoise sampling threads. Use this on unload and at system
   1722 * shutdown.
   1723 */
   1724static void stop_per_cpu_kthreads(void)
   1725{
   1726	int cpu;
   1727
   1728	cpus_read_lock();
   1729
   1730	for_each_online_cpu(cpu)
   1731		stop_kthread(cpu);
   1732
   1733	cpus_read_unlock();
   1734}
   1735
   1736/*
    1737 * start_kthread - Start a workload thread
   1738 */
   1739static int start_kthread(unsigned int cpu)
   1740{
   1741	struct task_struct *kthread;
   1742	void *main = osnoise_main;
   1743	char comm[24];
   1744
   1745	if (timerlat_enabled()) {
   1746		snprintf(comm, 24, "timerlat/%d", cpu);
   1747		main = timerlat_main;
   1748	} else {
   1749		snprintf(comm, 24, "osnoise/%d", cpu);
   1750	}
   1751
   1752	kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
   1753
   1754	if (IS_ERR(kthread)) {
   1755		pr_err(BANNER "could not start sampling thread\n");
   1756		stop_per_cpu_kthreads();
   1757		return -ENOMEM;
   1758	}
   1759
   1760	per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
   1761
   1762	return 0;
   1763}
   1764
   1765/*
    1766 * start_per_cpu_kthreads - Kick off the per-cpu osnoise sampling kthreads
    1767 *
    1768 * This starts the kernel threads that will look for osnoise on many
    1769 * cpus.
   1770 */
   1771static int start_per_cpu_kthreads(void)
   1772{
   1773	struct cpumask *current_mask = &save_cpumask;
   1774	int retval = 0;
   1775	int cpu;
   1776
   1777	cpus_read_lock();
   1778	/*
   1779	 * Run only on online CPUs in which osnoise is allowed to run.
   1780	 */
   1781	cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
   1782
   1783	for_each_possible_cpu(cpu)
   1784		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
   1785
   1786	for_each_cpu(cpu, current_mask) {
   1787		retval = start_kthread(cpu);
   1788		if (retval) {
   1789			stop_per_cpu_kthreads();
   1790			break;
   1791		}
   1792	}
   1793
   1794	cpus_read_unlock();
   1795
   1796	return retval;
   1797}
   1798
   1799#ifdef CONFIG_HOTPLUG_CPU
   1800static void osnoise_hotplug_workfn(struct work_struct *dummy)
   1801{
   1802	unsigned int cpu = smp_processor_id();
   1803
   1804	mutex_lock(&trace_types_lock);
   1805
   1806	if (!osnoise_has_registered_instances())
   1807		goto out_unlock_trace;
   1808
   1809	mutex_lock(&interface_lock);
   1810	cpus_read_lock();
   1811
   1812	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
   1813		goto out_unlock;
   1814
   1815	start_kthread(cpu);
   1816
   1817out_unlock:
   1818	cpus_read_unlock();
   1819	mutex_unlock(&interface_lock);
   1820out_unlock_trace:
   1821	mutex_unlock(&trace_types_lock);
   1822}
   1823
   1824static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
   1825
   1826/*
   1827 * osnoise_cpu_init - CPU hotplug online callback function
   1828 */
   1829static int osnoise_cpu_init(unsigned int cpu)
   1830{
   1831	schedule_work_on(cpu, &osnoise_hotplug_work);
   1832	return 0;
   1833}
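
/*
 * The work is scheduled on the CPU going online, so smp_processor_id()
 * in osnoise_hotplug_workfn() above identifies the new CPU without any
 * extra argument passing.
 */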
   1834
   1835/*
   1836 * osnoise_cpu_die - CPU hotplug offline callback function
   1837 */
   1838static int osnoise_cpu_die(unsigned int cpu)
   1839{
   1840	stop_kthread(cpu);
   1841	return 0;
   1842}
   1843
   1844static void osnoise_init_hotplug_support(void)
   1845{
   1846	int ret;
   1847
   1848	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
   1849				osnoise_cpu_init, osnoise_cpu_die);
   1850	if (ret < 0)
    1851		pr_warn(BANNER "Failed to init cpu hotplug support\n");
   1852
   1853	return;
   1854}
   1855#else /* CONFIG_HOTPLUG_CPU */
   1856static void osnoise_init_hotplug_support(void)
   1857{
   1858	return;
   1859}
   1860#endif /* CONFIG_HOTPLUG_CPU */
   1861
   1862/*
   1863 * osnoise_cpus_read - Read function for reading the "cpus" file
   1864 * @filp: The active open file structure
   1865 * @ubuf: The userspace provided buffer to read value into
    1866 * @count: The maximum number of bytes to read
   1867 * @ppos: The current "file" position
   1868 *
   1869 * Prints the "cpus" output into the user-provided buffer.
   1870 */
   1871static ssize_t
   1872osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
   1873		  loff_t *ppos)
   1874{
   1875	char *mask_str;
   1876	int len;
   1877
   1878	mutex_lock(&interface_lock);
   1879
   1880	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
   1881	mask_str = kmalloc(len, GFP_KERNEL);
   1882	if (!mask_str) {
   1883		count = -ENOMEM;
   1884		goto out_unlock;
   1885	}
   1886
   1887	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
   1888	if (len >= count) {
   1889		count = -EINVAL;
   1890		goto out_free;
   1891	}
   1892
   1893	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
   1894
   1895out_free:
   1896	kfree(mask_str);
   1897out_unlock:
   1898	mutex_unlock(&interface_lock);
   1899
   1900	return count;
   1901}
   1902
   1903/*
   1904 * osnoise_cpus_write - Write function for "cpus" entry
   1905 * @filp: The active open file structure
   1906 * @ubuf: The user buffer that contains the value to write
    1907 * @count: The maximum number of bytes to write to "file"
    1908 * @ppos: The current position in the file
    1909 *
    1910 * This function provides a write implementation for the "cpus"
    1911 * interface to the osnoise tracer. By default, the mask lists all CPUs,
    1912 * allowing the osnoise threads to run on any online CPU of the system.
    1913 * Writing a cpulist to this interface restricts the execution of
    1914 * osnoise to that set of CPUs. Why not use "tracing_cpumask"?
   1915 * Because the user might be interested in tracing what is running on
   1916 * other CPUs. For instance, one might run osnoise in one HT CPU
   1917 * while observing what is running on the sibling HT CPU.
   1918 */
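/*
 * For example, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *
 * restricts the workload threads to CPUs 0-3 and 8. The input is parsed
 * by cpulist_parse(), so the usual cpulist syntax applies.
 */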
   1919static ssize_t
   1920osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
   1921		   loff_t *ppos)
   1922{
   1923	cpumask_var_t osnoise_cpumask_new;
   1924	int running, err;
   1925	char buf[256];
   1926
   1927	if (count >= 256)
   1928		return -EINVAL;
   1929
   1930	if (copy_from_user(buf, ubuf, count))
   1931		return -EFAULT;
   1932
   1933	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
   1934		return -ENOMEM;
   1935
   1936	err = cpulist_parse(buf, osnoise_cpumask_new);
   1937	if (err)
   1938		goto err_free;
   1939
   1940	/*
   1941	 * trace_types_lock is taken to avoid concurrency on start/stop.
   1942	 */
   1943	mutex_lock(&trace_types_lock);
   1944	running = osnoise_has_registered_instances();
   1945	if (running)
   1946		stop_per_cpu_kthreads();
   1947
   1948	mutex_lock(&interface_lock);
   1949	/*
   1950	 * osnoise_cpumask is read by CPU hotplug operations.
   1951	 */
   1952	cpus_read_lock();
   1953
   1954	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
   1955
   1956	cpus_read_unlock();
   1957	mutex_unlock(&interface_lock);
   1958
   1959	if (running)
   1960		start_per_cpu_kthreads();
   1961	mutex_unlock(&trace_types_lock);
   1962
   1963	free_cpumask_var(osnoise_cpumask_new);
   1964	return count;
   1965
   1966err_free:
   1967	free_cpumask_var(osnoise_cpumask_new);
   1968
   1969	return err;
   1970}
   1971
   1972/*
   1973 * osnoise/runtime_us: cannot be greater than the period.
   1974 */
   1975static struct trace_min_max_param osnoise_runtime = {
   1976	.lock	= &interface_lock,
   1977	.val	= &osnoise_data.sample_runtime,
   1978	.max	= &osnoise_data.sample_period,
   1979	.min	= NULL,
   1980};
   1981
   1982/*
   1983 * osnoise/period_us: cannot be smaller than the runtime.
   1984 */
   1985static struct trace_min_max_param osnoise_period = {
   1986	.lock	= &interface_lock,
   1987	.val	= &osnoise_data.sample_period,
   1988	.max	= NULL,
   1989	.min	= &osnoise_data.sample_runtime,
   1990};
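
/*
 * The two parameters above reference each other as min/max, so runtime_us
 * can never be set above period_us (and vice versa). For example, with
 * the default 1000000 us period:
 *
 *	echo 500000 > osnoise/runtime_us
 *
 * makes each workload thread sample for 0.5 s and then sleep for the
 * remaining 0.5 s of each period.
 */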
   1991
   1992/*
   1993 * osnoise/stop_tracing_us: no limit.
   1994 */
   1995static struct trace_min_max_param osnoise_stop_tracing_in = {
   1996	.lock	= &interface_lock,
   1997	.val	= &osnoise_data.stop_tracing,
   1998	.max	= NULL,
   1999	.min	= NULL,
   2000};
   2001
   2002/*
   2003 * osnoise/stop_tracing_total_us: no limit.
   2004 */
   2005static struct trace_min_max_param osnoise_stop_tracing_total = {
   2006	.lock	= &interface_lock,
   2007	.val	= &osnoise_data.stop_tracing_total,
   2008	.max	= NULL,
   2009	.min	= NULL,
   2010};
   2011
   2012#ifdef CONFIG_TIMERLAT_TRACER
   2013/*
   2014 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
    2015 * latency is higher than this value (in us).
   2016 */
   2017static struct trace_min_max_param osnoise_print_stack = {
   2018	.lock	= &interface_lock,
   2019	.val	= &osnoise_data.print_stack,
   2020	.max	= NULL,
   2021	.min	= NULL,
   2022};
   2023
   2024/*
   2025 * osnoise/timerlat_period: min 100 us, max 1 s
   2026 */
   2027u64 timerlat_min_period = 100;
   2028u64 timerlat_max_period = 1000000;
   2029static struct trace_min_max_param timerlat_period = {
   2030	.lock	= &interface_lock,
   2031	.val	= &osnoise_data.timerlat_period,
   2032	.max	= &timerlat_max_period,
   2033	.min	= &timerlat_min_period,
   2034};
   2035#endif
   2036
   2037static const struct file_operations cpus_fops = {
   2038	.open		= tracing_open_generic,
   2039	.read		= osnoise_cpus_read,
   2040	.write		= osnoise_cpus_write,
   2041	.llseek		= generic_file_llseek,
   2042};
   2043
   2044#ifdef CONFIG_TIMERLAT_TRACER
   2045#ifdef CONFIG_STACKTRACE
   2046static int init_timerlat_stack_tracefs(struct dentry *top_dir)
   2047{
   2048	struct dentry *tmp;
   2049
   2050	tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
   2051				  &osnoise_print_stack, &trace_min_max_fops);
   2052	if (!tmp)
   2053		return -ENOMEM;
   2054
   2055	return 0;
   2056}
   2057#else /* CONFIG_STACKTRACE */
   2058static int init_timerlat_stack_tracefs(struct dentry *top_dir)
   2059{
   2060	return 0;
   2061}
   2062#endif /* CONFIG_STACKTRACE */
   2063
   2064/*
   2065 * init_timerlat_tracefs - A function to initialize the timerlat interface files
   2066 */
   2067static int init_timerlat_tracefs(struct dentry *top_dir)
   2068{
   2069	struct dentry *tmp;
   2070
   2071	tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
   2072				  &timerlat_period, &trace_min_max_fops);
   2073	if (!tmp)
   2074		return -ENOMEM;
   2075
   2076	return init_timerlat_stack_tracefs(top_dir);
   2077}
   2078#else /* CONFIG_TIMERLAT_TRACER */
   2079static int init_timerlat_tracefs(struct dentry *top_dir)
   2080{
   2081	return 0;
   2082}
   2083#endif /* CONFIG_TIMERLAT_TRACER */
   2084
   2085/*
   2086 * init_tracefs - A function to initialize the tracefs interface files
   2087 *
   2088 * This function creates entries in tracefs for "osnoise" and "timerlat".
   2089 * It creates these directories in the tracing directory, and within that
    2090 * directory the user can change and view the configs.
   2091 */
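/*
 * Assuming tracefs is mounted at /sys/kernel/tracing, the result is:
 *
 *	/sys/kernel/tracing/osnoise/period_us
 *	/sys/kernel/tracing/osnoise/runtime_us
 *	/sys/kernel/tracing/osnoise/stop_tracing_us
 *	/sys/kernel/tracing/osnoise/stop_tracing_total_us
 *	/sys/kernel/tracing/osnoise/cpus
 *
 * plus timerlat_period_us and print_stack when CONFIG_TIMERLAT_TRACER
 * (and CONFIG_STACKTRACE) are enabled.
 */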
   2092static int init_tracefs(void)
   2093{
   2094	struct dentry *top_dir;
   2095	struct dentry *tmp;
   2096	int ret;
   2097
   2098	ret = tracing_init_dentry();
   2099	if (ret)
   2100		return -ENOMEM;
   2101
   2102	top_dir = tracefs_create_dir("osnoise", NULL);
   2103	if (!top_dir)
   2104		return 0;
   2105
   2106	tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
   2107				  &osnoise_period, &trace_min_max_fops);
   2108	if (!tmp)
   2109		goto err;
   2110
   2111	tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
   2112				  &osnoise_runtime, &trace_min_max_fops);
   2113	if (!tmp)
   2114		goto err;
   2115
   2116	tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
   2117				  &osnoise_stop_tracing_in, &trace_min_max_fops);
   2118	if (!tmp)
   2119		goto err;
   2120
   2121	tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
   2122				  &osnoise_stop_tracing_total, &trace_min_max_fops);
   2123	if (!tmp)
   2124		goto err;
   2125
   2126	tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
   2127	if (!tmp)
   2128		goto err;
   2129
   2130	ret = init_timerlat_tracefs(top_dir);
   2131	if (ret)
   2132		goto err;
   2133
   2134	return 0;
   2135
   2136err:
   2137	tracefs_remove(top_dir);
   2138	return -ENOMEM;
   2139}
   2140
   2141static int osnoise_hook_events(void)
   2142{
   2143	int retval;
   2144
   2145	/*
    2146	 * Trace is already hooked; we are re-enabling it after
    2147	 * a stop_tracing_*.
   2148	 */
   2149	if (trace_osnoise_callback_enabled)
   2150		return 0;
   2151
   2152	retval = hook_irq_events();
   2153	if (retval)
   2154		return -EINVAL;
   2155
   2156	retval = hook_softirq_events();
   2157	if (retval)
   2158		goto out_unhook_irq;
   2159
   2160	retval = hook_thread_events();
   2161	/*
   2162	 * All fine!
   2163	 */
   2164	if (!retval)
   2165		return 0;
   2166
   2167	unhook_softirq_events();
   2168out_unhook_irq:
   2169	unhook_irq_events();
   2170	return -EINVAL;
   2171}
   2172
   2173static void osnoise_unhook_events(void)
   2174{
   2175	unhook_thread_events();
   2176	unhook_softirq_events();
   2177	unhook_irq_events();
   2178}
   2179
   2180/*
   2181 * osnoise_workload_start - start the workload and hook to events
   2182 */
   2183static int osnoise_workload_start(void)
   2184{
   2185	int retval;
   2186
   2187	/*
   2188	 * Instances need to be registered after calling workload
    2189	 * start. Hence, if there is already an instance, the
    2190	 * workload was already started. Otherwise, this
   2191	 * code is on the way to register the first instance,
   2192	 * and the workload will start.
   2193	 */
   2194	if (osnoise_has_registered_instances())
   2195		return 0;
   2196
   2197	osn_var_reset_all();
   2198
   2199	retval = osnoise_hook_events();
   2200	if (retval)
   2201		return retval;
   2202
   2203	/*
   2204	 * Make sure that ftrace_nmi_enter/exit() see reset values
   2205	 * before enabling trace_osnoise_callback_enabled.
   2206	 */
   2207	barrier();
   2208	trace_osnoise_callback_enabled = true;
   2209
   2210	retval = start_per_cpu_kthreads();
   2211	if (retval) {
   2212		trace_osnoise_callback_enabled = false;
   2213		/*
   2214		 * Make sure that ftrace_nmi_enter/exit() see
   2215		 * trace_osnoise_callback_enabled as false before continuing.
   2216		 */
   2217		barrier();
   2218
   2219		osnoise_unhook_events();
   2220		return retval;
   2221	}
   2222
   2223	return 0;
   2224}
   2225
   2226/*
   2227 * osnoise_workload_stop - stop the workload and unhook the events
   2228 */
   2229static void osnoise_workload_stop(void)
   2230{
   2231	/*
   2232	 * Instances need to be unregistered before calling
    2233	 * stop. Hence, if there is still a registered instance, at
    2234	 * least one other instance is running, and the workload will
    2235	 * not stop yet. Otherwise, this code is on the way to disable
   2236	 * the last instance, and the workload can stop.
   2237	 */
   2238	if (osnoise_has_registered_instances())
   2239		return;
   2240
   2241	/*
   2242	 * If callbacks were already disabled in a previous stop
    2243	 * call, there is no need to disable them again.
   2244	 *
   2245	 * For instance, this happens when tracing is stopped via:
   2246	 * echo 0 > tracing_on
   2247	 * echo nop > current_tracer.
   2248	 */
   2249	if (!trace_osnoise_callback_enabled)
   2250		return;
   2251
   2252	trace_osnoise_callback_enabled = false;
   2253	/*
   2254	 * Make sure that ftrace_nmi_enter/exit() see
   2255	 * trace_osnoise_callback_enabled as false before continuing.
   2256	 */
   2257	barrier();
   2258
   2259	stop_per_cpu_kthreads();
   2260
   2261	osnoise_unhook_events();
   2262}
   2263
   2264static void osnoise_tracer_start(struct trace_array *tr)
   2265{
   2266	int retval;
   2267
   2268	/*
   2269	 * If the instance is already registered, there is no need to
   2270	 * register it again.
   2271	 */
   2272	if (osnoise_instance_registered(tr))
   2273		return;
   2274
   2275	retval = osnoise_workload_start();
   2276	if (retval)
   2277		pr_err(BANNER "Error starting osnoise tracer\n");
   2278
   2279	osnoise_register_instance(tr);
   2280}
   2281
   2282static void osnoise_tracer_stop(struct trace_array *tr)
   2283{
   2284	osnoise_unregister_instance(tr);
   2285	osnoise_workload_stop();
   2286}
   2287
   2288static int osnoise_tracer_init(struct trace_array *tr)
   2289{
   2290	/*
   2291	 * Only allow osnoise tracer if timerlat tracer is not running
   2292	 * already.
   2293	 */
   2294	if (timerlat_enabled())
   2295		return -EBUSY;
   2296
   2297	tr->max_latency = 0;
   2298
   2299	osnoise_tracer_start(tr);
   2300	return 0;
   2301}
   2302
   2303static void osnoise_tracer_reset(struct trace_array *tr)
   2304{
   2305	osnoise_tracer_stop(tr);
   2306}
   2307
   2308static struct tracer osnoise_tracer __read_mostly = {
   2309	.name		= "osnoise",
   2310	.init		= osnoise_tracer_init,
   2311	.reset		= osnoise_tracer_reset,
   2312	.start		= osnoise_tracer_start,
   2313	.stop		= osnoise_tracer_stop,
   2314	.print_header	= print_osnoise_headers,
   2315	.allow_instances = true,
   2316};
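
/*
 * Typical usage, from the tracing directory:
 *
 *	echo osnoise > current_tracer
 *	cat trace
 *
 * The osnoise/ files created in init_tracefs() tune the workload before
 * or while it runs.
 */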
   2317
   2318#ifdef CONFIG_TIMERLAT_TRACER
   2319static void timerlat_tracer_start(struct trace_array *tr)
   2320{
   2321	int retval;
   2322
   2323	/*
   2324	 * If the instance is already registered, there is no need to
   2325	 * register it again.
   2326	 */
   2327	if (osnoise_instance_registered(tr))
   2328		return;
   2329
   2330	retval = osnoise_workload_start();
   2331	if (retval)
   2332		pr_err(BANNER "Error starting timerlat tracer\n");
   2333
   2334	osnoise_register_instance(tr);
   2335
   2336	return;
   2337}
   2338
   2339static void timerlat_tracer_stop(struct trace_array *tr)
   2340{
   2341	int cpu;
   2342
   2343	osnoise_unregister_instance(tr);
   2344
   2345	/*
   2346	 * Instruct the threads to stop only if this is the last instance.
   2347	 */
   2348	if (!osnoise_has_registered_instances()) {
   2349		for_each_online_cpu(cpu)
   2350			per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
   2351	}
   2352
   2353	osnoise_workload_stop();
   2354}
   2355
   2356static int timerlat_tracer_init(struct trace_array *tr)
   2357{
   2358	/*
   2359	 * Only allow timerlat tracer if osnoise tracer is not running already.
   2360	 */
   2361	if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
   2362		return -EBUSY;
   2363
   2364	/*
   2365	 * If this is the first instance, set timerlat_tracer to block
   2366	 * osnoise tracer start.
   2367	 */
   2368	if (!osnoise_has_registered_instances())
   2369		osnoise_data.timerlat_tracer = 1;
   2370
   2371	tr->max_latency = 0;
   2372	timerlat_tracer_start(tr);
   2373
   2374	return 0;
   2375}
   2376
   2377static void timerlat_tracer_reset(struct trace_array *tr)
   2378{
   2379	timerlat_tracer_stop(tr);
   2380
   2381	/*
   2382	 * If this is the last instance, reset timerlat_tracer allowing
   2383	 * osnoise to be started.
   2384	 */
   2385	if (!osnoise_has_registered_instances())
   2386		osnoise_data.timerlat_tracer = 0;
   2387}
   2388
   2389static struct tracer timerlat_tracer __read_mostly = {
   2390	.name		= "timerlat",
   2391	.init		= timerlat_tracer_init,
   2392	.reset		= timerlat_tracer_reset,
   2393	.start		= timerlat_tracer_start,
   2394	.stop		= timerlat_tracer_stop,
   2395	.print_header	= print_timerlat_headers,
   2396	.allow_instances = true,
   2397};
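
/*
 * Typical usage:
 *
 *	echo timerlat > current_tracer
 *	cat trace
 *
 * Each timer activation produces two samples: one from timerlat_irq()
 * (IRQ context) and one from the timerlat_main() thread, exposing both
 * the IRQ latency and the thread wakeup latency.
 */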
   2398
   2399__init static int init_timerlat_tracer(void)
   2400{
   2401	return register_tracer(&timerlat_tracer);
   2402}
   2403#else /* CONFIG_TIMERLAT_TRACER */
   2404__init static int init_timerlat_tracer(void)
   2405{
   2406	return 0;
   2407}
   2408#endif /* CONFIG_TIMERLAT_TRACER */
   2409
   2410__init static int init_osnoise_tracer(void)
   2411{
   2412	int ret;
   2413
   2414	mutex_init(&interface_lock);
   2415
   2416	cpumask_copy(&osnoise_cpumask, cpu_all_mask);
   2417
   2418	ret = register_tracer(&osnoise_tracer);
   2419	if (ret) {
   2420		pr_err(BANNER "Error registering osnoise!\n");
   2421		return ret;
   2422	}
   2423
   2424	ret = init_timerlat_tracer();
   2425	if (ret) {
   2426		pr_err(BANNER "Error registering timerlat!\n");
   2427		return ret;
   2428	}
   2429
   2430	osnoise_init_hotplug_support();
   2431
   2432	INIT_LIST_HEAD_RCU(&osnoise_instances);
   2433
   2434	init_tracefs();
   2435
   2436	return 0;
   2437}
   2438late_initcall(init_osnoise_tracer);