cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

coresight-trbe.c (47809B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
 * This driver enables the Trace Buffer Extension (TRBE) as a per-cpu coresight
 * sink device, which can then be paired with an appropriate per-cpu coresight
 * source device (ETE) to generate the required trace data. Trace can be
 * enabled via the perf framework.
      7 *
      8 * The AUX buffer handling is inspired from Arm SPE PMU driver.
      9 *
     10 * Copyright (C) 2020 ARM Ltd.
     11 *
     12 * Author: Anshuman Khandual <anshuman.khandual@arm.com>
     13 */
     14#define DRVNAME "arm_trbe"
     15
     16#define pr_fmt(fmt) DRVNAME ": " fmt
     17
     18#include <asm/barrier.h>
     19#include <asm/cpufeature.h>
     20
     21#include "coresight-self-hosted-trace.h"
     22#include "coresight-trbe.h"
     23
     24#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
     25
     26/*
     27 * A padding packet that will help the user space tools
     28 * in skipping relevant sections in the captured trace
     29 * data which could not be decoded. TRBE doesn't support
     30 * formatting the trace data, unlike the legacy CoreSight
     31 * sinks and thus we use ETE trace packets to pad the
     32 * sections of the buffer.
     33 */
     34#define ETE_IGNORE_PACKET		0x70
     35
     36/*
     37 * Minimum amount of meaningful trace will contain:
     38 * A-Sync, Trace Info, Trace On, Address, Atom.
     39 * This is about 44bytes of ETE trace. To be on
     40 * the safer side, we assume 64bytes is the minimum
     41 * space required for a meaningful session, before
     42 * we hit a "WRAP" event.
     43 */
     44#define TRBE_TRACE_MIN_BUF_SIZE		64
     45
     46enum trbe_fault_action {
     47	TRBE_FAULT_ACT_WRAP,
     48	TRBE_FAULT_ACT_SPURIOUS,
     49	TRBE_FAULT_ACT_FATAL,
     50};
     51
     52struct trbe_buf {
     53	/*
     54	 * Even though trbe_base represents vmap()
     55	 * mapped allocated buffer's start address,
     56	 * it's being as unsigned long for various
     57	 * arithmetic and comparision operations &
     58	 * also to be consistent with trbe_write &
     59	 * trbe_limit sibling pointers.
     60	 */
     61	unsigned long trbe_base;
     62	/* The base programmed into the TRBE */
     63	unsigned long trbe_hw_base;
     64	unsigned long trbe_limit;
     65	unsigned long trbe_write;
     66	int nr_pages;
     67	void **pages;
     68	bool snapshot;
     69	struct trbe_cpudata *cpudata;
     70};
     71
     72/*
     73 * TRBE erratum list
     74 *
 * The errata are defined in the arm64 generic cpu_errata framework.
 * Since the errata workarounds can be applied individually to the
 * affected CPUs inside the TRBE driver, we need to know whether a
 * given CPU is affected by an erratum. Unlike other erratum
 * workarounds, the TRBE driver needs to check this multiple times
 * during a trace session. Thus we need quicker access to the per-CPU
 * errata, rather than issuing a costly this_cpu_has_cap() every time.
 * We keep a set of the affected errata in trbe_cpudata, per TRBE.
     83 *
     84 * We rely on the corresponding cpucaps to be defined for a given
     85 * TRBE erratum. We map the given cpucap into a TRBE internal number
     86 * to make the tracking of the errata lean.
     87 *
     88 * This helps in :
     89 *   - Not duplicating the detection logic
     90 *   - Streamlined detection of erratum across the system
     91 */
     92#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE	0
     93#define TRBE_WORKAROUND_WRITE_OUT_OF_RANGE	1
     94#define TRBE_NEEDS_DRAIN_AFTER_DISABLE		2
     95#define TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE	3
     96#define TRBE_IS_BROKEN				4
     97
     98static int trbe_errata_cpucaps[] = {
     99	[TRBE_WORKAROUND_OVERWRITE_FILL_MODE] = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
    100	[TRBE_WORKAROUND_WRITE_OUT_OF_RANGE] = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE,
    101	[TRBE_NEEDS_DRAIN_AFTER_DISABLE] = ARM64_WORKAROUND_2064142,
    102	[TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE] = ARM64_WORKAROUND_2038923,
    103	[TRBE_IS_BROKEN] = ARM64_WORKAROUND_1902691,
    104	-1,		/* Sentinel, must be the last entry */
    105};
    106
    107/* The total number of listed errata in trbe_errata_cpucaps */
    108#define TRBE_ERRATA_MAX			(ARRAY_SIZE(trbe_errata_cpucaps) - 1)
    109
    110/*
    111 * Safe limit for the number of bytes that may be overwritten
    112 * when ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE is triggered.
    113 */
    114#define TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES	256
    115
    116/*
    117 * struct trbe_cpudata: TRBE instance specific data
    118 * @trbe_flag		- TRBE dirty/access flag support
    119 * @trbe_hw_align	- Actual TRBE alignment required for TRBPTR_EL1.
    120 * @trbe_align		- Software alignment used for the TRBPTR_EL1.
    121 * @cpu			- CPU this TRBE belongs to.
    122 * @mode		- Mode of current operation. (perf/disabled)
    123 * @drvdata		- TRBE specific drvdata
    124 * @errata		- Bit map for the errata on this TRBE.
    125 */
    126struct trbe_cpudata {
    127	bool trbe_flag;
    128	u64 trbe_hw_align;
    129	u64 trbe_align;
    130	int cpu;
    131	enum cs_mode mode;
    132	struct trbe_buf *buf;
    133	struct trbe_drvdata *drvdata;
    134	DECLARE_BITMAP(errata, TRBE_ERRATA_MAX);
    135};
    136
    137struct trbe_drvdata {
    138	struct trbe_cpudata __percpu *cpudata;
    139	struct perf_output_handle * __percpu *handle;
    140	struct hlist_node hotplug_node;
    141	int irq;
    142	cpumask_t supported_cpus;
    143	enum cpuhp_state trbe_online;
    144	struct platform_device *pdev;
    145};
    146
    147static void trbe_check_errata(struct trbe_cpudata *cpudata)
    148{
    149	int i;
    150
    151	for (i = 0; i < TRBE_ERRATA_MAX; i++) {
    152		int cap = trbe_errata_cpucaps[i];
    153
    154		if (WARN_ON_ONCE(cap < 0))
    155			return;
    156		if (this_cpu_has_cap(cap))
    157			set_bit(i, cpudata->errata);
    158	}
    159}
    160
    161static inline bool trbe_has_erratum(struct trbe_cpudata *cpudata, int i)
    162{
    163	return (i < TRBE_ERRATA_MAX) && test_bit(i, cpudata->errata);
    164}
    165
    166static inline bool trbe_may_overwrite_in_fill_mode(struct trbe_cpudata *cpudata)
    167{
    168	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE);
    169}
    170
    171static inline bool trbe_may_write_out_of_range(struct trbe_cpudata *cpudata)
    172{
    173	return trbe_has_erratum(cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE);
    174}
    175
    176static inline bool trbe_needs_drain_after_disable(struct trbe_cpudata *cpudata)
    177{
    178	/*
    179	 * Errata affected TRBE implementation will need TSB CSYNC and
    180	 * DSB in order to prevent subsequent writes into certain TRBE
    181	 * system registers from being ignored and not effected.
    182	 */
    183	return trbe_has_erratum(cpudata, TRBE_NEEDS_DRAIN_AFTER_DISABLE);
    184}
    185
    186static inline bool trbe_needs_ctxt_sync_after_enable(struct trbe_cpudata *cpudata)
    187{
    188	/*
    189	 * Errata affected TRBE implementation will need an additional
    190	 * context synchronization in order to prevent an inconsistent
    191	 * TRBE prohibited region view on the CPU which could possibly
    192	 * corrupt the TRBE buffer or the TRBE state.
    193	 */
    194	return trbe_has_erratum(cpudata, TRBE_NEEDS_CTXT_SYNC_AFTER_ENABLE);
    195}
    196
    197static inline bool trbe_is_broken(struct trbe_cpudata *cpudata)
    198{
    199	return trbe_has_erratum(cpudata, TRBE_IS_BROKEN);
    200}
    201
    202static int trbe_alloc_node(struct perf_event *event)
    203{
    204	if (event->cpu == -1)
    205		return NUMA_NO_NODE;
    206	return cpu_to_node(event->cpu);
    207}
    208
    209static inline void trbe_drain_buffer(void)
    210{
    211	tsb_csync();
    212	dsb(nsh);
    213}
    214
    215static inline void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
    216{
    217	/*
    218	 * Enable the TRBE without clearing LIMITPTR which
    219	 * might be required for fetching the buffer limits.
    220	 */
    221	trblimitr |= TRBLIMITR_ENABLE;
    222	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
    223
    224	/* Synchronize the TRBE enable event */
    225	isb();
    226
    227	if (trbe_needs_ctxt_sync_after_enable(cpudata))
    228		isb();
    229}
    230
    231static inline void set_trbe_disabled(struct trbe_cpudata *cpudata)
    232{
    233	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
    234
    235	/*
    236	 * Disable the TRBE without clearing LIMITPTR which
    237	 * might be required for fetching the buffer limits.
    238	 */
    239	trblimitr &= ~TRBLIMITR_ENABLE;
    240	write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
    241
    242	if (trbe_needs_drain_after_disable(cpudata))
    243		trbe_drain_buffer();
    244	isb();
    245}
    246
    247static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
    248{
    249	trbe_drain_buffer();
    250	set_trbe_disabled(cpudata);
    251}
    252
    253static void trbe_reset_local(struct trbe_cpudata *cpudata)
    254{
    255	trbe_drain_and_disable_local(cpudata);
    256	write_sysreg_s(0, SYS_TRBLIMITR_EL1);
    257	write_sysreg_s(0, SYS_TRBPTR_EL1);
    258	write_sysreg_s(0, SYS_TRBBASER_EL1);
    259	write_sysreg_s(0, SYS_TRBSR_EL1);
    260}
    261
    262static void trbe_report_wrap_event(struct perf_output_handle *handle)
    263{
    264	/*
    265	 * Mark the buffer to indicate that there was a WRAP event by
    266	 * setting the COLLISION flag. This indicates to the user that
    267	 * the TRBE trace collection was stopped without stopping the
    268	 * ETE and thus there might be some amount of trace that was
    269	 * lost between the time the WRAP was detected and the IRQ
    270	 * was consumed by the CPU.
    271	 *
    272	 * Setting the TRUNCATED flag would move the event to STOPPED
    273	 * state unnecessarily, even when there is space left in the
    274	 * ring buffer. Using the COLLISION flag doesn't have this side
    275	 * effect. We only set TRUNCATED flag when there is no space
    276	 * left in the ring buffer.
    277	 */
    278	perf_aux_output_flag(handle, PERF_AUX_FLAG_COLLISION);
    279}
    280
    281static void trbe_stop_and_truncate_event(struct perf_output_handle *handle)
    282{
    283	struct trbe_buf *buf = etm_perf_sink_config(handle);
    284
    285	/*
    286	 * We cannot proceed with the buffer collection and we
    287	 * do not have any data for the current session. The
    288	 * etm_perf driver expects to close out the aux_buffer
    289	 * at event_stop(). So disable the TRBE here and leave
    290	 * the update_buffer() to return a 0 size.
    291	 */
    292	trbe_drain_and_disable_local(buf->cpudata);
    293	perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
    294	perf_aux_output_end(handle, 0);
    295	*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
    296}
    297
    298/*
    299 * TRBE Buffer Management
    300 *
    301 * The TRBE buffer spans from the base pointer till the limit pointer. When enabled,
    302 * it starts writing trace data from the write pointer onward till the limit pointer.
    303 * When the write pointer reaches the address just before the limit pointer, it gets
    304 * wrapped around again to the base pointer. This is called a TRBE wrap event, which
    305 * generates a maintenance interrupt when operated in WRAP or FILL mode. This driver
    306 * uses FILL mode, where the TRBE stops the trace collection at wrap event. The IRQ
    307 * handler updates the AUX buffer and re-enables the TRBE with updated WRITE and
    308 * LIMIT pointers.
    309 *
    310 *	Wrap around with an IRQ
    311 *	------ < ------ < ------- < ----- < -----
    312 *	|					|
    313 *	------ > ------ > ------- > ----- > -----
    314 *
    315 *	+---------------+-----------------------+
    316 *	|		|			|
    317 *	+---------------+-----------------------+
    318 *	Base Pointer	Write Pointer		Limit Pointer
    319 *
 * The base and limit pointers always need to be PAGE_SIZE aligned. But the write
 * pointer can be aligned to the implementation defined TRBE trace buffer alignment
 * as captured in trbe_cpudata->trbe_align.
    323 *
    324 *
    325 *		head		tail		wakeup
    326 *	+---------------------------------------+----- ~ ~ ------
    327 *	|$$$$$$$|################|$$$$$$$$$$$$$$|		|
    328 *	+---------------------------------------+----- ~ ~ ------
    329 *	Base Pointer	Write Pointer		Limit Pointer
    330 *
 * The perf_output_handle indices (head, tail, wakeup) are monotonically increasing
 * values which track all the driver writes and user reads from the perf auxiliary
 * buffer. Generally [head..tail] is the area where the driver can write into unless
 * the wakeup is behind the tail. The enabled TRBE buffer span needs to be adjusted
 * and configured depending on the perf_output_handle indices, so that the driver
 * does not overwrite areas of the perf auxiliary buffer which are being, or are yet
 * to be, consumed from user space. The enabled TRBE buffer area is a moving subset
 * of the allocated perf auxiliary buffer.
    339 */
    340
    341static void __trbe_pad_buf(struct trbe_buf *buf, u64 offset, int len)
    342{
    343	memset((void *)buf->trbe_base + offset, ETE_IGNORE_PACKET, len);
    344}
    345
    346static void trbe_pad_buf(struct perf_output_handle *handle, int len)
    347{
    348	struct trbe_buf *buf = etm_perf_sink_config(handle);
    349	u64 head = PERF_IDX2OFF(handle->head, buf);
    350
    351	__trbe_pad_buf(buf, head, len);
    352	if (!buf->snapshot)
    353		perf_aux_output_skip(handle, len);
    354}
    355
    356static unsigned long trbe_snapshot_offset(struct perf_output_handle *handle)
    357{
    358	struct trbe_buf *buf = etm_perf_sink_config(handle);
    359
    360	/*
    361	 * The ETE trace has alignment synchronization packets allowing
    362	 * the decoder to reset in case of an overflow or corruption.
    363	 * So we can use the entire buffer for the snapshot mode.
    364	 */
    365	return buf->nr_pages * PAGE_SIZE;
    366}
    367
    368static u64 trbe_min_trace_buf_size(struct perf_output_handle *handle)
    369{
    370	u64 size = TRBE_TRACE_MIN_BUF_SIZE;
    371	struct trbe_buf *buf = etm_perf_sink_config(handle);
    372	struct trbe_cpudata *cpudata = buf->cpudata;
    373
    374	/*
    375	 * When the TRBE is affected by an erratum that could make it
    376	 * write to the next "virtually addressed" page beyond the LIMIT.
    377	 * We need to make sure there is always a PAGE after the LIMIT,
    378	 * within the buffer. Thus we ensure there is at least an extra
    379	 * page than normal. With this we could then adjust the LIMIT
    380	 * pointer down by a PAGE later.
    381	 */
    382	if (trbe_may_write_out_of_range(cpudata))
    383		size += PAGE_SIZE;
    384	return size;
    385}
    386
    387/*
    388 * TRBE Limit Calculation
    389 *
    390 * The following markers are used to illustrate various TRBE buffer situations.
    391 *
    392 * $$$$ - Data area, unconsumed captured trace data, not to be overridden
    393 * #### - Free area, enabled, trace will be written
    394 * %%%% - Free area, disabled, trace will not be written
    395 * ==== - Free area, padded with ETE_IGNORE_PACKET, trace will be skipped
    396 */
    397static unsigned long __trbe_normal_offset(struct perf_output_handle *handle)
    398{
    399	struct trbe_buf *buf = etm_perf_sink_config(handle);
    400	struct trbe_cpudata *cpudata = buf->cpudata;
    401	const u64 bufsize = buf->nr_pages * PAGE_SIZE;
    402	u64 limit = bufsize;
    403	u64 head, tail, wakeup;
    404
    405	head = PERF_IDX2OFF(handle->head, buf);
    406
    407	/*
    408	 *		head
    409	 *	------->|
    410	 *	|
    411	 *	head	TRBE align	tail
    412	 * +----|-------|---------------|-------+
    413	 * |$$$$|=======|###############|$$$$$$$|
    414	 * +----|-------|---------------|-------+
    415	 * trbe_base				trbe_base + nr_pages
    416	 *
    417	 * Perf aux buffer output head position can be misaligned depending on
    418	 * various factors including user space reads. In case misaligned, head
    419	 * needs to be aligned before TRBE can be configured. Pad the alignment
    420	 * gap with ETE_IGNORE_PACKET bytes that will be ignored by user tools
    421	 * and skip this section thus advancing the head.
    422	 */
    423	if (!IS_ALIGNED(head, cpudata->trbe_align)) {
    424		unsigned long delta = roundup(head, cpudata->trbe_align) - head;
    425
    426		delta = min(delta, handle->size);
    427		trbe_pad_buf(handle, delta);
    428		head = PERF_IDX2OFF(handle->head, buf);
    429	}
    430
    431	/*
    432	 *	head = tail (size = 0)
    433	 * +----|-------------------------------+
    434	 * |$$$$|$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$	|
    435	 * +----|-------------------------------+
    436	 * trbe_base				trbe_base + nr_pages
    437	 *
    438	 * Perf aux buffer does not have any space for the driver to write into.
    439	 */
    440	if (!handle->size)
    441		return 0;
    442
    443	/* Compute the tail and wakeup indices now that we've aligned head */
    444	tail = PERF_IDX2OFF(handle->head + handle->size, buf);
    445	wakeup = PERF_IDX2OFF(handle->wakeup, buf);
    446
    447	/*
    448	 * Lets calculate the buffer area which TRBE could write into. There
    449	 * are three possible scenarios here. Limit needs to be aligned with
    450	 * PAGE_SIZE per the TRBE requirement. Always avoid clobbering the
    451	 * unconsumed data.
    452	 *
    453	 * 1) head < tail
    454	 *
    455	 *	head			tail
    456	 * +----|-----------------------|-------+
    457	 * |$$$$|#######################|$$$$$$$|
    458	 * +----|-----------------------|-------+
    459	 * trbe_base			limit	trbe_base + nr_pages
    460	 *
    461	 * TRBE could write into [head..tail] area. Unless the tail is right at
    462	 * the end of the buffer, neither an wrap around nor an IRQ is expected
    463	 * while being enabled.
    464	 *
    465	 * 2) head == tail
    466	 *
    467	 *	head = tail (size > 0)
    468	 * +----|-------------------------------+
    469	 * |%%%%|###############################|
    470	 * +----|-------------------------------+
    471	 * trbe_base				limit = trbe_base + nr_pages
    472	 *
    473	 * TRBE should just write into [head..base + nr_pages] area even though
    474	 * the entire buffer is empty. Reason being, when the trace reaches the
    475	 * end of the buffer, it will just wrap around with an IRQ giving an
    476	 * opportunity to reconfigure the buffer.
    477	 *
    478	 * 3) tail < head
    479	 *
    480	 *	tail			head
    481	 * +----|-----------------------|-------+
    482	 * |%%%%|$$$$$$$$$$$$$$$$$$$$$$$|#######|
    483	 * +----|-----------------------|-------+
    484	 * trbe_base				limit = trbe_base + nr_pages
    485	 *
    486	 * TRBE should just write into [head..base + nr_pages] area even though
    487	 * the [trbe_base..tail] is also empty. Reason being, when the trace
    488	 * reaches the end of the buffer, it will just wrap around with an IRQ
    489	 * giving an opportunity to reconfigure the buffer.
    490	 */
    491	if (head < tail)
    492		limit = round_down(tail, PAGE_SIZE);
    493
    494	/*
    495	 * Wakeup may be arbitrarily far into the future. If it's not in the
    496	 * current generation, either we'll wrap before hitting it, or it's
    497	 * in the past and has been handled already.
    498	 *
    499	 * If there's a wakeup before we wrap, arrange to be woken up by the
    500	 * page boundary following it. Keep the tail boundary if that's lower.
    501	 *
    502	 *	head		wakeup	tail
    503	 * +----|---------------|-------|-------+
    504	 * |$$$$|###############|%%%%%%%|$$$$$$$|
    505	 * +----|---------------|-------|-------+
    506	 * trbe_base		limit		trbe_base + nr_pages
    507	 */
    508	if (handle->wakeup < (handle->head + handle->size) && head <= wakeup)
    509		limit = min(limit, round_up(wakeup, PAGE_SIZE));
    510
    511	/*
    512	 * There are two situation when this can happen i.e limit is before
    513	 * the head and hence TRBE cannot be configured.
    514	 *
    515	 * 1) head < tail (aligned down with PAGE_SIZE) and also they are both
    516	 * within the same PAGE size range.
    517	 *
    518	 *			PAGE_SIZE
    519	 *		|----------------------|
    520	 *
    521	 *		limit	head	tail
    522	 * +------------|------|--------|-------+
    523	 * |$$$$$$$$$$$$$$$$$$$|========|$$$$$$$|
    524	 * +------------|------|--------|-------+
    525	 * trbe_base				trbe_base + nr_pages
    526	 *
    527	 * 2) head < wakeup (aligned up with PAGE_SIZE) < tail and also both
    528	 * head and wakeup are within same PAGE size range.
    529	 *
    530	 *		PAGE_SIZE
    531	 *	|----------------------|
    532	 *
    533	 *	limit	head	wakeup  tail
    534	 * +----|------|-------|--------|-------+
    535	 * |$$$$$$$$$$$|=======|========|$$$$$$$|
    536	 * +----|------|-------|--------|-------+
    537	 * trbe_base				trbe_base + nr_pages
    538	 */
    539	if (limit > head)
    540		return limit;
    541
    542	trbe_pad_buf(handle, handle->size);
    543	return 0;
    544}
    545
    546static unsigned long trbe_normal_offset(struct perf_output_handle *handle)
    547{
    548	struct trbe_buf *buf = etm_perf_sink_config(handle);
    549	u64 limit = __trbe_normal_offset(handle);
    550	u64 head = PERF_IDX2OFF(handle->head, buf);
    551
    552	/*
    553	 * If the head is too close to the limit and we don't
    554	 * have space for a meaningful run, we rather pad it
    555	 * and start fresh.
    556	 *
    557	 * We might have to do this more than once to make sure
    558	 * we have enough required space.
    559	 */
    560	while (limit && ((limit - head) < trbe_min_trace_buf_size(handle))) {
    561		trbe_pad_buf(handle, limit - head);
    562		limit = __trbe_normal_offset(handle);
    563		head = PERF_IDX2OFF(handle->head, buf);
    564	}
    565	return limit;
    566}
    567
    568static unsigned long compute_trbe_buffer_limit(struct perf_output_handle *handle)
    569{
    570	struct trbe_buf *buf = etm_perf_sink_config(handle);
    571	unsigned long offset;
    572
    573	if (buf->snapshot)
    574		offset = trbe_snapshot_offset(handle);
    575	else
    576		offset = trbe_normal_offset(handle);
    577	return buf->trbe_base + offset;
    578}
    579
    580static void clr_trbe_status(void)
    581{
    582	u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
    583
    584	WARN_ON(is_trbe_enabled());
    585	trbsr &= ~TRBSR_IRQ;
    586	trbsr &= ~TRBSR_TRG;
    587	trbsr &= ~TRBSR_WRAP;
    588	trbsr &= ~(TRBSR_EC_MASK << TRBSR_EC_SHIFT);
    589	trbsr &= ~(TRBSR_BSC_MASK << TRBSR_BSC_SHIFT);
    590	trbsr &= ~TRBSR_STOP;
    591	write_sysreg_s(trbsr, SYS_TRBSR_EL1);
    592}
    593
    594static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
    595{
    596	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
    597	unsigned long addr = buf->trbe_limit;
    598
    599	WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_LIMIT_SHIFT)));
    600	WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
    601
    602	trblimitr &= ~TRBLIMITR_NVM;
    603	trblimitr &= ~(TRBLIMITR_FILL_MODE_MASK << TRBLIMITR_FILL_MODE_SHIFT);
    604	trblimitr &= ~(TRBLIMITR_TRIG_MODE_MASK << TRBLIMITR_TRIG_MODE_SHIFT);
    605	trblimitr &= ~(TRBLIMITR_LIMIT_MASK << TRBLIMITR_LIMIT_SHIFT);
    606
    607	/*
    608	 * Fill trace buffer mode is used here while configuring the
    609	 * TRBE for trace capture. In this particular mode, the trace
    610	 * collection is stopped and a maintenance interrupt is raised
    611	 * when the current write pointer wraps. This pause in trace
    612	 * collection gives the software an opportunity to capture the
    613	 * trace data in the interrupt handler, before reconfiguring
    614	 * the TRBE.
    615	 */
    616	trblimitr |= (TRBE_FILL_MODE_FILL & TRBLIMITR_FILL_MODE_MASK) << TRBLIMITR_FILL_MODE_SHIFT;
    617
    618	/*
    619	 * Trigger mode is not used here while configuring the TRBE for
    620	 * the trace capture. Hence just keep this in the ignore mode.
    621	 */
    622	trblimitr |= (TRBE_TRIG_MODE_IGNORE & TRBLIMITR_TRIG_MODE_MASK) <<
    623		      TRBLIMITR_TRIG_MODE_SHIFT;
    624	trblimitr |= (addr & PAGE_MASK);
    625	set_trbe_enabled(buf->cpudata, trblimitr);
    626}
    627
    628static void trbe_enable_hw(struct trbe_buf *buf)
    629{
    630	WARN_ON(buf->trbe_hw_base < buf->trbe_base);
    631	WARN_ON(buf->trbe_write < buf->trbe_hw_base);
    632	WARN_ON(buf->trbe_write >= buf->trbe_limit);
    633	set_trbe_disabled(buf->cpudata);
    634	clr_trbe_status();
    635	set_trbe_base_pointer(buf->trbe_hw_base);
    636	set_trbe_write_pointer(buf->trbe_write);
    637
    638	/*
    639	 * Synchronize all the register updates
    640	 * till now before enabling the TRBE.
    641	 */
    642	isb();
    643	set_trbe_limit_pointer_enabled(buf);
    644}
    645
    646static enum trbe_fault_action trbe_get_fault_act(struct perf_output_handle *handle,
    647						 u64 trbsr)
    648{
    649	int ec = get_trbe_ec(trbsr);
    650	int bsc = get_trbe_bsc(trbsr);
    651	struct trbe_buf *buf = etm_perf_sink_config(handle);
    652	struct trbe_cpudata *cpudata = buf->cpudata;
    653
    654	WARN_ON(is_trbe_running(trbsr));
    655	if (is_trbe_trg(trbsr) || is_trbe_abort(trbsr))
    656		return TRBE_FAULT_ACT_FATAL;
    657
    658	if ((ec == TRBE_EC_STAGE1_ABORT) || (ec == TRBE_EC_STAGE2_ABORT))
    659		return TRBE_FAULT_ACT_FATAL;
    660
    661	/*
    662	 * If the trbe is affected by TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
    663	 * it might write data after a WRAP event in the fill mode.
    664	 * Thus the check TRBPTR == TRBBASER will not be honored.
    665	 */
    666	if ((is_trbe_wrap(trbsr) && (ec == TRBE_EC_OTHERS) && (bsc == TRBE_BSC_FILLED)) &&
    667	    (trbe_may_overwrite_in_fill_mode(cpudata) ||
    668	     get_trbe_write_pointer() == get_trbe_base_pointer()))
    669		return TRBE_FAULT_ACT_WRAP;
    670
    671	return TRBE_FAULT_ACT_SPURIOUS;
    672}
    673
    674static unsigned long trbe_get_trace_size(struct perf_output_handle *handle,
    675					 struct trbe_buf *buf, bool wrap)
    676{
    677	u64 write;
    678	u64 start_off, end_off;
    679	u64 size;
    680	u64 overwrite_skip = TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
    681
    682	/*
    683	 * If the TRBE has wrapped around the write pointer has
    684	 * wrapped and should be treated as limit.
    685	 *
    686	 * When the TRBE is affected by TRBE_WORKAROUND_WRITE_OUT_OF_RANGE,
    687	 * it may write upto 64bytes beyond the "LIMIT". The driver already
    688	 * keeps a valid page next to the LIMIT and we could potentially
    689	 * consume the trace data that may have been collected there. But we
    690	 * cannot be really sure it is available, and the TRBPTR may not
    691	 * indicate the same. Also, affected cores are also affected by another
    692	 * erratum which forces the PAGE_SIZE alignment on the TRBPTR, and thus
    693	 * could potentially pad an entire PAGE_SIZE - 64bytes, to get those
    694	 * 64bytes. Thus we ignore the potential triggering of the erratum
    695	 * on WRAP and limit the data to LIMIT.
    696	 */
    697	if (wrap)
    698		write = get_trbe_limit_pointer();
    699	else
    700		write = get_trbe_write_pointer();
    701
    702	/*
    703	 * TRBE may use a different base address than the base
    704	 * of the ring buffer. Thus use the beginning of the ring
    705	 * buffer to compute the offsets.
    706	 */
    707	end_off = write - buf->trbe_base;
    708	start_off = PERF_IDX2OFF(handle->head, buf);
    709
    710	if (WARN_ON_ONCE(end_off < start_off))
    711		return 0;
    712
    713	size = end_off - start_off;
    714	/*
    715	 * If the TRBE is affected by the following erratum, we must fill
    716	 * the space we skipped with IGNORE packets. And we are always
    717	 * guaranteed to have at least a PAGE_SIZE space in the buffer.
    718	 */
    719	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE) &&
    720	    !WARN_ON(size < overwrite_skip))
    721		__trbe_pad_buf(buf, start_off, overwrite_skip);
    722
    723	return size;
    724}
    725
    726static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
    727				   struct perf_event *event, void **pages,
    728				   int nr_pages, bool snapshot)
    729{
    730	struct trbe_buf *buf;
    731	struct page **pglist;
    732	int i;
    733
    734	/*
    735	 * TRBE LIMIT and TRBE WRITE pointers must be page aligned. But with
    736	 * just a single page, there would not be any room left while writing
    737	 * into a partially filled TRBE buffer after the page size alignment.
    738	 * Hence restrict the minimum buffer size as two pages.
    739	 */
    740	if (nr_pages < 2)
    741		return NULL;
    742
    743	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, trbe_alloc_node(event));
    744	if (!buf)
    745		return ERR_PTR(-ENOMEM);
    746
    747	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
    748	if (!pglist) {
    749		kfree(buf);
    750		return ERR_PTR(-ENOMEM);
    751	}
    752
    753	for (i = 0; i < nr_pages; i++)
    754		pglist[i] = virt_to_page(pages[i]);
    755
    756	buf->trbe_base = (unsigned long)vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
    757	if (!buf->trbe_base) {
    758		kfree(pglist);
    759		kfree(buf);
    760		return ERR_PTR(-ENOMEM);
    761	}
    762	buf->trbe_limit = buf->trbe_base + nr_pages * PAGE_SIZE;
    763	buf->trbe_write = buf->trbe_base;
    764	buf->snapshot = snapshot;
    765	buf->nr_pages = nr_pages;
    766	buf->pages = pages;
    767	kfree(pglist);
    768	return buf;
    769}
    770
    771static void arm_trbe_free_buffer(void *config)
    772{
    773	struct trbe_buf *buf = config;
    774
    775	vunmap((void *)buf->trbe_base);
    776	kfree(buf);
    777}
    778
    779static unsigned long arm_trbe_update_buffer(struct coresight_device *csdev,
    780					    struct perf_output_handle *handle,
    781					    void *config)
    782{
    783	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
    784	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
    785	struct trbe_buf *buf = config;
    786	enum trbe_fault_action act;
    787	unsigned long size, status;
    788	unsigned long flags;
    789	bool wrap = false;
    790
    791	WARN_ON(buf->cpudata != cpudata);
    792	WARN_ON(cpudata->cpu != smp_processor_id());
    793	WARN_ON(cpudata->drvdata != drvdata);
    794	if (cpudata->mode != CS_MODE_PERF)
    795		return 0;
    796
    797	/*
    798	 * We are about to disable the TRBE. And this could in turn
    799	 * fill up the buffer triggering, an IRQ. This could be consumed
    800	 * by the PE asynchronously, causing a race here against
    801	 * the IRQ handler in closing out the handle. So, let us
    802	 * make sure the IRQ can't trigger while we are collecting
    803	 * the buffer. We also make sure that a WRAP event is handled
    804	 * accordingly.
    805	 */
    806	local_irq_save(flags);
    807
    808	/*
    809	 * If the TRBE was disabled due to lack of space in the AUX buffer or a
    810	 * spurious fault, the driver leaves it disabled, truncating the buffer.
    811	 * Since the etm_perf driver expects to close out the AUX buffer, the
    812	 * driver skips it. Thus, just pass in 0 size here to indicate that the
    813	 * buffer was truncated.
    814	 */
    815	if (!is_trbe_enabled()) {
    816		size = 0;
    817		goto done;
    818	}
    819	/*
    820	 * perf handle structure needs to be shared with the TRBE IRQ handler for
    821	 * capturing trace data and restarting the handle. There is a probability
    822	 * of an undefined reference based crash when etm event is being stopped
    823	 * while a TRBE IRQ also getting processed. This happens due the release
    824	 * of perf handle via perf_aux_output_end() in etm_event_stop(). Stopping
    825	 * the TRBE here will ensure that no IRQ could be generated when the perf
    826	 * handle gets freed in etm_event_stop().
    827	 */
    828	trbe_drain_and_disable_local(cpudata);
    829
    830	/* Check if there is a pending interrupt and handle it here */
    831	status = read_sysreg_s(SYS_TRBSR_EL1);
    832	if (is_trbe_irq(status)) {
    833
    834		/*
    835		 * Now that we are handling the IRQ here, clear the IRQ
    836		 * from the status, to let the irq handler know that it
    837		 * is taken care of.
    838		 */
    839		clr_trbe_irq();
    840		isb();
    841
    842		act = trbe_get_fault_act(handle, status);
    843		/*
    844		 * If this was not due to a WRAP event, we have some
    845		 * errors and as such buffer is empty.
    846		 */
    847		if (act != TRBE_FAULT_ACT_WRAP) {
    848			size = 0;
    849			goto done;
    850		}
    851
    852		trbe_report_wrap_event(handle);
    853		wrap = true;
    854	}
    855
    856	size = trbe_get_trace_size(handle, buf, wrap);
    857
    858done:
    859	local_irq_restore(flags);
    860
    861	if (buf->snapshot)
    862		handle->head += size;
    863	return size;
    864}
    865
    866
    867static int trbe_apply_work_around_before_enable(struct trbe_buf *buf)
    868{
    869	/*
    870	 * TRBE_WORKAROUND_OVERWRITE_FILL_MODE causes the TRBE to overwrite a few cache
    871	 * line size from the "TRBBASER_EL1" in the event of a "FILL".
    872	 * Thus, we could loose some amount of the trace at the base.
    873	 *
    874	 * Before Fix:
    875	 *
    876	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
    877	 *  |                   \/                       /
    878	 *   -------------------------------------------------------------
    879	 *  |   Pg0      |   Pg1       |           |          |  PgN     |
    880	 *   -------------------------------------------------------------
    881	 *
    882	 * In the normal course of action, we would set the TRBBASER to the
    883	 * beginning of the ring-buffer (normal-BASE). But with the erratum,
    884	 * the TRBE could overwrite the contents at the "normal-BASE", after
    885	 * hitting the "normal-LIMIT", since it doesn't stop as expected. And
    886	 * this is wrong. This could result in overwriting trace collected in
    887	 * one of the previous runs, being consumed by the user. So we must
    888	 * always make sure that the TRBBASER is within the region
    889	 * [head, head+size]. Note that TRBBASER must be PAGE aligned,
    890	 *
    891	 *  After moving the BASE:
    892	 *
    893	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
    894	 *  |                   \/                       /
    895	 *   -------------------------------------------------------------
    896	 *  |         |          |xyzdef.     |..   tuvw|                |
    897	 *   -------------------------------------------------------------
    898	 *                      /
    899	 *              New-BASER
    900	 *
    901	 * Also, we would set the TRBPTR to head (after adjusting for
    902	 * alignment) at normal-PTR. This would mean that the last few bytes
    903	 * of the trace (say, "xyz") might overwrite the first few bytes of
    904	 * trace written ("abc"). More importantly they will appear in what
    905	 * userspace sees as the beginning of the trace, which is wrong. We may
    906	 * not always have space to move the latest trace "xyz" to the correct
    907	 * order as it must appear beyond the LIMIT. (i.e, [head..head+size]).
    908	 * Thus it is easier to ignore those bytes than to complicate the
    909	 * driver to move it, assuming that the erratum was triggered and
    910	 * doing additional checks to see if there is indeed allowed space at
    911	 * TRBLIMITR.LIMIT.
    912	 *
    913	 *  Thus the full workaround will move the BASE and the PTR and would
    914	 *  look like (after padding at the skipped bytes at the end of
    915	 *  session) :
    916	 *
    917	 *  normal-BASE     head (normal-TRBPTR)         tail (normal-LIMIT)
    918	 *  |                   \/                       /
    919	 *   -------------------------------------------------------------
    920	 *  |         |          |///abc..     |..  rst|                |
    921	 *   -------------------------------------------------------------
    922	 *                      /    |
    923	 *              New-BASER    New-TRBPTR
    924	 *
    925	 * To summarize, with the work around:
    926	 *
    927	 *  - We always align the offset for the next session to PAGE_SIZE
    928	 *    (This is to ensure we can program the TRBBASER to this offset
    929	 *    within the region [head...head+size]).
    930	 *
    931	 *  - At TRBE enable:
    932	 *     - Set the TRBBASER to the page aligned offset of the current
    933	 *       proposed write offset. (which is guaranteed to be aligned
    934	 *       as above)
    935	 *     - Move the TRBPTR to skip first 256bytes (that might be
    936	 *       overwritten with the erratum). This ensures that the trace
    937	 *       generated in the session is not re-written.
    938	 *
    939	 *  - At trace collection:
    940	 *     - Pad the 256bytes skipped above again with IGNORE packets.
    941	 */
    942	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_OVERWRITE_FILL_MODE)) {
    943		if (WARN_ON(!IS_ALIGNED(buf->trbe_write, PAGE_SIZE)))
    944			return -EINVAL;
    945		buf->trbe_hw_base = buf->trbe_write;
    946		buf->trbe_write += TRBE_WORKAROUND_OVERWRITE_FILL_MODE_SKIP_BYTES;
    947	}
    948
    949	/*
    950	 * TRBE_WORKAROUND_WRITE_OUT_OF_RANGE could cause the TRBE to write to
    951	 * the next page after the TRBLIMITR.LIMIT. For perf, the "next page"
    952	 * may be:
    953	 *     - The page beyond the ring buffer. This could mean, TRBE could
    954	 *       corrupt another entity (kernel / user)
    955	 *     - A portion of the "ring buffer" consumed by the userspace.
    956	 *       i.e, a page outisde [head, head + size].
    957	 *
    958	 * We work around this by:
    959	 *     - Making sure that we have at least an extra space of PAGE left
    960	 *       in the ring buffer [head, head + size], than we normally do
    961	 *       without the erratum. See trbe_min_trace_buf_size().
    962	 *
    963	 *     - Adjust the TRBLIMITR.LIMIT to leave the extra PAGE outside
    964	 *       the TRBE's range (i.e [TRBBASER, TRBLIMITR.LIMI] ).
    965	 */
    966	if (trbe_has_erratum(buf->cpudata, TRBE_WORKAROUND_WRITE_OUT_OF_RANGE)) {
    967		s64 space = buf->trbe_limit - buf->trbe_write;
    968		/*
    969		 * We must have more than a PAGE_SIZE worth space in the proposed
    970		 * range for the TRBE.
    971		 */
    972		if (WARN_ON(space <= PAGE_SIZE ||
    973			    !IS_ALIGNED(buf->trbe_limit, PAGE_SIZE)))
    974			return -EINVAL;
    975		buf->trbe_limit -= PAGE_SIZE;
    976	}
    977
    978	return 0;
    979}
    980
    981static int __arm_trbe_enable(struct trbe_buf *buf,
    982			     struct perf_output_handle *handle)
    983{
    984	int ret = 0;
    985
    986	perf_aux_output_flag(handle, PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
    987	buf->trbe_limit = compute_trbe_buffer_limit(handle);
    988	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
    989	if (buf->trbe_limit == buf->trbe_base) {
    990		ret = -ENOSPC;
    991		goto err;
    992	}
    993	/* Set the base of the TRBE to the buffer base */
    994	buf->trbe_hw_base = buf->trbe_base;
    995
    996	ret = trbe_apply_work_around_before_enable(buf);
    997	if (ret)
    998		goto err;
    999
   1000	*this_cpu_ptr(buf->cpudata->drvdata->handle) = handle;
   1001	trbe_enable_hw(buf);
   1002	return 0;
   1003err:
   1004	trbe_stop_and_truncate_event(handle);
   1005	return ret;
   1006}
   1007
   1008static int arm_trbe_enable(struct coresight_device *csdev, u32 mode, void *data)
   1009{
   1010	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
   1011	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
   1012	struct perf_output_handle *handle = data;
   1013	struct trbe_buf *buf = etm_perf_sink_config(handle);
   1014
   1015	WARN_ON(cpudata->cpu != smp_processor_id());
   1016	WARN_ON(cpudata->drvdata != drvdata);
   1017	if (mode != CS_MODE_PERF)
   1018		return -EINVAL;
   1019
   1020	cpudata->buf = buf;
   1021	cpudata->mode = mode;
   1022	buf->cpudata = cpudata;
   1023
   1024	return __arm_trbe_enable(buf, handle);
   1025}
   1026
   1027static int arm_trbe_disable(struct coresight_device *csdev)
   1028{
   1029	struct trbe_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
   1030	struct trbe_cpudata *cpudata = dev_get_drvdata(&csdev->dev);
   1031	struct trbe_buf *buf = cpudata->buf;
   1032
   1033	WARN_ON(buf->cpudata != cpudata);
   1034	WARN_ON(cpudata->cpu != smp_processor_id());
   1035	WARN_ON(cpudata->drvdata != drvdata);
   1036	if (cpudata->mode != CS_MODE_PERF)
   1037		return -EINVAL;
   1038
   1039	trbe_drain_and_disable_local(cpudata);
   1040	buf->cpudata = NULL;
   1041	cpudata->buf = NULL;
   1042	cpudata->mode = CS_MODE_DISABLED;
   1043	return 0;
   1044}
   1045
   1046static void trbe_handle_spurious(struct perf_output_handle *handle)
   1047{
   1048	struct trbe_buf *buf = etm_perf_sink_config(handle);
   1049	u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
   1050
   1051	/*
   1052	 * If the IRQ was spurious, simply re-enable the TRBE
   1053	 * back without modifying the buffer parameters to
   1054	 * retain the trace collected so far.
   1055	 */
   1056	set_trbe_enabled(buf->cpudata, trblimitr);
   1057}
   1058
   1059static int trbe_handle_overflow(struct perf_output_handle *handle)
   1060{
   1061	struct perf_event *event = handle->event;
   1062	struct trbe_buf *buf = etm_perf_sink_config(handle);
   1063	unsigned long size;
   1064	struct etm_event_data *event_data;
   1065
   1066	size = trbe_get_trace_size(handle, buf, true);
   1067	if (buf->snapshot)
   1068		handle->head += size;
   1069
   1070	trbe_report_wrap_event(handle);
   1071	perf_aux_output_end(handle, size);
   1072	event_data = perf_aux_output_begin(handle, event);
   1073	if (!event_data) {
   1074		/*
   1075		 * We are unable to restart the trace collection,
   1076		 * thus leave the TRBE disabled. The etm-perf driver
   1077		 * is able to detect this with a disconnected handle
   1078		 * (handle->event = NULL).
   1079		 */
   1080		trbe_drain_and_disable_local(buf->cpudata);
   1081		*this_cpu_ptr(buf->cpudata->drvdata->handle) = NULL;
   1082		return -EINVAL;
   1083	}
   1084
   1085	return __arm_trbe_enable(buf, handle);
   1086}
   1087
   1088static bool is_perf_trbe(struct perf_output_handle *handle)
   1089{
   1090	struct trbe_buf *buf = etm_perf_sink_config(handle);
   1091	struct trbe_cpudata *cpudata = buf->cpudata;
   1092	struct trbe_drvdata *drvdata = cpudata->drvdata;
   1093	int cpu = smp_processor_id();
   1094
   1095	WARN_ON(buf->trbe_hw_base != get_trbe_base_pointer());
   1096	WARN_ON(buf->trbe_limit != get_trbe_limit_pointer());
   1097
   1098	if (cpudata->mode != CS_MODE_PERF)
   1099		return false;
   1100
   1101	if (cpudata->cpu != cpu)
   1102		return false;
   1103
   1104	if (!cpumask_test_cpu(cpu, &drvdata->supported_cpus))
   1105		return false;
   1106
   1107	return true;
   1108}
   1109
   1110static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
   1111{
   1112	struct perf_output_handle **handle_ptr = dev;
   1113	struct perf_output_handle *handle = *handle_ptr;
   1114	struct trbe_buf *buf = etm_perf_sink_config(handle);
   1115	enum trbe_fault_action act;
   1116	u64 status;
   1117	bool truncated = false;
   1118	u64 trfcr;
   1119
   1120	/* Reads to TRBSR_EL1 is fine when TRBE is active */
   1121	status = read_sysreg_s(SYS_TRBSR_EL1);
   1122	/*
   1123	 * If the pending IRQ was handled by update_buffer callback
   1124	 * we have nothing to do here.
   1125	 */
   1126	if (!is_trbe_irq(status))
   1127		return IRQ_NONE;
   1128
   1129	/* Prohibit the CPU from tracing before we disable the TRBE */
   1130	trfcr = cpu_prohibit_trace();
   1131	/*
   1132	 * Ensure the trace is visible to the CPUs and
   1133	 * any external aborts have been resolved.
   1134	 */
   1135	trbe_drain_and_disable_local(buf->cpudata);
   1136	clr_trbe_irq();
   1137	isb();
   1138
   1139	if (WARN_ON_ONCE(!handle) || !perf_get_aux(handle))
   1140		return IRQ_NONE;
   1141
   1142	if (!is_perf_trbe(handle))
   1143		return IRQ_NONE;
   1144
   1145	act = trbe_get_fault_act(handle, status);
   1146	switch (act) {
   1147	case TRBE_FAULT_ACT_WRAP:
   1148		truncated = !!trbe_handle_overflow(handle);
   1149		break;
   1150	case TRBE_FAULT_ACT_SPURIOUS:
   1151		trbe_handle_spurious(handle);
   1152		break;
   1153	case TRBE_FAULT_ACT_FATAL:
   1154		trbe_stop_and_truncate_event(handle);
   1155		truncated = true;
   1156		break;
   1157	}
   1158
   1159	/*
   1160	 * If the buffer was truncated, ensure perf callbacks
   1161	 * have completed, which will disable the event.
   1162	 *
   1163	 * Otherwise, restore the trace filter controls to
   1164	 * allow the tracing.
   1165	 */
   1166	if (truncated)
   1167		irq_work_run();
   1168	else
   1169		write_trfcr(trfcr);
   1170
   1171	return IRQ_HANDLED;
   1172}
   1173
   1174static const struct coresight_ops_sink arm_trbe_sink_ops = {
   1175	.enable		= arm_trbe_enable,
   1176	.disable	= arm_trbe_disable,
   1177	.alloc_buffer	= arm_trbe_alloc_buffer,
   1178	.free_buffer	= arm_trbe_free_buffer,
   1179	.update_buffer	= arm_trbe_update_buffer,
   1180};
   1181
   1182static const struct coresight_ops arm_trbe_cs_ops = {
   1183	.sink_ops	= &arm_trbe_sink_ops,
   1184};
   1185
   1186static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf)
   1187{
   1188	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
   1189
   1190	return sprintf(buf, "%llx\n", cpudata->trbe_hw_align);
   1191}
   1192static DEVICE_ATTR_RO(align);
   1193
   1194static ssize_t flag_show(struct device *dev, struct device_attribute *attr, char *buf)
   1195{
   1196	struct trbe_cpudata *cpudata = dev_get_drvdata(dev);
   1197
   1198	return sprintf(buf, "%d\n", cpudata->trbe_flag);
   1199}
   1200static DEVICE_ATTR_RO(flag);
   1201
   1202static struct attribute *arm_trbe_attrs[] = {
   1203	&dev_attr_align.attr,
   1204	&dev_attr_flag.attr,
   1205	NULL,
   1206};
   1207
   1208static const struct attribute_group arm_trbe_group = {
   1209	.attrs = arm_trbe_attrs,
   1210};
   1211
   1212static const struct attribute_group *arm_trbe_groups[] = {
   1213	&arm_trbe_group,
   1214	NULL,
   1215};
   1216
   1217static void arm_trbe_enable_cpu(void *info)
   1218{
   1219	struct trbe_drvdata *drvdata = info;
   1220	struct trbe_cpudata *cpudata = this_cpu_ptr(drvdata->cpudata);
   1221
   1222	trbe_reset_local(cpudata);
   1223	enable_percpu_irq(drvdata->irq, IRQ_TYPE_NONE);
   1224}
   1225
   1226static void arm_trbe_register_coresight_cpu(struct trbe_drvdata *drvdata, int cpu)
   1227{
   1228	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
   1229	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
   1230	struct coresight_desc desc = { 0 };
   1231	struct device *dev;
   1232
   1233	if (WARN_ON(trbe_csdev))
   1234		return;
   1235
   1236	/* If the TRBE was not probed on the CPU, we shouldn't be here */
   1237	if (WARN_ON(!cpudata->drvdata))
   1238		return;
   1239
   1240	dev = &cpudata->drvdata->pdev->dev;
   1241	desc.name = devm_kasprintf(dev, GFP_KERNEL, "trbe%d", cpu);
   1242	if (!desc.name)
   1243		goto cpu_clear;
   1244
   1245	desc.type = CORESIGHT_DEV_TYPE_SINK;
   1246	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM;
   1247	desc.ops = &arm_trbe_cs_ops;
   1248	desc.pdata = dev_get_platdata(dev);
   1249	desc.groups = arm_trbe_groups;
   1250	desc.dev = dev;
   1251	trbe_csdev = coresight_register(&desc);
   1252	if (IS_ERR(trbe_csdev))
   1253		goto cpu_clear;
   1254
   1255	dev_set_drvdata(&trbe_csdev->dev, cpudata);
   1256	coresight_set_percpu_sink(cpu, trbe_csdev);
   1257	return;
   1258cpu_clear:
   1259	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
   1260}
   1261
   1262/*
   1263 * Must be called with preemption disabled, for trbe_check_errata().
   1264 */
   1265static void arm_trbe_probe_cpu(void *info)
   1266{
   1267	struct trbe_drvdata *drvdata = info;
   1268	int cpu = smp_processor_id();
   1269	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
   1270	u64 trbidr;
   1271
   1272	if (WARN_ON(!cpudata))
   1273		goto cpu_clear;
   1274
   1275	if (!is_trbe_available()) {
   1276		pr_err("TRBE is not implemented on cpu %d\n", cpu);
   1277		goto cpu_clear;
   1278	}
   1279
   1280	trbidr = read_sysreg_s(SYS_TRBIDR_EL1);
   1281	if (!is_trbe_programmable(trbidr)) {
   1282		pr_err("TRBE is owned in higher exception level on cpu %d\n", cpu);
   1283		goto cpu_clear;
   1284	}
   1285
   1286	cpudata->trbe_hw_align = 1ULL << get_trbe_address_align(trbidr);
   1287	if (cpudata->trbe_hw_align > SZ_2K) {
   1288		pr_err("Unsupported alignment on cpu %d\n", cpu);
   1289		goto cpu_clear;
   1290	}
   1291
   1292	/*
   1293	 * Run the TRBE erratum checks, now that we know
   1294	 * this instance is about to be registered.
   1295	 */
   1296	trbe_check_errata(cpudata);
   1297
   1298	if (trbe_is_broken(cpudata)) {
   1299		pr_err("Disabling TRBE on cpu%d due to erratum\n", cpu);
   1300		goto cpu_clear;
   1301	}
   1302
   1303	/*
   1304	 * If the TRBE is affected by erratum TRBE_WORKAROUND_OVERWRITE_FILL_MODE,
   1305	 * we must always program the TBRPTR_EL1, 256bytes from a page
   1306	 * boundary, with TRBBASER_EL1 set to the page, to prevent
   1307	 * TRBE over-writing 256bytes at TRBBASER_EL1 on FILL event.
   1308	 *
   1309	 * Thus make sure we always align our write pointer to a PAGE_SIZE,
   1310	 * which also guarantees that we have at least a PAGE_SIZE space in
   1311	 * the buffer (TRBLIMITR is PAGE aligned) and thus we can skip
   1312	 * the required bytes at the base.
   1313	 */
   1314	if (trbe_may_overwrite_in_fill_mode(cpudata))
   1315		cpudata->trbe_align = PAGE_SIZE;
   1316	else
   1317		cpudata->trbe_align = cpudata->trbe_hw_align;
   1318
   1319	cpudata->trbe_flag = get_trbe_flag_update(trbidr);
   1320	cpudata->cpu = cpu;
   1321	cpudata->drvdata = drvdata;
   1322	return;
   1323cpu_clear:
   1324	cpumask_clear_cpu(cpu, &drvdata->supported_cpus);
   1325}
   1326
   1327static void arm_trbe_remove_coresight_cpu(void *info)
   1328{
   1329	int cpu = smp_processor_id();
   1330	struct trbe_drvdata *drvdata = info;
   1331	struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
   1332	struct coresight_device *trbe_csdev = coresight_get_percpu_sink(cpu);
   1333
   1334	disable_percpu_irq(drvdata->irq);
   1335	trbe_reset_local(cpudata);
   1336	if (trbe_csdev) {
   1337		coresight_unregister(trbe_csdev);
   1338		cpudata->drvdata = NULL;
   1339		coresight_set_percpu_sink(cpu, NULL);
   1340	}
   1341}
   1342
   1343static int arm_trbe_probe_coresight(struct trbe_drvdata *drvdata)
   1344{
   1345	int cpu;
   1346
   1347	drvdata->cpudata = alloc_percpu(typeof(*drvdata->cpudata));
   1348	if (!drvdata->cpudata)
   1349		return -ENOMEM;
   1350
   1351	for_each_cpu(cpu, &drvdata->supported_cpus) {
   1352		/* If we fail to probe the CPU, let us defer it to hotplug callbacks */
   1353		if (smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1))
   1354			continue;
   1355		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
   1356			arm_trbe_register_coresight_cpu(drvdata, cpu);
   1357		if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
   1358			smp_call_function_single(cpu, arm_trbe_enable_cpu, drvdata, 1);
   1359	}
   1360	return 0;
   1361}
   1362
   1363static int arm_trbe_remove_coresight(struct trbe_drvdata *drvdata)
   1364{
   1365	int cpu;
   1366
   1367	for_each_cpu(cpu, &drvdata->supported_cpus)
   1368		smp_call_function_single(cpu, arm_trbe_remove_coresight_cpu, drvdata, 1);
   1369	free_percpu(drvdata->cpudata);
   1370	return 0;
   1371}
   1372
   /*
    * Probe the TRBE on the current CPU from a hotplug callback.
    * arm_trbe_probe_cpu() must be called with preemption disabled, so the
    * call is wrapped accordingly.
    */
   static void arm_trbe_probe_hotplugged_cpu(struct trbe_drvdata *drvdata)
   {
   	preempt_disable();
   	arm_trbe_probe_cpu(drvdata);
   	preempt_enable();
   }
   1379
   1380static int arm_trbe_cpu_startup(unsigned int cpu, struct hlist_node *node)
   1381{
   1382	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
   1383
   1384	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) {
   1385
   1386		/*
   1387		 * If this CPU was not probed for TRBE,
   1388		 * initialize it now.
   1389		 */
   1390		if (!coresight_get_percpu_sink(cpu)) {
   1391			arm_trbe_probe_hotplugged_cpu(drvdata);
   1392			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
   1393				arm_trbe_register_coresight_cpu(drvdata, cpu);
   1394			if (cpumask_test_cpu(cpu, &drvdata->supported_cpus))
   1395				arm_trbe_enable_cpu(drvdata);
   1396		} else {
   1397			arm_trbe_enable_cpu(drvdata);
   1398		}
   1399	}
   1400	return 0;
   1401}
   1402
   1403static int arm_trbe_cpu_teardown(unsigned int cpu, struct hlist_node *node)
   1404{
   1405	struct trbe_drvdata *drvdata = hlist_entry_safe(node, struct trbe_drvdata, hotplug_node);
   1406
   1407	if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) {
   1408		struct trbe_cpudata *cpudata = per_cpu_ptr(drvdata->cpudata, cpu);
   1409
   1410		disable_percpu_irq(drvdata->irq);
   1411		trbe_reset_local(cpudata);
   1412	}
   1413	return 0;
   1414}
   1415
   1416static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata)
   1417{
   1418	enum cpuhp_state trbe_online;
   1419	int ret;
   1420
   1421	trbe_online = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRVNAME,
   1422					      arm_trbe_cpu_startup, arm_trbe_cpu_teardown);
   1423	if (trbe_online < 0)
   1424		return trbe_online;
   1425
   1426	ret = cpuhp_state_add_instance(trbe_online, &drvdata->hotplug_node);
   1427	if (ret) {
   1428		cpuhp_remove_multi_state(trbe_online);
   1429		return ret;
   1430	}
   1431	drvdata->trbe_online = trbe_online;
   1432	return 0;
   1433}
   1434
   1435static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata)
   1436{
   1437	cpuhp_remove_multi_state(drvdata->trbe_online);
   1438}
   1439
   1440static int arm_trbe_probe_irq(struct platform_device *pdev,
   1441			      struct trbe_drvdata *drvdata)
   1442{
   1443	int ret;
   1444
   1445	drvdata->irq = platform_get_irq(pdev, 0);
   1446	if (drvdata->irq < 0) {
   1447		pr_err("IRQ not found for the platform device\n");
   1448		return drvdata->irq;
   1449	}
   1450
   1451	if (!irq_is_percpu(drvdata->irq)) {
   1452		pr_err("IRQ is not a PPI\n");
   1453		return -EINVAL;
   1454	}
   1455
   1456	if (irq_get_percpu_devid_partition(drvdata->irq, &drvdata->supported_cpus))
   1457		return -EINVAL;
   1458
   1459	drvdata->handle = alloc_percpu(struct perf_output_handle *);
   1460	if (!drvdata->handle)
   1461		return -ENOMEM;
   1462
   1463	ret = request_percpu_irq(drvdata->irq, arm_trbe_irq_handler, DRVNAME, drvdata->handle);
   1464	if (ret) {
   1465		free_percpu(drvdata->handle);
   1466		return ret;
   1467	}
   1468	return 0;
   1469}
   1470
   1471static void arm_trbe_remove_irq(struct trbe_drvdata *drvdata)
   1472{
   1473	free_percpu_irq(drvdata->irq, drvdata->handle);
   1474	free_percpu(drvdata->handle);
   1475}
   1476
   1477static int arm_trbe_device_probe(struct platform_device *pdev)
   1478{
   1479	struct coresight_platform_data *pdata;
   1480	struct trbe_drvdata *drvdata;
   1481	struct device *dev = &pdev->dev;
   1482	int ret;
   1483
   1484	/* Trace capture is not possible with kernel page table isolation */
   1485	if (arm64_kernel_unmapped_at_el0()) {
   1486		pr_err("TRBE wouldn't work if kernel gets unmapped at EL0\n");
   1487		return -EOPNOTSUPP;
   1488	}
   1489
   1490	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
   1491	if (!drvdata)
   1492		return -ENOMEM;
   1493
   1494	pdata = coresight_get_platform_data(dev);
   1495	if (IS_ERR(pdata))
   1496		return PTR_ERR(pdata);
   1497
   1498	dev_set_drvdata(dev, drvdata);
   1499	dev->platform_data = pdata;
   1500	drvdata->pdev = pdev;
   1501	ret = arm_trbe_probe_irq(pdev, drvdata);
   1502	if (ret)
   1503		return ret;
   1504
   1505	ret = arm_trbe_probe_coresight(drvdata);
   1506	if (ret)
   1507		goto probe_failed;
   1508
   1509	ret = arm_trbe_probe_cpuhp(drvdata);
   1510	if (ret)
   1511		goto cpuhp_failed;
   1512
   1513	return 0;
   1514cpuhp_failed:
   1515	arm_trbe_remove_coresight(drvdata);
   1516probe_failed:
   1517	arm_trbe_remove_irq(drvdata);
   1518	return ret;
   1519}
   1520
   /*
    * Platform driver remove: tear down in the reverse order of probe
    * (hotplug state, CoreSight sinks, interrupt). Always returns 0.
    */
   static int arm_trbe_device_remove(struct platform_device *pdev)
   {
   	struct trbe_drvdata *trbe = platform_get_drvdata(pdev);

   	arm_trbe_remove_cpuhp(trbe);
   	arm_trbe_remove_coresight(trbe);
   	arm_trbe_remove_irq(trbe);

   	return 0;
   }
   1530
   1531static const struct of_device_id arm_trbe_of_match[] = {
   1532	{ .compatible = "arm,trace-buffer-extension"},
   1533	{},
   1534};
   1535MODULE_DEVICE_TABLE(of, arm_trbe_of_match);
   1536
   1537static struct platform_driver arm_trbe_driver = {
   1538	.driver	= {
   1539		.name = DRVNAME,
   1540		.of_match_table = of_match_ptr(arm_trbe_of_match),
   1541		.suppress_bind_attrs = true,
   1542	},
   1543	.probe	= arm_trbe_device_probe,
   1544	.remove	= arm_trbe_device_remove,
   1545};
   1546
   1547static int __init arm_trbe_init(void)
   1548{
   1549	int ret;
   1550
   1551	ret = platform_driver_register(&arm_trbe_driver);
   1552	if (!ret)
   1553		return 0;
   1554
   1555	pr_err("Error registering %s platform driver\n", DRVNAME);
   1556	return ret;
   1557}
   1558
   1559static void __exit arm_trbe_exit(void)
   1560{
   1561	platform_driver_unregister(&arm_trbe_driver);
   1562}
   1563module_init(arm_trbe_init);
   1564module_exit(arm_trbe_exit);
   1565
   1566MODULE_AUTHOR("Anshuman Khandual <anshuman.khandual@arm.com>");
   1567MODULE_DESCRIPTION("Arm Trace Buffer Extension (TRBE) driver");
   1568MODULE_LICENSE("GPL v2");