cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

intel-pt.c (106600B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * intel_pt.c: Intel Processor Trace support
      4 * Copyright (c) 2013-2015, Intel Corporation.
      5 */
      6
      7#include <inttypes.h>
      8#include <stdio.h>
      9#include <stdbool.h>
     10#include <errno.h>
     11#include <linux/kernel.h>
     12#include <linux/string.h>
     13#include <linux/types.h>
     14#include <linux/zalloc.h>
     15
     16#include "session.h"
     17#include "machine.h"
     18#include "memswap.h"
     19#include "sort.h"
     20#include "tool.h"
     21#include "event.h"
     22#include "evlist.h"
     23#include "evsel.h"
     24#include "map.h"
     25#include "color.h"
     26#include "thread.h"
     27#include "thread-stack.h"
     28#include "symbol.h"
     29#include "callchain.h"
     30#include "dso.h"
     31#include "debug.h"
     32#include "auxtrace.h"
     33#include "tsc.h"
     34#include "intel-pt.h"
     35#include "config.h"
     36#include "util/perf_api_probe.h"
     37#include "util/synthetic-events.h"
     38#include "time-utils.h"
     39
     40#include "../arch/x86/include/uapi/asm/perf_regs.h"
     41
     42#include "intel-pt-decoder/intel-pt-log.h"
     43#include "intel-pt-decoder/intel-pt-decoder.h"
     44#include "intel-pt-decoder/intel-pt-insn-decoder.h"
     45#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
     46
     47#define MAX_TIMESTAMP (~0ULL)
     48
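/*
 * Bits of the intel_pt PMU's perf_event_attr.config tested by the helpers
 * below (see intel_pt_get_config()).
 */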
     49#define INTEL_PT_CFG_PASS_THRU	BIT_ULL(0)
     50#define INTEL_PT_CFG_PWR_EVT_EN	BIT_ULL(4)
     51#define INTEL_PT_CFG_BRANCH_EN	BIT_ULL(13)
     52#define INTEL_PT_CFG_EVT_EN	BIT_ULL(31)
     53#define INTEL_PT_CFG_TNT_DIS	BIT_ULL(55)
     54
     55struct range {
     56	u64 start;
     57	u64 end;
     58};
     59
     60struct intel_pt {
     61	struct auxtrace auxtrace;
     62	struct auxtrace_queues queues;
     63	struct auxtrace_heap heap;
     64	u32 auxtrace_type;
     65	struct perf_session *session;
     66	struct machine *machine;
     67	struct evsel *switch_evsel;
     68	struct thread *unknown_thread;
     69	bool timeless_decoding;
     70	bool sampling_mode;
     71	bool snapshot_mode;
     72	bool per_cpu_mmaps;
     73	bool have_tsc;
     74	bool data_queued;
     75	bool est_tsc;
     76	bool sync_switch;
     77	bool mispred_all;
     78	bool use_thread_stack;
     79	bool callstack;
     80	bool cap_event_trace;
     81	unsigned int br_stack_sz;
     82	unsigned int br_stack_sz_plus;
     83	int have_sched_switch;
     84	u32 pmu_type;
     85	u64 kernel_start;
     86	u64 switch_ip;
     87	u64 ptss_ip;
     88	u64 first_timestamp;
     89
     90	struct perf_tsc_conversion tc;
     91	bool cap_user_time_zero;
     92
     93	struct itrace_synth_opts synth_opts;
     94
     95	bool sample_instructions;
     96	u64 instructions_sample_type;
     97	u64 instructions_id;
     98
     99	bool sample_branches;
    100	u32 branches_filter;
    101	u64 branches_sample_type;
    102	u64 branches_id;
    103
    104	bool sample_transactions;
    105	u64 transactions_sample_type;
    106	u64 transactions_id;
    107
    108	bool sample_ptwrites;
    109	u64 ptwrites_sample_type;
    110	u64 ptwrites_id;
    111
    112	bool sample_pwr_events;
    113	u64 pwr_events_sample_type;
    114	u64 mwait_id;
    115	u64 pwre_id;
    116	u64 exstop_id;
    117	u64 pwrx_id;
    118	u64 cbr_id;
    119	u64 psb_id;
    120
    121	bool single_pebs;
    122	bool sample_pebs;
    123	struct evsel *pebs_evsel;
    124
    125	u64 evt_sample_type;
    126	u64 evt_id;
    127
    128	u64 iflag_chg_sample_type;
    129	u64 iflag_chg_id;
    130
    131	u64 tsc_bit;
    132	u64 mtc_bit;
    133	u64 mtc_freq_bits;
    134	u32 tsc_ctc_ratio_n;
    135	u32 tsc_ctc_ratio_d;
    136	u64 cyc_bit;
    137	u64 noretcomp_bit;
    138	unsigned max_non_turbo_ratio;
    139	unsigned cbr2khz;
    140	int max_loops;
    141
    142	unsigned long num_events;
    143
    144	char *filter;
    145	struct addr_filters filts;
    146
    147	struct range *time_ranges;
    148	unsigned int range_cnt;
    149
    150	struct ip_callchain *chain;
    151	struct branch_stack *br_stack;
    152
    153	u64 dflt_tsc_offset;
    154	struct rb_root vmcs_info;
    155};
    156
    157enum switch_state {
    158	INTEL_PT_SS_NOT_TRACING,
    159	INTEL_PT_SS_UNKNOWN,
    160	INTEL_PT_SS_TRACING,
    161	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
    162	INTEL_PT_SS_EXPECTING_SWITCH_IP,
    163};
    164
    165/* applicable_counters is 64-bits */
    166#define INTEL_PT_MAX_PEBS 64
    167
    168struct intel_pt_pebs_event {
    169	struct evsel *evsel;
    170	u64 id;
    171};
    172
    173struct intel_pt_queue {
    174	struct intel_pt *pt;
    175	unsigned int queue_nr;
    176	struct auxtrace_buffer *buffer;
    177	struct auxtrace_buffer *old_buffer;
    178	void *decoder;
    179	const struct intel_pt_state *state;
    180	struct ip_callchain *chain;
    181	struct branch_stack *last_branch;
    182	union perf_event *event_buf;
    183	bool on_heap;
    184	bool stop;
    185	bool step_through_buffers;
    186	bool use_buffer_pid_tid;
    187	bool sync_switch;
    188	bool sample_ipc;
    189	pid_t pid, tid;
    190	int cpu;
    191	int switch_state;
    192	pid_t next_tid;
    193	struct thread *thread;
    194	struct machine *guest_machine;
    195	struct thread *guest_thread;
    196	struct thread *unknown_guest_thread;
    197	pid_t guest_machine_pid;
    198	bool exclude_kernel;
    199	bool have_sample;
    200	u64 time;
    201	u64 timestamp;
    202	u64 sel_timestamp;
    203	bool sel_start;
    204	unsigned int sel_idx;
    205	u32 flags;
    206	u16 insn_len;
    207	u64 last_insn_cnt;
    208	u64 ipc_insn_cnt;
    209	u64 ipc_cyc_cnt;
    210	u64 last_in_insn_cnt;
    211	u64 last_in_cyc_cnt;
    212	u64 last_br_insn_cnt;
    213	u64 last_br_cyc_cnt;
    214	unsigned int cbr_seen;
    215	char insn[INTEL_PT_INSN_BUF_SZ];
    216	struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS];
    217};
    218
    219static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
    220			  unsigned char *buf, size_t len)
    221{
    222	struct intel_pt_pkt packet;
    223	size_t pos = 0;
    224	int ret, pkt_len, i;
    225	char desc[INTEL_PT_PKT_DESC_MAX];
    226	const char *color = PERF_COLOR_BLUE;
    227	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
    228
    229	color_fprintf(stdout, color,
    230		      ". ... Intel Processor Trace data: size %zu bytes\n",
    231		      len);
    232
    233	while (len) {
    234		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
    235		if (ret > 0)
    236			pkt_len = ret;
    237		else
    238			pkt_len = 1;
    239		printf(".");
    240		color_fprintf(stdout, color, "  %08x: ", pos);
    241		for (i = 0; i < pkt_len; i++)
    242			color_fprintf(stdout, color, " %02x", buf[i]);
    243		for (; i < 16; i++)
    244			color_fprintf(stdout, color, "   ");
    245		if (ret > 0) {
    246			ret = intel_pt_pkt_desc(&packet, desc,
    247						INTEL_PT_PKT_DESC_MAX);
    248			if (ret > 0)
    249				color_fprintf(stdout, color, " %s\n", desc);
    250		} else {
    251			color_fprintf(stdout, color, " Bad packet!\n");
    252		}
    253		pos += pkt_len;
    254		buf += pkt_len;
    255		len -= pkt_len;
    256	}
    257}
    258
    259static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
    260				size_t len)
    261{
    262	printf(".\n");
    263	intel_pt_dump(pt, buf, len);
    264}
    265
    266static void intel_pt_log_event(union perf_event *event)
    267{
    268	FILE *f = intel_pt_log_fp();
    269
    270	if (!intel_pt_enable_logging || !f)
    271		return;
    272
    273	perf_event__fprintf(event, NULL, f);
    274}
    275
    276static void intel_pt_dump_sample(struct perf_session *session,
    277				 struct perf_sample *sample)
    278{
    279	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
    280					   auxtrace);
    281
    282	printf("\n");
    283	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
    284}
    285
    286static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
    287{
    288	struct perf_time_interval *range = pt->synth_opts.ptime_range;
    289	int n = pt->synth_opts.range_num;
    290
    291	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
    292		return true;
    293
    294	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
    295		return false;
    296
    297	/* perf_time__ranges_skip_sample does not work if time is zero */
    298	if (!tm)
    299		tm = 1;
    300
    301	return !n || !perf_time__ranges_skip_sample(range, n, tm);
    302}
    303
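/*
 * Find the VMCS info for @vmcs in the rbtree, or insert a new entry
 * initialized with the default TSC offset if it is not already there.
 */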
    304static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root,
    305							u64 vmcs,
    306							u64 dflt_tsc_offset)
    307{
    308	struct rb_node **p = &rb_root->rb_node;
    309	struct rb_node *parent = NULL;
    310	struct intel_pt_vmcs_info *v;
    311
    312	while (*p) {
    313		parent = *p;
    314		v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node);
    315
    316		if (v->vmcs == vmcs)
    317			return v;
    318
    319		if (vmcs < v->vmcs)
    320			p = &(*p)->rb_left;
    321		else
    322			p = &(*p)->rb_right;
    323	}
    324
    325	v = zalloc(sizeof(*v));
    326	if (v) {
    327		v->vmcs = vmcs;
    328		v->tsc_offset = dflt_tsc_offset;
    329		v->reliable = dflt_tsc_offset;
    330
    331		rb_link_node(&v->rb_node, parent, p);
    332		rb_insert_color(&v->rb_node, rb_root);
    333	}
    334
    335	return v;
    336}
    337
    338static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs)
    339{
    340	struct intel_pt_queue *ptq = data;
    341	struct intel_pt *pt = ptq->pt;
    342
    343	if (!vmcs && !pt->dflt_tsc_offset)
    344		return NULL;
    345
    346	return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset);
    347}
    348
    349static void intel_pt_free_vmcs_info(struct intel_pt *pt)
    350{
    351	struct intel_pt_vmcs_info *v;
    352	struct rb_node *n;
    353
    354	n = rb_first(&pt->vmcs_info);
    355	while (n) {
    356		v = rb_entry(n, struct intel_pt_vmcs_info, rb_node);
    357		n = rb_next(n);
    358		rb_erase(&v->rb_node, &pt->vmcs_info);
    359		free(v);
    360	}
    361}
    362
    363static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
    364				   struct auxtrace_buffer *b)
    365{
    366	bool consecutive = false;
    367	void *start;
    368
    369	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
    370				      pt->have_tsc, &consecutive,
    371				      pt->synth_opts.vm_time_correlation);
    372	if (!start)
    373		return -EINVAL;
    374	/*
    375	 * In the case of vm_time_correlation, the overlap might contain TSC
    376	 * packets that will not be fixed, and that will then no longer work for
    377	 * overlap detection. Avoid that by zeroing out the overlap.
    378	 */
    379	if (pt->synth_opts.vm_time_correlation)
    380		memset(b->data, 0, start - b->data);
    381	b->use_size = b->data + b->size - start;
    382	b->use_data = start;
    383	if (b->use_size && consecutive)
    384		b->consecutive = true;
    385	return 0;
    386}
    387
    388static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
    389			       struct auxtrace_buffer *buffer,
    390			       struct auxtrace_buffer *old_buffer,
    391			       struct intel_pt_buffer *b)
    392{
    393	bool might_overlap;
    394
    395	if (!buffer->data) {
    396		int fd = perf_data__fd(ptq->pt->session->data);
    397
    398		buffer->data = auxtrace_buffer__get_data(buffer, fd);
    399		if (!buffer->data)
    400			return -ENOMEM;
    401	}
    402
    403	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
    404	if (might_overlap && !buffer->consecutive && old_buffer &&
    405	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
    406		return -ENOMEM;
    407
    408	if (buffer->use_data) {
    409		b->len = buffer->use_size;
    410		b->buf = buffer->use_data;
    411	} else {
    412		b->len = buffer->size;
    413		b->buf = buffer->data;
    414	}
    415	b->ref_timestamp = buffer->reference;
    416
    417	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
    418		b->consecutive = false;
    419		b->trace_nr = buffer->buffer_nr + 1;
    420	} else {
    421		b->consecutive = true;
    422	}
    423
    424	return 0;
    425}
    426
    427/* Do not drop buffers with references - refer intel_pt_get_trace() */
    428static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
    429					   struct auxtrace_buffer *buffer)
    430{
    431	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
    432		return;
    433
    434	auxtrace_buffer__drop_data(buffer);
    435}
    436
    437/* Must be serialized with respect to intel_pt_get_trace() */
    438static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
    439			      void *cb_data)
    440{
    441	struct intel_pt_queue *ptq = data;
    442	struct auxtrace_buffer *buffer = ptq->buffer;
    443	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
    444	struct auxtrace_queue *queue;
    445	int err = 0;
    446
    447	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
    448
    449	while (1) {
    450		struct intel_pt_buffer b = { .len = 0 };
    451
    452		buffer = auxtrace_buffer__next(queue, buffer);
    453		if (!buffer)
    454			break;
    455
    456		err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
    457		if (err)
    458			break;
    459
    460		if (b.len) {
    461			intel_pt_lookahead_drop_buffer(ptq, old_buffer);
    462			old_buffer = buffer;
    463		} else {
    464			intel_pt_lookahead_drop_buffer(ptq, buffer);
    465			continue;
    466		}
    467
    468		err = cb(&b, cb_data);
    469		if (err)
    470			break;
    471	}
    472
    473	if (buffer != old_buffer)
    474		intel_pt_lookahead_drop_buffer(ptq, buffer);
    475	intel_pt_lookahead_drop_buffer(ptq, old_buffer);
    476
    477	return err;
    478}
    479
    480/*
    481 * This function assumes data is processed sequentially only.
    482 * Must be serialized with respect to intel_pt_lookahead()
    483 */
    484static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
    485{
    486	struct intel_pt_queue *ptq = data;
    487	struct auxtrace_buffer *buffer = ptq->buffer;
    488	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
    489	struct auxtrace_queue *queue;
    490	int err;
    491
    492	if (ptq->stop) {
    493		b->len = 0;
    494		return 0;
    495	}
    496
    497	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
    498
    499	buffer = auxtrace_buffer__next(queue, buffer);
    500	if (!buffer) {
    501		if (old_buffer)
    502			auxtrace_buffer__drop_data(old_buffer);
    503		b->len = 0;
    504		return 0;
    505	}
    506
    507	ptq->buffer = buffer;
    508
    509	err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
    510	if (err)
    511		return err;
    512
    513	if (ptq->step_through_buffers)
    514		ptq->stop = true;
    515
    516	if (b->len) {
    517		if (old_buffer)
    518			auxtrace_buffer__drop_data(old_buffer);
    519		ptq->old_buffer = buffer;
    520	} else {
    521		auxtrace_buffer__drop_data(buffer);
    522		return intel_pt_get_trace(b, data);
    523	}
    524
    525	return 0;
    526}
    527
    528struct intel_pt_cache_entry {
    529	struct auxtrace_cache_entry	entry;
    530	u64				insn_cnt;
    531	u64				byte_cnt;
    532	enum intel_pt_insn_op		op;
    533	enum intel_pt_insn_branch	branch;
    534	bool				emulated_ptwrite;
    535	int				length;
    536	int32_t				rel;
    537	char				insn[INTEL_PT_INSN_BUF_SZ];
    538};
    539
    540static int intel_pt_config_div(const char *var, const char *value, void *data)
    541{
    542	int *d = data;
    543	long val;
    544
    545	if (!strcmp(var, "intel-pt.cache-divisor")) {
    546		val = strtol(value, NULL, 0);
    547		if (val > 0 && val <= INT_MAX)
    548			*d = val;
    549	}
    550
    551	return 0;
    552}
    553
    554static int intel_pt_cache_divisor(void)
    555{
    556	static int d;
    557
    558	if (d)
    559		return d;
    560
    561	perf_config(intel_pt_config_div, &d);
    562
    563	if (!d)
    564		d = 64;
    565
    566	return d;
    567}
    568
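/*
 * Pick the instruction cache size, in hash-table index bits, scaled to the
 * DSO's data size divided by the configured divisor.
 */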
    569static unsigned int intel_pt_cache_size(struct dso *dso,
    570					struct machine *machine)
    571{
    572	off_t size;
    573
    574	size = dso__data_size(dso, machine);
    575	size /= intel_pt_cache_divisor();
    576	if (size < 1000)
    577		return 10;
    578	if (size > (1 << 21))
    579		return 21;
    580	return 32 - __builtin_clz(size);
    581}
    582
    583static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
    584					     struct machine *machine)
    585{
    586	struct auxtrace_cache *c;
    587	unsigned int bits;
    588
    589	if (dso->auxtrace_cache)
    590		return dso->auxtrace_cache;
    591
    592	bits = intel_pt_cache_size(dso, machine);
    593
    594	/* Ignoring cache creation failure */
    595	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
    596
    597	dso->auxtrace_cache = c;
    598
    599	return c;
    600}
    601
    602static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
    603			      u64 offset, u64 insn_cnt, u64 byte_cnt,
    604			      struct intel_pt_insn *intel_pt_insn)
    605{
    606	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
    607	struct intel_pt_cache_entry *e;
    608	int err;
    609
    610	if (!c)
    611		return -ENOMEM;
    612
    613	e = auxtrace_cache__alloc_entry(c);
    614	if (!e)
    615		return -ENOMEM;
    616
    617	e->insn_cnt = insn_cnt;
    618	e->byte_cnt = byte_cnt;
    619	e->op = intel_pt_insn->op;
    620	e->branch = intel_pt_insn->branch;
    621	e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite;
    622	e->length = intel_pt_insn->length;
    623	e->rel = intel_pt_insn->rel;
    624	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
    625
    626	err = auxtrace_cache__add(c, offset, &e->entry);
    627	if (err)
    628		auxtrace_cache__free_entry(c, e);
    629
    630	return err;
    631}
    632
    633static struct intel_pt_cache_entry *
    634intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
    635{
    636	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
    637
    638	if (!c)
    639		return NULL;
    640
    641	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
    642}
    643
    644static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
    645				      u64 offset)
    646{
    647	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
    648
    649	if (!c)
    650		return;
    651
    652	auxtrace_cache__remove(dso->auxtrace_cache, offset);
    653}
    654
    655static inline bool intel_pt_guest_kernel_ip(uint64_t ip)
    656{
    657	/* Assumes 64-bit kernel */
    658	return ip & (1ULL << 63);
    659}
    660
    661static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr)
    662{
    663	if (nr) {
    664		return intel_pt_guest_kernel_ip(ip) ?
    665		       PERF_RECORD_MISC_GUEST_KERNEL :
    666		       PERF_RECORD_MISC_GUEST_USER;
    667	}
    668
    669	return ip >= ptq->pt->kernel_start ?
    670	       PERF_RECORD_MISC_KERNEL :
    671	       PERF_RECORD_MISC_USER;
    672}
    673
    674static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip)
    675{
    676	/* No support for non-zero CS base */
    677	if (from_ip)
    678		return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr);
    679	return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr);
    680}
    681
    682static int intel_pt_get_guest(struct intel_pt_queue *ptq)
    683{
    684	struct machines *machines = &ptq->pt->session->machines;
    685	struct machine *machine;
    686	pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid;
    687
    688	if (ptq->guest_machine && pid == ptq->guest_machine_pid)
    689		return 0;
    690
    691	ptq->guest_machine = NULL;
    692	thread__zput(ptq->unknown_guest_thread);
    693
    694	if (symbol_conf.guest_code) {
    695		thread__zput(ptq->guest_thread);
    696		ptq->guest_thread = machines__findnew_guest_code(machines, pid);
    697	}
    698
    699	machine = machines__find_guest(machines, pid);
    700	if (!machine)
    701		return -1;
    702
    703	ptq->unknown_guest_thread = machine__idle_thread(machine);
    704	if (!ptq->unknown_guest_thread)
    705		return -1;
    706
    707	ptq->guest_machine = machine;
    708	ptq->guest_machine_pid = pid;
    709
    710	return 0;
    711}
    712
    713static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn)
    714{
    715	return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL;
    716}
    717
    718#define PTWRITE_MAGIC		"\x0f\x0bperf,ptwrite  "
    719#define PTWRITE_MAGIC_LEN	16
    720
    721static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset)
    722{
    723	unsigned char buf[PTWRITE_MAGIC_LEN];
    724	ssize_t len;
    725
    726	len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN);
    727	if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) {
    728		intel_pt_log("Emulated ptwrite signature found\n");
    729		return true;
    730	}
    731	intel_pt_log("Emulated ptwrite signature not found\n");
    732	return false;
    733}
    734
    735static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
    736				   uint64_t *insn_cnt_ptr, uint64_t *ip,
    737				   uint64_t to_ip, uint64_t max_insn_cnt,
    738				   void *data)
    739{
    740	struct intel_pt_queue *ptq = data;
    741	struct machine *machine = ptq->pt->machine;
    742	struct thread *thread;
    743	struct addr_location al;
    744	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
    745	ssize_t len;
    746	int x86_64;
    747	u8 cpumode;
    748	u64 offset, start_offset, start_ip;
    749	u64 insn_cnt = 0;
    750	bool one_map = true;
    751	bool nr;
    752
    753	intel_pt_insn->length = 0;
    754
    755	if (to_ip && *ip == to_ip)
    756		goto out_no_cache;
    757
    758	nr = ptq->state->to_nr;
    759	cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);
    760
    761	if (nr) {
    762		if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) ||
    763		    intel_pt_get_guest(ptq))
    764			return -EINVAL;
    765		machine = ptq->guest_machine;
    766		thread = ptq->guest_thread;
    767		if (!thread) {
    768			if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL)
    769				return -EINVAL;
    770			thread = ptq->unknown_guest_thread;
    771		}
    772	} else {
    773		thread = ptq->thread;
    774		if (!thread) {
    775			if (cpumode != PERF_RECORD_MISC_KERNEL)
    776				return -EINVAL;
    777			thread = ptq->pt->unknown_thread;
    778		}
    779	}
    780
    781	while (1) {
    782		if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
    783			return -EINVAL;
    784
    785		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
    786		    dso__data_status_seen(al.map->dso,
    787					  DSO_DATA_STATUS_SEEN_ITRACE))
    788			return -ENOENT;
    789
    790		offset = al.map->map_ip(al.map, *ip);
    791
    792		if (!to_ip && one_map) {
    793			struct intel_pt_cache_entry *e;
    794
    795			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
    796			if (e &&
    797			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
    798				*insn_cnt_ptr = e->insn_cnt;
    799				*ip += e->byte_cnt;
    800				intel_pt_insn->op = e->op;
    801				intel_pt_insn->branch = e->branch;
    802				intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite;
    803				intel_pt_insn->length = e->length;
    804				intel_pt_insn->rel = e->rel;
    805				memcpy(intel_pt_insn->buf, e->insn,
    806				       INTEL_PT_INSN_BUF_SZ);
    807				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
    808				return 0;
    809			}
    810		}
    811
    812		start_offset = offset;
    813		start_ip = *ip;
    814
    815		/* Load maps to ensure dso->is_64_bit has been updated */
    816		map__load(al.map);
    817
    818		x86_64 = al.map->dso->is_64_bit;
    819
    820		while (1) {
    821			len = dso__data_read_offset(al.map->dso, machine,
    822						    offset, buf,
    823						    INTEL_PT_INSN_BUF_SZ);
    824			if (len <= 0)
    825				return -EINVAL;
    826
    827			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
    828				return -EINVAL;
    829
    830			intel_pt_log_insn(intel_pt_insn, *ip);
    831
    832			insn_cnt += 1;
    833
    834			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) {
    835				bool eptw;
    836				u64 offs;
    837
    838				if (!intel_pt_jmp_16(intel_pt_insn))
    839					goto out;
    840				/* Check for emulated ptwrite */
    841				offs = offset + intel_pt_insn->length;
    842				eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs);
    843				intel_pt_insn->emulated_ptwrite = eptw;
    844				goto out;
    845			}
    846
    847			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
    848				goto out_no_cache;
    849
    850			*ip += intel_pt_insn->length;
    851
    852			if (to_ip && *ip == to_ip) {
    853				intel_pt_insn->length = 0;
    854				goto out_no_cache;
    855			}
    856
    857			if (*ip >= al.map->end)
    858				break;
    859
    860			offset += intel_pt_insn->length;
    861		}
    862		one_map = false;
    863	}
    864out:
    865	*insn_cnt_ptr = insn_cnt;
    866
    867	if (!one_map)
    868		goto out_no_cache;
    869
    870	/*
    871	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
    872	 * entries.
    873	 */
    874	if (to_ip) {
    875		struct intel_pt_cache_entry *e;
    876
    877		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
    878		if (e)
    879			return 0;
    880	}
    881
    882	/* Ignore cache errors */
    883	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
    884			   *ip - start_ip, intel_pt_insn);
    885
    886	return 0;
    887
    888out_no_cache:
    889	*insn_cnt_ptr = insn_cnt;
    890	return 0;
    891}
    892
    893static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
    894				  uint64_t offset, const char *filename)
    895{
    896	struct addr_filter *filt;
    897	bool have_filter   = false;
    898	bool hit_tracestop = false;
    899	bool hit_filter    = false;
    900
    901	list_for_each_entry(filt, &pt->filts.head, list) {
    902		if (filt->start)
    903			have_filter = true;
    904
    905		if ((filename && !filt->filename) ||
    906		    (!filename && filt->filename) ||
    907		    (filename && strcmp(filename, filt->filename)))
    908			continue;
    909
    910		if (!(offset >= filt->addr && offset < filt->addr + filt->size))
    911			continue;
    912
    913		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
    914			     ip, offset, filename ? filename : "[kernel]",
    915			     filt->start ? "filter" : "stop",
    916			     filt->addr, filt->size);
    917
    918		if (filt->start)
    919			hit_filter = true;
    920		else
    921			hit_tracestop = true;
    922	}
    923
    924	if (!hit_tracestop && !hit_filter)
    925		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
    926			     ip, offset, filename ? filename : "[kernel]");
    927
    928	return hit_tracestop || (have_filter && !hit_filter);
    929}
    930
    931static int __intel_pt_pgd_ip(uint64_t ip, void *data)
    932{
    933	struct intel_pt_queue *ptq = data;
    934	struct thread *thread;
    935	struct addr_location al;
    936	u8 cpumode;
    937	u64 offset;
    938
    939	if (ptq->state->to_nr) {
    940		if (intel_pt_guest_kernel_ip(ip))
    941			return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
    942		/* No support for decoding guest user space */
    943		return -EINVAL;
    944	} else if (ip >= ptq->pt->kernel_start) {
    945		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
    946	}
    947
    948	cpumode = PERF_RECORD_MISC_USER;
    949
    950	thread = ptq->thread;
    951	if (!thread)
    952		return -EINVAL;
    953
    954	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
    955		return -EINVAL;
    956
    957	offset = al.map->map_ip(al.map, ip);
    958
    959	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
    960				     al.map->dso->long_name);
    961}
    962
    963static bool intel_pt_pgd_ip(uint64_t ip, void *data)
    964{
    965	return __intel_pt_pgd_ip(ip, data) > 0;
    966}
    967
    968static bool intel_pt_get_config(struct intel_pt *pt,
    969				struct perf_event_attr *attr, u64 *config)
    970{
    971	if (attr->type == pt->pmu_type) {
    972		if (config)
    973			*config = attr->config;
    974		return true;
    975	}
    976
    977	return false;
    978}
    979
    980static bool intel_pt_exclude_kernel(struct intel_pt *pt)
    981{
    982	struct evsel *evsel;
    983
    984	evlist__for_each_entry(pt->session->evlist, evsel) {
    985		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
    986		    !evsel->core.attr.exclude_kernel)
    987			return false;
    988	}
    989	return true;
    990}
    991
    992static bool intel_pt_return_compression(struct intel_pt *pt)
    993{
    994	struct evsel *evsel;
    995	u64 config;
    996
    997	if (!pt->noretcomp_bit)
    998		return true;
    999
   1000	evlist__for_each_entry(pt->session->evlist, evsel) {
   1001		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
   1002		    (config & pt->noretcomp_bit))
   1003			return false;
   1004	}
   1005	return true;
   1006}
   1007
   1008static bool intel_pt_branch_enable(struct intel_pt *pt)
   1009{
   1010	struct evsel *evsel;
   1011	u64 config;
   1012
   1013	evlist__for_each_entry(pt->session->evlist, evsel) {
   1014		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
   1015		    (config & INTEL_PT_CFG_PASS_THRU) &&
   1016		    !(config & INTEL_PT_CFG_BRANCH_EN))
   1017			return false;
   1018	}
   1019	return true;
   1020}
   1021
   1022static bool intel_pt_disabled_tnt(struct intel_pt *pt)
   1023{
   1024	struct evsel *evsel;
   1025	u64 config;
   1026
   1027	evlist__for_each_entry(pt->session->evlist, evsel) {
   1028		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
   1029		    config & INTEL_PT_CFG_TNT_DIS)
   1030			return true;
   1031	}
   1032	return false;
   1033}
   1034
   1035static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
   1036{
   1037	struct evsel *evsel;
   1038	unsigned int shift;
   1039	u64 config;
   1040
   1041	if (!pt->mtc_freq_bits)
   1042		return 0;
   1043
   1044	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
   1045		config >>= 1;
   1046
   1047	evlist__for_each_entry(pt->session->evlist, evsel) {
   1048		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
   1049			return (config & pt->mtc_freq_bits) >> shift;
   1050	}
   1051	return 0;
   1052}
   1053
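/*
 * Decode without timestamps when the trace has no usable TSC, events carry
 * no sample time, or timeless decoding was requested explicitly.
 */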
   1054static bool intel_pt_timeless_decoding(struct intel_pt *pt)
   1055{
   1056	struct evsel *evsel;
   1057	bool timeless_decoding = true;
   1058	u64 config;
   1059
   1060	if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding)
   1061		return true;
   1062
   1063	evlist__for_each_entry(pt->session->evlist, evsel) {
   1064		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
   1065			return true;
   1066		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
   1067			if (config & pt->tsc_bit)
   1068				timeless_decoding = false;
   1069			else
   1070				return true;
   1071		}
   1072	}
   1073	return timeless_decoding;
   1074}
   1075
   1076static bool intel_pt_tracing_kernel(struct intel_pt *pt)
   1077{
   1078	struct evsel *evsel;
   1079
   1080	evlist__for_each_entry(pt->session->evlist, evsel) {
   1081		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
   1082		    !evsel->core.attr.exclude_kernel)
   1083			return true;
   1084	}
   1085	return false;
   1086}
   1087
   1088static bool intel_pt_have_tsc(struct intel_pt *pt)
   1089{
   1090	struct evsel *evsel;
   1091	bool have_tsc = false;
   1092	u64 config;
   1093
   1094	if (!pt->tsc_bit)
   1095		return false;
   1096
   1097	evlist__for_each_entry(pt->session->evlist, evsel) {
   1098		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
   1099			if (config & pt->tsc_bit)
   1100				have_tsc = true;
   1101			else
   1102				return false;
   1103		}
   1104	}
   1105	return have_tsc;
   1106}
   1107
   1108static bool intel_pt_have_mtc(struct intel_pt *pt)
   1109{
   1110	struct evsel *evsel;
   1111	u64 config;
   1112
   1113	evlist__for_each_entry(pt->session->evlist, evsel) {
   1114		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
   1115		    (config & pt->mtc_bit))
   1116			return true;
   1117	}
   1118	return false;
   1119}
   1120
   1121static bool intel_pt_sampling_mode(struct intel_pt *pt)
   1122{
   1123	struct evsel *evsel;
   1124
   1125	evlist__for_each_entry(pt->session->evlist, evsel) {
   1126		if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
   1127		    evsel->core.attr.aux_sample_size)
   1128			return true;
   1129	}
   1130	return false;
   1131}
   1132
   1133static u64 intel_pt_ctl(struct intel_pt *pt)
   1134{
   1135	struct evsel *evsel;
   1136	u64 config;
   1137
   1138	evlist__for_each_entry(pt->session->evlist, evsel) {
   1139		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
   1140			return config;
   1141	}
   1142	return 0;
   1143}
   1144
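/* Convert nanoseconds to TSC ticks, inverting the tc (TSC to perf time) conversion. */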
   1145static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
   1146{
   1147	u64 quot, rem;
   1148
   1149	quot = ns / pt->tc.time_mult;
   1150	rem  = ns % pt->tc.time_mult;
   1151	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
   1152		pt->tc.time_mult;
   1153}
   1154
   1155static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
   1156{
   1157	size_t sz = sizeof(struct ip_callchain);
   1158
   1159	/* Add 1 to callchain_sz for callchain context */
   1160	sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
   1161	return zalloc(sz);
   1162}
   1163
   1164static int intel_pt_callchain_init(struct intel_pt *pt)
   1165{
   1166	struct evsel *evsel;
   1167
   1168	evlist__for_each_entry(pt->session->evlist, evsel) {
   1169		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
   1170			evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
   1171	}
   1172
   1173	pt->chain = intel_pt_alloc_chain(pt);
   1174	if (!pt->chain)
   1175		return -ENOMEM;
   1176
   1177	return 0;
   1178}
   1179
   1180static void intel_pt_add_callchain(struct intel_pt *pt,
   1181				   struct perf_sample *sample)
   1182{
   1183	struct thread *thread = machine__findnew_thread(pt->machine,
   1184							sample->pid,
   1185							sample->tid);
   1186
   1187	thread_stack__sample_late(thread, sample->cpu, pt->chain,
   1188				  pt->synth_opts.callchain_sz + 1, sample->ip,
   1189				  pt->kernel_start);
   1190
   1191	sample->callchain = pt->chain;
   1192}
   1193
   1194static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
   1195{
   1196	size_t sz = sizeof(struct branch_stack);
   1197
   1198	sz += entry_cnt * sizeof(struct branch_entry);
   1199	return zalloc(sz);
   1200}
   1201
   1202static int intel_pt_br_stack_init(struct intel_pt *pt)
   1203{
   1204	struct evsel *evsel;
   1205
   1206	evlist__for_each_entry(pt->session->evlist, evsel) {
   1207		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
   1208			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
   1209	}
   1210
   1211	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
   1212	if (!pt->br_stack)
   1213		return -ENOMEM;
   1214
   1215	return 0;
   1216}
   1217
   1218static void intel_pt_add_br_stack(struct intel_pt *pt,
   1219				  struct perf_sample *sample)
   1220{
   1221	struct thread *thread = machine__findnew_thread(pt->machine,
   1222							sample->pid,
   1223							sample->tid);
   1224
   1225	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
   1226				     pt->br_stack_sz, sample->ip,
   1227				     pt->kernel_start);
   1228
   1229	sample->branch_stack = pt->br_stack;
   1230}
   1231
   1232/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
   1233#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
   1234
   1235static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
   1236						   unsigned int queue_nr)
   1237{
   1238	struct intel_pt_params params = { .get_trace = 0, };
   1239	struct perf_env *env = pt->machine->env;
   1240	struct intel_pt_queue *ptq;
   1241
   1242	ptq = zalloc(sizeof(struct intel_pt_queue));
   1243	if (!ptq)
   1244		return NULL;
   1245
   1246	if (pt->synth_opts.callchain) {
   1247		ptq->chain = intel_pt_alloc_chain(pt);
   1248		if (!ptq->chain)
   1249			goto out_free;
   1250	}
   1251
   1252	if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
   1253		unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);
   1254
   1255		ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
   1256		if (!ptq->last_branch)
   1257			goto out_free;
   1258	}
   1259
   1260	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
   1261	if (!ptq->event_buf)
   1262		goto out_free;
   1263
   1264	ptq->pt = pt;
   1265	ptq->queue_nr = queue_nr;
   1266	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
   1267	ptq->pid = -1;
   1268	ptq->tid = -1;
   1269	ptq->cpu = -1;
   1270	ptq->next_tid = -1;
   1271
   1272	params.get_trace = intel_pt_get_trace;
   1273	params.walk_insn = intel_pt_walk_next_insn;
   1274	params.lookahead = intel_pt_lookahead;
   1275	params.findnew_vmcs_info = intel_pt_findnew_vmcs_info;
   1276	params.data = ptq;
   1277	params.return_compression = intel_pt_return_compression(pt);
   1278	params.branch_enable = intel_pt_branch_enable(pt);
   1279	params.ctl = intel_pt_ctl(pt);
   1280	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
   1281	params.mtc_period = intel_pt_mtc_period(pt);
   1282	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
   1283	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
   1284	params.quick = pt->synth_opts.quick;
   1285	params.vm_time_correlation = pt->synth_opts.vm_time_correlation;
   1286	params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run;
   1287	params.first_timestamp = pt->first_timestamp;
   1288	params.max_loops = pt->max_loops;
   1289
   1290	/* Cannot walk code without TNT, so force 'quick' mode */
   1291	if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick)
   1292		params.quick = 1;
   1293
   1294	if (pt->filts.cnt > 0)
   1295		params.pgd_ip = intel_pt_pgd_ip;
   1296
   1297	if (pt->synth_opts.instructions) {
   1298		if (pt->synth_opts.period) {
   1299			switch (pt->synth_opts.period_type) {
   1300			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
   1301				params.period_type =
   1302						INTEL_PT_PERIOD_INSTRUCTIONS;
   1303				params.period = pt->synth_opts.period;
   1304				break;
   1305			case PERF_ITRACE_PERIOD_TICKS:
   1306				params.period_type = INTEL_PT_PERIOD_TICKS;
   1307				params.period = pt->synth_opts.period;
   1308				break;
   1309			case PERF_ITRACE_PERIOD_NANOSECS:
   1310				params.period_type = INTEL_PT_PERIOD_TICKS;
   1311				params.period = intel_pt_ns_to_ticks(pt,
   1312							pt->synth_opts.period);
   1313				break;
   1314			default:
   1315				break;
   1316			}
   1317		}
   1318
   1319		if (!params.period) {
   1320			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
   1321			params.period = 1;
   1322		}
   1323	}
   1324
   1325	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
   1326		params.flags |= INTEL_PT_FUP_WITH_NLIP;
   1327
   1328	ptq->decoder = intel_pt_decoder_new(&params);
   1329	if (!ptq->decoder)
   1330		goto out_free;
   1331
   1332	return ptq;
   1333
   1334out_free:
   1335	zfree(&ptq->event_buf);
   1336	zfree(&ptq->last_branch);
   1337	zfree(&ptq->chain);
   1338	free(ptq);
   1339	return NULL;
   1340}
   1341
   1342static void intel_pt_free_queue(void *priv)
   1343{
   1344	struct intel_pt_queue *ptq = priv;
   1345
   1346	if (!ptq)
   1347		return;
   1348	thread__zput(ptq->thread);
   1349	thread__zput(ptq->guest_thread);
   1350	thread__zput(ptq->unknown_guest_thread);
   1351	intel_pt_decoder_free(ptq->decoder);
   1352	zfree(&ptq->event_buf);
   1353	zfree(&ptq->last_branch);
   1354	zfree(&ptq->chain);
   1355	free(ptq);
   1356}
   1357
   1358static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp)
   1359{
   1360	unsigned int i;
   1361
   1362	pt->first_timestamp = timestamp;
   1363
   1364	for (i = 0; i < pt->queues.nr_queues; i++) {
   1365		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
   1366		struct intel_pt_queue *ptq = queue->priv;
   1367
   1368		if (ptq && ptq->decoder)
   1369			intel_pt_set_first_timestamp(ptq->decoder, timestamp);
   1370	}
   1371}
   1372
   1373static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
   1374				     struct auxtrace_queue *queue)
   1375{
   1376	struct intel_pt_queue *ptq = queue->priv;
   1377
   1378	if (queue->tid == -1 || pt->have_sched_switch) {
   1379		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
   1380		if (ptq->tid == -1)
   1381			ptq->pid = -1;
   1382		thread__zput(ptq->thread);
   1383	}
   1384
   1385	if (!ptq->thread && ptq->tid != -1)
   1386		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
   1387
   1388	if (ptq->thread) {
   1389		ptq->pid = ptq->thread->pid_;
   1390		if (queue->cpu == -1)
   1391			ptq->cpu = ptq->thread->cpu;
   1392	}
   1393}
   1394
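/* Translate the decoder state into perf branch flags (PERF_IP_FLAG_*) and instruction length. */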
   1395static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
   1396{
   1397	struct intel_pt *pt = ptq->pt;
   1398
   1399	ptq->insn_len = 0;
   1400	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
   1401		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
   1402	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
   1403		if (!ptq->state->to_ip)
   1404			ptq->flags = PERF_IP_FLAG_BRANCH |
   1405				     PERF_IP_FLAG_TRACE_END;
   1406		else if (ptq->state->from_nr && !ptq->state->to_nr)
   1407			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
   1408				     PERF_IP_FLAG_VMEXIT;
   1409		else
   1410			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
   1411				     PERF_IP_FLAG_ASYNC |
   1412				     PERF_IP_FLAG_INTERRUPT;
   1413	} else {
   1414		if (ptq->state->from_ip)
   1415			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
   1416		else
   1417			ptq->flags = PERF_IP_FLAG_BRANCH |
   1418				     PERF_IP_FLAG_TRACE_BEGIN;
   1419		if (ptq->state->flags & INTEL_PT_IN_TX)
   1420			ptq->flags |= PERF_IP_FLAG_IN_TX;
   1421		ptq->insn_len = ptq->state->insn_len;
   1422		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
   1423	}
   1424
   1425	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
   1426		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
   1427	if (ptq->state->type & INTEL_PT_TRACE_END)
   1428		ptq->flags |= PERF_IP_FLAG_TRACE_END;
   1429
   1430	if (pt->cap_event_trace) {
   1431		if (ptq->state->type & INTEL_PT_IFLAG_CHG) {
   1432			if (!ptq->state->from_iflag)
   1433				ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
   1434			if (ptq->state->from_iflag != ptq->state->to_iflag)
   1435				ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE;
   1436		} else if (!ptq->state->to_iflag) {
   1437			ptq->flags |= PERF_IP_FLAG_INTR_DISABLE;
   1438		}
   1439	}
   1440}
   1441
   1442static void intel_pt_setup_time_range(struct intel_pt *pt,
   1443				      struct intel_pt_queue *ptq)
   1444{
   1445	if (!pt->range_cnt)
   1446		return;
   1447
   1448	ptq->sel_timestamp = pt->time_ranges[0].start;
   1449	ptq->sel_idx = 0;
   1450
   1451	if (ptq->sel_timestamp) {
   1452		ptq->sel_start = true;
   1453	} else {
   1454		ptq->sel_timestamp = pt->time_ranges[0].end;
   1455		ptq->sel_start = false;
   1456	}
   1457}
   1458
   1459static int intel_pt_setup_queue(struct intel_pt *pt,
   1460				struct auxtrace_queue *queue,
   1461				unsigned int queue_nr)
   1462{
   1463	struct intel_pt_queue *ptq = queue->priv;
   1464
   1465	if (list_empty(&queue->head))
   1466		return 0;
   1467
   1468	if (!ptq) {
   1469		ptq = intel_pt_alloc_queue(pt, queue_nr);
   1470		if (!ptq)
   1471			return -ENOMEM;
   1472		queue->priv = ptq;
   1473
   1474		if (queue->cpu != -1)
   1475			ptq->cpu = queue->cpu;
   1476		ptq->tid = queue->tid;
   1477
   1478		ptq->cbr_seen = UINT_MAX;
   1479
   1480		if (pt->sampling_mode && !pt->snapshot_mode &&
   1481		    pt->timeless_decoding)
   1482			ptq->step_through_buffers = true;
   1483
   1484		ptq->sync_switch = pt->sync_switch;
   1485
   1486		intel_pt_setup_time_range(pt, ptq);
   1487	}
   1488
   1489	if (!ptq->on_heap &&
   1490	    (!ptq->sync_switch ||
   1491	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
   1492		const struct intel_pt_state *state;
   1493		int ret;
   1494
   1495		if (pt->timeless_decoding)
   1496			return 0;
   1497
   1498		intel_pt_log("queue %u getting timestamp\n", queue_nr);
   1499		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
   1500			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
   1501
   1502		if (ptq->sel_start && ptq->sel_timestamp) {
   1503			ret = intel_pt_fast_forward(ptq->decoder,
   1504						    ptq->sel_timestamp);
   1505			if (ret)
   1506				return ret;
   1507		}
   1508
   1509		while (1) {
   1510			state = intel_pt_decode(ptq->decoder);
   1511			if (state->err) {
   1512				if (state->err == INTEL_PT_ERR_NODATA) {
   1513					intel_pt_log("queue %u has no timestamp\n",
   1514						     queue_nr);
   1515					return 0;
   1516				}
   1517				continue;
   1518			}
   1519			if (state->timestamp)
   1520				break;
   1521		}
   1522
   1523		ptq->timestamp = state->timestamp;
   1524		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
   1525			     queue_nr, ptq->timestamp);
   1526		ptq->state = state;
   1527		ptq->have_sample = true;
   1528		if (ptq->sel_start && ptq->sel_timestamp &&
   1529		    ptq->timestamp < ptq->sel_timestamp)
   1530			ptq->have_sample = false;
   1531		intel_pt_sample_flags(ptq);
   1532		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
   1533		if (ret)
   1534			return ret;
   1535		ptq->on_heap = true;
   1536	}
   1537
   1538	return 0;
   1539}
   1540
   1541static int intel_pt_setup_queues(struct intel_pt *pt)
   1542{
   1543	unsigned int i;
   1544	int ret;
   1545
   1546	for (i = 0; i < pt->queues.nr_queues; i++) {
   1547		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
   1548		if (ret)
   1549			return ret;
   1550	}
   1551	return 0;
   1552}
   1553
   1554static inline bool intel_pt_skip_event(struct intel_pt *pt)
   1555{
   1556	return pt->synth_opts.initial_skip &&
   1557	       pt->num_events++ < pt->synth_opts.initial_skip;
   1558}
   1559
   1560/*
   1561 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
   1562 * Also ensure CBR is first non-skipped event by allowing for 4 more samples
   1563 * from this decoder state.
   1564 */
   1565static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
   1566{
   1567	return pt->synth_opts.initial_skip &&
   1568	       pt->num_events + 4 < pt->synth_opts.initial_skip;
   1569}
   1570
   1571static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
   1572				   union perf_event *event,
   1573				   struct perf_sample *sample)
   1574{
   1575	event->sample.header.type = PERF_RECORD_SAMPLE;
   1576	event->sample.header.size = sizeof(struct perf_event_header);
   1577
   1578	sample->pid = ptq->pid;
   1579	sample->tid = ptq->tid;
   1580	sample->cpu = ptq->cpu;
   1581	sample->insn_len = ptq->insn_len;
   1582	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
   1583}
   1584
   1585static void intel_pt_prep_b_sample(struct intel_pt *pt,
   1586				   struct intel_pt_queue *ptq,
   1587				   union perf_event *event,
   1588				   struct perf_sample *sample)
   1589{
   1590	intel_pt_prep_a_sample(ptq, event, sample);
   1591
   1592	if (!pt->timeless_decoding)
   1593		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
   1594
   1595	sample->ip = ptq->state->from_ip;
   1596	sample->addr = ptq->state->to_ip;
   1597	sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr);
   1598	sample->period = 1;
   1599	sample->flags = ptq->flags;
   1600
   1601	event->sample.header.misc = sample->cpumode;
   1602}
   1603
   1604static int intel_pt_inject_event(union perf_event *event,
   1605				 struct perf_sample *sample, u64 type)
   1606{
   1607	event->header.size = perf_event__sample_event_size(sample, type, 0);
   1608	return perf_event__synthesize_sample(event, type, 0, sample);
   1609}
   1610
   1611static inline int intel_pt_opt_inject(struct intel_pt *pt,
   1612				      union perf_event *event,
   1613				      struct perf_sample *sample, u64 type)
   1614{
   1615	if (!pt->synth_opts.inject)
   1616		return 0;
   1617
   1618	return intel_pt_inject_event(event, sample, type);
   1619}
   1620
   1621static int intel_pt_deliver_synth_event(struct intel_pt *pt,
   1622					union perf_event *event,
   1623					struct perf_sample *sample, u64 type)
   1624{
   1625	int ret;
   1626
   1627	ret = intel_pt_opt_inject(pt, event, sample, type);
   1628	if (ret)
   1629		return ret;
   1630
   1631	ret = perf_session__deliver_synth_event(pt->session, event, sample);
   1632	if (ret)
   1633		pr_err("Intel PT: failed to deliver event, error %d\n", ret);
   1634
   1635	return ret;
   1636}
   1637
   1638static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
   1639{
   1640	struct intel_pt *pt = ptq->pt;
   1641	union perf_event *event = ptq->event_buf;
   1642	struct perf_sample sample = { .ip = 0, };
   1643	struct dummy_branch_stack {
   1644		u64			nr;
   1645		u64			hw_idx;
   1646		struct branch_entry	entries;
   1647	} dummy_bs;
   1648
   1649	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
   1650		return 0;
   1651
   1652	if (intel_pt_skip_event(pt))
   1653		return 0;
   1654
   1655	intel_pt_prep_b_sample(pt, ptq, event, &sample);
   1656
   1657	sample.id = ptq->pt->branches_id;
   1658	sample.stream_id = ptq->pt->branches_id;
   1659
   1660	/*
   1661	 * perf report cannot handle events without a branch stack when using
   1662	 * SORT_MODE__BRANCH so make a dummy one.
   1663	 */
   1664	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
   1665		dummy_bs = (struct dummy_branch_stack){
   1666			.nr = 1,
   1667			.hw_idx = -1ULL,
   1668			.entries = {
   1669				.from = sample.ip,
   1670				.to = sample.addr,
   1671			},
   1672		};
   1673		sample.branch_stack = (struct branch_stack *)&dummy_bs;
   1674	}
   1675
   1676	if (ptq->sample_ipc)
   1677		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
   1678	if (sample.cyc_cnt) {
   1679		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
   1680		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
   1681		ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
   1682	}
   1683
   1684	return intel_pt_deliver_synth_event(pt, event, &sample,
   1685					    pt->branches_sample_type);
   1686}
   1687
   1688static void intel_pt_prep_sample(struct intel_pt *pt,
   1689				 struct intel_pt_queue *ptq,
   1690				 union perf_event *event,
   1691				 struct perf_sample *sample)
   1692{
   1693	intel_pt_prep_b_sample(pt, ptq, event, sample);
   1694
   1695	if (pt->synth_opts.callchain) {
   1696		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
   1697				     pt->synth_opts.callchain_sz + 1,
   1698				     sample->ip, pt->kernel_start);
   1699		sample->callchain = ptq->chain;
   1700	}
   1701
   1702	if (pt->synth_opts.last_branch) {
   1703		thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
   1704					pt->br_stack_sz);
   1705		sample->branch_stack = ptq->last_branch;
   1706	}
   1707}
   1708
   1709static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
   1710{
   1711	struct intel_pt *pt = ptq->pt;
   1712	union perf_event *event = ptq->event_buf;
   1713	struct perf_sample sample = { .ip = 0, };
   1714
   1715	if (intel_pt_skip_event(pt))
   1716		return 0;
   1717
   1718	intel_pt_prep_sample(pt, ptq, event, &sample);
   1719
   1720	sample.id = ptq->pt->instructions_id;
   1721	sample.stream_id = ptq->pt->instructions_id;
   1722	if (pt->synth_opts.quick)
   1723		sample.period = 1;
   1724	else
   1725		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
   1726
   1727	if (ptq->sample_ipc)
   1728		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
   1729	if (sample.cyc_cnt) {
   1730		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
   1731		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
   1732		ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
   1733	}
   1734
   1735	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
   1736
   1737	return intel_pt_deliver_synth_event(pt, event, &sample,
   1738					    pt->instructions_sample_type);
   1739}
   1740
   1741static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
   1742{
   1743	struct intel_pt *pt = ptq->pt;
   1744	union perf_event *event = ptq->event_buf;
   1745	struct perf_sample sample = { .ip = 0, };
   1746
   1747	if (intel_pt_skip_event(pt))
   1748		return 0;
   1749
   1750	intel_pt_prep_sample(pt, ptq, event, &sample);
   1751
   1752	sample.id = ptq->pt->transactions_id;
   1753	sample.stream_id = ptq->pt->transactions_id;
   1754
   1755	return intel_pt_deliver_synth_event(pt, event, &sample,
   1756					    pt->transactions_sample_type);
   1757}
   1758
   1759static void intel_pt_prep_p_sample(struct intel_pt *pt,
   1760				   struct intel_pt_queue *ptq,
   1761				   union perf_event *event,
   1762				   struct perf_sample *sample)
   1763{
   1764	intel_pt_prep_sample(pt, ptq, event, sample);
   1765
   1766	/*
   1767	 * Zero IP is used to mean "trace start" but that is not the case for
   1768	 * power or PTWRITE events with no IP, so clear the flags.
   1769	 */
   1770	if (!sample->ip)
   1771		sample->flags = 0;
   1772}
   1773
   1774static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
   1775{
   1776	struct intel_pt *pt = ptq->pt;
   1777	union perf_event *event = ptq->event_buf;
   1778	struct perf_sample sample = { .ip = 0, };
   1779	struct perf_synth_intel_ptwrite raw;
   1780
   1781	if (intel_pt_skip_event(pt))
   1782		return 0;
   1783
   1784	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   1785
   1786	sample.id = ptq->pt->ptwrites_id;
   1787	sample.stream_id = ptq->pt->ptwrites_id;
   1788
   1789	raw.flags = 0;
   1790	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
   1791	raw.payload = cpu_to_le64(ptq->state->ptw_payload);
   1792
   1793	sample.raw_size = perf_synth__raw_size(raw);
   1794	sample.raw_data = perf_synth__raw_data(&raw);
   1795
   1796	return intel_pt_deliver_synth_event(pt, event, &sample,
   1797					    pt->ptwrites_sample_type);
   1798}
   1799
   1800static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
   1801{
   1802	struct intel_pt *pt = ptq->pt;
   1803	union perf_event *event = ptq->event_buf;
   1804	struct perf_sample sample = { .ip = 0, };
   1805	struct perf_synth_intel_cbr raw;
   1806	u32 flags;
   1807
   1808	if (intel_pt_skip_cbr_event(pt))
   1809		return 0;
   1810
   1811	ptq->cbr_seen = ptq->state->cbr;
   1812
   1813	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   1814
   1815	sample.id = ptq->pt->cbr_id;
   1816	sample.stream_id = ptq->pt->cbr_id;
   1817
   1818	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
   1819	raw.flags = cpu_to_le32(flags);
   1820	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
   1821	raw.reserved3 = 0;
   1822
   1823	sample.raw_size = perf_synth__raw_size(raw);
   1824	sample.raw_data = perf_synth__raw_data(&raw);
   1825
   1826	return intel_pt_deliver_synth_event(pt, event, &sample,
   1827					    pt->pwr_events_sample_type);
   1828}
   1829
   1830static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
   1831{
   1832	struct intel_pt *pt = ptq->pt;
   1833	union perf_event *event = ptq->event_buf;
   1834	struct perf_sample sample = { .ip = 0, };
   1835	struct perf_synth_intel_psb raw;
   1836
   1837	if (intel_pt_skip_event(pt))
   1838		return 0;
   1839
   1840	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   1841
   1842	sample.id = ptq->pt->psb_id;
   1843	sample.stream_id = ptq->pt->psb_id;
   1844	sample.flags = 0;
   1845
   1846	raw.reserved = 0;
   1847	raw.offset = ptq->state->psb_offset;
   1848
   1849	sample.raw_size = perf_synth__raw_size(raw);
   1850	sample.raw_data = perf_synth__raw_data(&raw);
   1851
   1852	return intel_pt_deliver_synth_event(pt, event, &sample,
   1853					    pt->pwr_events_sample_type);
   1854}
   1855
   1856static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
   1857{
   1858	struct intel_pt *pt = ptq->pt;
   1859	union perf_event *event = ptq->event_buf;
   1860	struct perf_sample sample = { .ip = 0, };
   1861	struct perf_synth_intel_mwait raw;
   1862
   1863	if (intel_pt_skip_event(pt))
   1864		return 0;
   1865
   1866	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   1867
   1868	sample.id = ptq->pt->mwait_id;
   1869	sample.stream_id = ptq->pt->mwait_id;
   1870
   1871	raw.reserved = 0;
   1872	raw.payload = cpu_to_le64(ptq->state->mwait_payload);
   1873
   1874	sample.raw_size = perf_synth__raw_size(raw);
   1875	sample.raw_data = perf_synth__raw_data(&raw);
   1876
   1877	return intel_pt_deliver_synth_event(pt, event, &sample,
   1878					    pt->pwr_events_sample_type);
   1879}
   1880
   1881static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
   1882{
   1883	struct intel_pt *pt = ptq->pt;
   1884	union perf_event *event = ptq->event_buf;
   1885	struct perf_sample sample = { .ip = 0, };
   1886	struct perf_synth_intel_pwre raw;
   1887
   1888	if (intel_pt_skip_event(pt))
   1889		return 0;
   1890
   1891	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   1892
   1893	sample.id = ptq->pt->pwre_id;
   1894	sample.stream_id = ptq->pt->pwre_id;
   1895
   1896	raw.reserved = 0;
   1897	raw.payload = cpu_to_le64(ptq->state->pwre_payload);
   1898
   1899	sample.raw_size = perf_synth__raw_size(raw);
   1900	sample.raw_data = perf_synth__raw_data(&raw);
   1901
   1902	return intel_pt_deliver_synth_event(pt, event, &sample,
   1903					    pt->pwr_events_sample_type);
   1904}
   1905
   1906static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
   1907{
   1908	struct intel_pt *pt = ptq->pt;
   1909	union perf_event *event = ptq->event_buf;
   1910	struct perf_sample sample = { .ip = 0, };
   1911	struct perf_synth_intel_exstop raw;
   1912
   1913	if (intel_pt_skip_event(pt))
   1914		return 0;
   1915
   1916	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   1917
   1918	sample.id = ptq->pt->exstop_id;
   1919	sample.stream_id = ptq->pt->exstop_id;
   1920
   1921	raw.flags = 0;
   1922	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
   1923
   1924	sample.raw_size = perf_synth__raw_size(raw);
   1925	sample.raw_data = perf_synth__raw_data(&raw);
   1926
   1927	return intel_pt_deliver_synth_event(pt, event, &sample,
   1928					    pt->pwr_events_sample_type);
   1929}
   1930
   1931static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
   1932{
   1933	struct intel_pt *pt = ptq->pt;
   1934	union perf_event *event = ptq->event_buf;
   1935	struct perf_sample sample = { .ip = 0, };
   1936	struct perf_synth_intel_pwrx raw;
   1937
   1938	if (intel_pt_skip_event(pt))
   1939		return 0;
   1940
   1941	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   1942
   1943	sample.id = ptq->pt->pwrx_id;
   1944	sample.stream_id = ptq->pt->pwrx_id;
   1945
   1946	raw.reserved = 0;
   1947	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
   1948
   1949	sample.raw_size = perf_synth__raw_size(raw);
   1950	sample.raw_data = perf_synth__raw_data(&raw);
   1951
   1952	return intel_pt_deliver_synth_event(pt, event, &sample,
   1953					    pt->pwr_events_sample_type);
   1954}
   1955
   1956/*
    1957 * PEBS gp_regs array indexes, plus 1 so that 0 means not present. Refer to
   1958 * intel_pt_add_gp_regs().
   1959 */
   1960static const int pebs_gp_regs[] = {
   1961	[PERF_REG_X86_FLAGS]	= 1,
   1962	[PERF_REG_X86_IP]	= 2,
   1963	[PERF_REG_X86_AX]	= 3,
   1964	[PERF_REG_X86_CX]	= 4,
   1965	[PERF_REG_X86_DX]	= 5,
   1966	[PERF_REG_X86_BX]	= 6,
   1967	[PERF_REG_X86_SP]	= 7,
   1968	[PERF_REG_X86_BP]	= 8,
   1969	[PERF_REG_X86_SI]	= 9,
   1970	[PERF_REG_X86_DI]	= 10,
   1971	[PERF_REG_X86_R8]	= 11,
   1972	[PERF_REG_X86_R9]	= 12,
   1973	[PERF_REG_X86_R10]	= 13,
   1974	[PERF_REG_X86_R11]	= 14,
   1975	[PERF_REG_X86_R12]	= 15,
   1976	[PERF_REG_X86_R13]	= 16,
   1977	[PERF_REG_X86_R14]	= 17,
   1978	[PERF_REG_X86_R15]	= 18,
   1979};
   1980
   1981static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
   1982				 const struct intel_pt_blk_items *items,
   1983				 u64 regs_mask)
   1984{
   1985	const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
   1986	u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
   1987	u32 bit;
   1988	int i;
   1989
   1990	for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
   1991		/* Get the PEBS gp_regs array index */
   1992		int n = pebs_gp_regs[i] - 1;
   1993
   1994		if (n < 0)
   1995			continue;
   1996		/*
   1997		 * Add only registers that were requested (i.e. 'regs_mask') and
   1998		 * that were provided (i.e. 'mask'), and update the resulting
   1999		 * mask (i.e. 'intr_regs->mask') accordingly.
   2000		 */
   2001		if (mask & 1 << n && regs_mask & bit) {
   2002			intr_regs->mask |= bit;
   2003			*pos++ = gp_regs[n];
   2004		}
   2005	}
   2006
   2007	return pos;
   2008}
   2009
   2010#ifndef PERF_REG_X86_XMM0
   2011#define PERF_REG_X86_XMM0 32
   2012#endif
   2013
   2014static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
   2015			     const struct intel_pt_blk_items *items,
   2016			     u64 regs_mask)
   2017{
   2018	u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
   2019	const u64 *xmm = items->xmm;
   2020
   2021	/*
   2022	 * If there are any XMM registers, then there should be all of them.
   2023	 * Nevertheless, follow the logic to add only registers that were
   2024	 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
   2025	 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
   2026	 */
   2027	intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
   2028
   2029	for (; mask; mask >>= 1, xmm++) {
   2030		if (mask & 1)
   2031			*pos++ = *xmm;
   2032	}
   2033}
   2034
   2035#define LBR_INFO_MISPRED	(1ULL << 63)
   2036#define LBR_INFO_IN_TX		(1ULL << 62)
   2037#define LBR_INFO_ABORT		(1ULL << 61)
   2038#define LBR_INFO_CYCLES		0xffff
   2039
    2040/* Refer to the kernel's intel_pmu_store_pebs_lbrs() */
   2041static u64 intel_pt_lbr_flags(u64 info)
   2042{
   2043	union {
   2044		struct branch_flags flags;
   2045		u64 result;
   2046	} u;
   2047
   2048	u.result	  = 0;
   2049	u.flags.mispred	  = !!(info & LBR_INFO_MISPRED);
   2050	u.flags.predicted = !(info & LBR_INFO_MISPRED);
   2051	u.flags.in_tx	  = !!(info & LBR_INFO_IN_TX);
   2052	u.flags.abort	  = !!(info & LBR_INFO_ABORT);
   2053	u.flags.cycles	  = info & LBR_INFO_CYCLES;
   2054
   2055	return u.result;
   2056}
   2057
   2058static void intel_pt_add_lbrs(struct branch_stack *br_stack,
   2059			      const struct intel_pt_blk_items *items)
   2060{
   2061	u64 *to;
   2062	int i;
   2063
   2064	br_stack->nr = 0;
   2065
   2066	to = &br_stack->entries[0].from;
   2067
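        	/*
        	 * Each LBR entry is exported as three consecutive block item
        	 * values (from, to, info), so consume the mask three bits at a
        	 * time and copy only complete triplets.
        	 */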
   2068	for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
   2069		u32 mask = items->mask[i];
   2070		const u64 *from = items->val[i];
   2071
   2072		for (; mask; mask >>= 3, from += 3) {
   2073			if ((mask & 7) == 7) {
   2074				*to++ = from[0];
   2075				*to++ = from[1];
   2076				*to++ = intel_pt_lbr_flags(from[2]);
   2077				br_stack->nr += 1;
   2078			}
   2079		}
   2080	}
   2081}
   2082
   2083static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
   2084{
   2085	const struct intel_pt_blk_items *items = &ptq->state->items;
   2086	struct perf_sample sample = { .ip = 0, };
   2087	union perf_event *event = ptq->event_buf;
   2088	struct intel_pt *pt = ptq->pt;
   2089	u64 sample_type = evsel->core.attr.sample_type;
   2090	u8 cpumode;
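        	/* One slot for each possible bit of the interrupt regs mask */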
   2091	u64 regs[8 * sizeof(sample.intr_regs.mask)];
   2092
   2093	if (intel_pt_skip_event(pt))
   2094		return 0;
   2095
   2096	intel_pt_prep_a_sample(ptq, event, &sample);
   2097
   2098	sample.id = id;
   2099	sample.stream_id = id;
   2100
   2101	if (!evsel->core.attr.freq)
   2102		sample.period = evsel->core.attr.sample_period;
   2103
   2104	/* No support for non-zero CS base */
   2105	if (items->has_ip)
   2106		sample.ip = items->ip;
   2107	else if (items->has_rip)
   2108		sample.ip = items->rip;
   2109	else
   2110		sample.ip = ptq->state->from_ip;
   2111
   2112	cpumode = intel_pt_cpumode(ptq, sample.ip, 0);
   2113
   2114	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
   2115
   2116	sample.cpumode = cpumode;
   2117
   2118	if (sample_type & PERF_SAMPLE_TIME) {
   2119		u64 timestamp = 0;
   2120
   2121		if (items->has_timestamp)
   2122			timestamp = items->timestamp;
   2123		else if (!pt->timeless_decoding)
   2124			timestamp = ptq->timestamp;
   2125		if (timestamp)
   2126			sample.time = tsc_to_perf_time(timestamp, &pt->tc);
   2127	}
   2128
   2129	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
   2130	    pt->synth_opts.callchain) {
   2131		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
   2132				     pt->synth_opts.callchain_sz, sample.ip,
   2133				     pt->kernel_start);
   2134		sample.callchain = ptq->chain;
   2135	}
   2136
   2137	if (sample_type & PERF_SAMPLE_REGS_INTR &&
   2138	    (items->mask[INTEL_PT_GP_REGS_POS] ||
   2139	     items->mask[INTEL_PT_XMM_POS])) {
   2140		u64 regs_mask = evsel->core.attr.sample_regs_intr;
   2141		u64 *pos;
   2142
   2143		sample.intr_regs.abi = items->is_32_bit ?
   2144				       PERF_SAMPLE_REGS_ABI_32 :
   2145				       PERF_SAMPLE_REGS_ABI_64;
   2146		sample.intr_regs.regs = regs;
   2147
   2148		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
   2149
   2150		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
   2151	}
   2152
   2153	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
   2154		if (items->mask[INTEL_PT_LBR_0_POS] ||
   2155		    items->mask[INTEL_PT_LBR_1_POS] ||
   2156		    items->mask[INTEL_PT_LBR_2_POS]) {
   2157			intel_pt_add_lbrs(ptq->last_branch, items);
   2158		} else if (pt->synth_opts.last_branch) {
   2159			thread_stack__br_sample(ptq->thread, ptq->cpu,
   2160						ptq->last_branch,
   2161						pt->br_stack_sz);
   2162		} else {
   2163			ptq->last_branch->nr = 0;
   2164		}
   2165		sample.branch_stack = ptq->last_branch;
   2166	}
   2167
   2168	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
   2169		sample.addr = items->mem_access_address;
   2170
   2171	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
   2172		/*
    2173		 * Refer to the kernel's setup_pebs_adaptive_sample_data() and
   2174		 * intel_hsw_weight().
   2175		 */
   2176		if (items->has_mem_access_latency) {
   2177			u64 weight = items->mem_access_latency >> 32;
   2178
   2179			/*
    2180			 * Starting from SPR, the mem access latency field
    2181			 * contains both cache latency [47:32] and instruction
    2182			 * latency [15:0]. The cache latency is the same as the
    2183			 * mem access latency on previous platforms.
    2184			 *
    2185			 * In practice, no memory access could take longer than
    2186			 * 4G cycles, so latency >> 32 is used to distinguish the
    2187			 * two formats of the mem access latency field.
   2188			 */
   2189			if (weight > 0) {
   2190				sample.weight = weight & 0xffff;
   2191				sample.ins_lat = items->mem_access_latency & 0xffff;
   2192			} else
   2193				sample.weight = items->mem_access_latency;
   2194		}
   2195		if (!sample.weight && items->has_tsx_aux_info) {
   2196			/* Cycles last block */
   2197			sample.weight = (u32)items->tsx_aux_info;
   2198		}
   2199	}
   2200
   2201	if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
   2202		u64 ax = items->has_rax ? items->rax : 0;
    2203		/* Refer to the kernel's intel_hsw_transaction() */
   2204		u64 txn = (u8)(items->tsx_aux_info >> 32);
   2205
   2206		/* For RTM XABORTs also log the abort code from AX */
   2207		if (txn & PERF_TXN_TRANSACTION && ax & 1)
   2208			txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
   2209		sample.transaction = txn;
   2210	}
   2211
   2212	return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
   2213}
   2214
   2215static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
   2216{
   2217	struct intel_pt *pt = ptq->pt;
   2218	struct evsel *evsel = pt->pebs_evsel;
   2219	u64 id = evsel->core.id[0];
   2220
   2221	return intel_pt_do_synth_pebs_sample(ptq, evsel, id);
   2222}
   2223
   2224static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
   2225{
   2226	const struct intel_pt_blk_items *items = &ptq->state->items;
   2227	struct intel_pt_pebs_event *pe;
   2228	struct intel_pt *pt = ptq->pt;
   2229	int err = -EINVAL;
   2230	int hw_id;
   2231
   2232	if (!items->has_applicable_counters || !items->applicable_counters) {
   2233		if (!pt->single_pebs)
   2234			pr_err("PEBS-via-PT record with no applicable_counters\n");
   2235		return intel_pt_synth_single_pebs_sample(ptq);
   2236	}
   2237
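        	/*
        	 * applicable_counters is a bitmask of the hardware counters that
        	 * contributed to this PEBS record. Each set bit was mapped to an
        	 * evsel by a PERF_RECORD_AUX_OUTPUT_HW_ID event (see
        	 * intel_pt_process_aux_output_hw_id()).
        	 */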
   2238	for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) {
   2239		pe = &ptq->pebs[hw_id];
   2240		if (!pe->evsel) {
   2241			if (!pt->single_pebs)
   2242				pr_err("PEBS-via-PT record with no matching event, hw_id %d\n",
   2243				       hw_id);
   2244			return intel_pt_synth_single_pebs_sample(ptq);
   2245		}
   2246		err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
   2247		if (err)
   2248			return err;
   2249	}
   2250
   2251	return err;
   2252}
   2253
   2254static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq)
   2255{
   2256	struct intel_pt *pt = ptq->pt;
   2257	union perf_event *event = ptq->event_buf;
   2258	struct perf_sample sample = { .ip = 0, };
   2259	struct {
   2260		struct perf_synth_intel_evt cfe;
   2261		struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS];
   2262	} raw;
   2263	int i;
   2264
   2265	if (intel_pt_skip_event(pt))
   2266		return 0;
   2267
   2268	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   2269
   2270	sample.id        = ptq->pt->evt_id;
   2271	sample.stream_id = ptq->pt->evt_id;
   2272
   2273	raw.cfe.type     = ptq->state->cfe_type;
   2274	raw.cfe.reserved = 0;
   2275	raw.cfe.ip       = !!(ptq->state->flags & INTEL_PT_FUP_IP);
   2276	raw.cfe.vector   = ptq->state->cfe_vector;
   2277	raw.cfe.evd_cnt  = ptq->state->evd_cnt;
   2278
   2279	for (i = 0; i < ptq->state->evd_cnt; i++) {
   2280		raw.evd[i].et       = 0;
   2281		raw.evd[i].evd_type = ptq->state->evd[i].type;
   2282		raw.evd[i].payload  = ptq->state->evd[i].payload;
   2283	}
   2284
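        	/* Raw data is the CFE record followed by evd_cnt event data records */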
    2285	sample.raw_size = perf_synth__raw_size(raw.cfe) +
   2286			  ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd);
   2287	sample.raw_data = perf_synth__raw_data(&raw);
   2288
   2289	return intel_pt_deliver_synth_event(pt, event, &sample,
   2290					    pt->evt_sample_type);
   2291}
   2292
   2293static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq)
   2294{
   2295	struct intel_pt *pt = ptq->pt;
   2296	union perf_event *event = ptq->event_buf;
   2297	struct perf_sample sample = { .ip = 0, };
   2298	struct perf_synth_intel_iflag_chg raw;
   2299
   2300	if (intel_pt_skip_event(pt))
   2301		return 0;
   2302
   2303	intel_pt_prep_p_sample(pt, ptq, event, &sample);
   2304
   2305	sample.id = ptq->pt->iflag_chg_id;
   2306	sample.stream_id = ptq->pt->iflag_chg_id;
   2307
   2308	raw.flags = 0;
   2309	raw.iflag = ptq->state->to_iflag;
   2310
   2311	if (ptq->state->type & INTEL_PT_BRANCH) {
   2312		raw.via_branch = 1;
   2313		raw.branch_ip = ptq->state->to_ip;
   2314	} else {
   2315		sample.addr = 0;
   2316	}
   2317	sample.flags = ptq->flags;
   2318
   2319	sample.raw_size = perf_synth__raw_size(raw);
   2320	sample.raw_data = perf_synth__raw_data(&raw);
   2321
   2322	return intel_pt_deliver_synth_event(pt, event, &sample,
   2323					    pt->iflag_chg_sample_type);
   2324}
   2325
   2326static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
   2327				pid_t pid, pid_t tid, u64 ip, u64 timestamp)
   2328{
   2329	union perf_event event;
   2330	char msg[MAX_AUXTRACE_ERROR_MSG];
   2331	int err;
   2332
   2333	if (pt->synth_opts.error_minus_flags) {
   2334		if (code == INTEL_PT_ERR_OVR &&
   2335		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
   2336			return 0;
   2337		if (code == INTEL_PT_ERR_LOST &&
   2338		    pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
   2339			return 0;
   2340	}
   2341
   2342	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
   2343
   2344	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
   2345			     code, cpu, pid, tid, ip, msg, timestamp);
   2346
   2347	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
   2348	if (err)
   2349		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
   2350		       err);
   2351
   2352	return err;
   2353}
   2354
   2355static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
   2356				 const struct intel_pt_state *state)
   2357{
   2358	struct intel_pt *pt = ptq->pt;
   2359	u64 tm = ptq->timestamp;
   2360
   2361	tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
   2362
   2363	return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
   2364				    ptq->tid, state->from_ip, tm);
   2365}
   2366
   2367static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
   2368{
   2369	struct auxtrace_queue *queue;
   2370	pid_t tid = ptq->next_tid;
   2371	int err;
   2372
   2373	if (tid == -1)
   2374		return 0;
   2375
   2376	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
   2377
   2378	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
   2379
   2380	queue = &pt->queues.queue_array[ptq->queue_nr];
   2381	intel_pt_set_pid_tid_cpu(pt, queue);
   2382
   2383	ptq->next_tid = -1;
   2384
   2385	return err;
   2386}
   2387
   2388static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
   2389{
   2390	struct intel_pt *pt = ptq->pt;
   2391
   2392	return ip == pt->switch_ip &&
   2393	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
   2394	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
   2395			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
   2396}
   2397
   2398#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
   2399			  INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)
   2400
   2401static int intel_pt_sample(struct intel_pt_queue *ptq)
   2402{
   2403	const struct intel_pt_state *state = ptq->state;
   2404	struct intel_pt *pt = ptq->pt;
   2405	int err;
   2406
   2407	if (!ptq->have_sample)
   2408		return 0;
   2409
   2410	ptq->have_sample = false;
   2411
   2412	if (pt->synth_opts.approx_ipc) {
   2413		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
   2414		ptq->ipc_cyc_cnt = ptq->state->cycles;
   2415		ptq->sample_ipc = true;
   2416	} else {
   2417		ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
   2418		ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
   2419		ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC;
   2420	}
   2421
   2422	/* Ensure guest code maps are set up */
   2423	if (symbol_conf.guest_code && (state->from_nr || state->to_nr))
   2424		intel_pt_get_guest(ptq);
   2425
   2426	/*
   2427	 * Do PEBS first to allow for the possibility that the PEBS timestamp
   2428	 * precedes the current timestamp.
   2429	 */
   2430	if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
   2431		err = intel_pt_synth_pebs_sample(ptq);
   2432		if (err)
   2433			return err;
   2434	}
   2435
   2436	if (pt->synth_opts.intr_events) {
   2437		if (state->type & INTEL_PT_EVT) {
   2438			err = intel_pt_synth_events_sample(ptq);
   2439			if (err)
   2440				return err;
   2441		}
   2442		if (state->type & INTEL_PT_IFLAG_CHG) {
   2443			err = intel_pt_synth_iflag_chg_sample(ptq);
   2444			if (err)
   2445				return err;
   2446		}
   2447	}
   2448
   2449	if (pt->sample_pwr_events) {
   2450		if (state->type & INTEL_PT_PSB_EVT) {
   2451			err = intel_pt_synth_psb_sample(ptq);
   2452			if (err)
   2453				return err;
   2454		}
   2455		if (ptq->state->cbr != ptq->cbr_seen) {
   2456			err = intel_pt_synth_cbr_sample(ptq);
   2457			if (err)
   2458				return err;
   2459		}
   2460		if (state->type & INTEL_PT_PWR_EVT) {
   2461			if (state->type & INTEL_PT_MWAIT_OP) {
   2462				err = intel_pt_synth_mwait_sample(ptq);
   2463				if (err)
   2464					return err;
   2465			}
   2466			if (state->type & INTEL_PT_PWR_ENTRY) {
   2467				err = intel_pt_synth_pwre_sample(ptq);
   2468				if (err)
   2469					return err;
   2470			}
   2471			if (state->type & INTEL_PT_EX_STOP) {
   2472				err = intel_pt_synth_exstop_sample(ptq);
   2473				if (err)
   2474					return err;
   2475			}
   2476			if (state->type & INTEL_PT_PWR_EXIT) {
   2477				err = intel_pt_synth_pwrx_sample(ptq);
   2478				if (err)
   2479					return err;
   2480			}
   2481		}
   2482	}
   2483
   2484	if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
   2485		err = intel_pt_synth_instruction_sample(ptq);
   2486		if (err)
   2487			return err;
   2488	}
   2489
   2490	if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
   2491		err = intel_pt_synth_transaction_sample(ptq);
   2492		if (err)
   2493			return err;
   2494	}
   2495
   2496	if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
   2497		err = intel_pt_synth_ptwrite_sample(ptq);
   2498		if (err)
   2499			return err;
   2500	}
   2501
   2502	if (!(state->type & INTEL_PT_BRANCH))
   2503		return 0;
   2504
   2505	if (pt->use_thread_stack) {
   2506		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
   2507				    state->from_ip, state->to_ip, ptq->insn_len,
   2508				    state->trace_nr, pt->callstack,
   2509				    pt->br_stack_sz_plus,
   2510				    pt->mispred_all);
   2511	} else {
   2512		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
   2513	}
   2514
   2515	if (pt->sample_branches) {
   2516		if (state->from_nr != state->to_nr &&
   2517		    state->from_ip && state->to_ip) {
   2518			struct intel_pt_state *st = (struct intel_pt_state *)state;
   2519			u64 to_ip = st->to_ip;
   2520			u64 from_ip = st->from_ip;
   2521
   2522			/*
   2523			 * perf cannot handle having different machines for ip
   2524			 * and addr, so create 2 branches.
   2525			 */
   2526			st->to_ip = 0;
   2527			err = intel_pt_synth_branch_sample(ptq);
   2528			if (err)
   2529				return err;
   2530			st->from_ip = 0;
   2531			st->to_ip = to_ip;
   2532			err = intel_pt_synth_branch_sample(ptq);
   2533			st->from_ip = from_ip;
   2534		} else {
   2535			err = intel_pt_synth_branch_sample(ptq);
   2536		}
   2537		if (err)
   2538			return err;
   2539	}
   2540
   2541	if (!ptq->sync_switch)
   2542		return 0;
   2543
   2544	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
   2545		switch (ptq->switch_state) {
   2546		case INTEL_PT_SS_NOT_TRACING:
   2547		case INTEL_PT_SS_UNKNOWN:
   2548		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
   2549			err = intel_pt_next_tid(pt, ptq);
   2550			if (err)
   2551				return err;
   2552			ptq->switch_state = INTEL_PT_SS_TRACING;
   2553			break;
   2554		default:
   2555			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
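        			/*
        			 * Returning 1 takes this queue off the heap until
        			 * the matching switch event re-adds it (see
        			 * intel_pt_sync_switch()).
        			 */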
   2556			return 1;
   2557		}
   2558	} else if (!state->to_ip) {
   2559		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
   2560	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
   2561		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
   2562	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
   2563		   state->to_ip == pt->ptss_ip &&
   2564		   (ptq->flags & PERF_IP_FLAG_CALL)) {
   2565		ptq->switch_state = INTEL_PT_SS_TRACING;
   2566	}
   2567
   2568	return 0;
   2569}
   2570
   2571static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
   2572{
   2573	struct machine *machine = pt->machine;
   2574	struct map *map;
   2575	struct symbol *sym, *start;
   2576	u64 ip, switch_ip = 0;
   2577	const char *ptss;
   2578
   2579	if (ptss_ip)
   2580		*ptss_ip = 0;
   2581
   2582	map = machine__kernel_map(machine);
   2583	if (!map)
   2584		return 0;
   2585
   2586	if (map__load(map))
   2587		return 0;
   2588
   2589	start = dso__first_symbol(map->dso);
   2590
   2591	for (sym = start; sym; sym = dso__next_symbol(sym)) {
   2592		if (sym->binding == STB_GLOBAL &&
   2593		    !strcmp(sym->name, "__switch_to")) {
   2594			ip = map->unmap_ip(map, sym->start);
   2595			if (ip >= map->start && ip < map->end) {
   2596				switch_ip = ip;
   2597				break;
   2598			}
   2599		}
   2600	}
   2601
   2602	if (!switch_ip || !ptss_ip)
   2603		return 0;
   2604
   2605	if (pt->have_sched_switch == 1)
   2606		ptss = "perf_trace_sched_switch";
   2607	else
   2608		ptss = "__perf_event_task_sched_out";
   2609
   2610	for (sym = start; sym; sym = dso__next_symbol(sym)) {
   2611		if (!strcmp(sym->name, ptss)) {
   2612			ip = map->unmap_ip(map, sym->start);
   2613			if (ip >= map->start && ip < map->end) {
   2614				*ptss_ip = ip;
   2615				break;
   2616			}
   2617		}
   2618	}
   2619
   2620	return switch_ip;
   2621}
   2622
   2623static void intel_pt_enable_sync_switch(struct intel_pt *pt)
   2624{
   2625	unsigned int i;
   2626
   2627	pt->sync_switch = true;
   2628
   2629	for (i = 0; i < pt->queues.nr_queues; i++) {
   2630		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
   2631		struct intel_pt_queue *ptq = queue->priv;
   2632
   2633		if (ptq)
   2634			ptq->sync_switch = true;
   2635	}
   2636}
   2637
   2638/*
   2639 * To filter against time ranges, it is only necessary to look at the next start
   2640 * or end time.
   2641 */
   2642static bool intel_pt_next_time(struct intel_pt_queue *ptq)
   2643{
   2644	struct intel_pt *pt = ptq->pt;
   2645
   2646	if (ptq->sel_start) {
   2647		/* Next time is an end time */
   2648		ptq->sel_start = false;
   2649		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
   2650		return true;
   2651	} else if (ptq->sel_idx + 1 < pt->range_cnt) {
   2652		/* Next time is a start time */
   2653		ptq->sel_start = true;
   2654		ptq->sel_idx += 1;
   2655		ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
   2656		return true;
   2657	}
   2658
   2659	/* No next time */
   2660	return false;
   2661}
   2662
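        /*
         * ptq->sel_timestamp is the next time range boundary: a start time when
         * ptq->sel_start is true, otherwise an end time.
         */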
   2663static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
   2664{
   2665	int err;
   2666
   2667	while (1) {
   2668		if (ptq->sel_start) {
   2669			if (ptq->timestamp >= ptq->sel_timestamp) {
   2670				/* After start time, so consider next time */
   2671				intel_pt_next_time(ptq);
   2672				if (!ptq->sel_timestamp) {
   2673					/* No end time */
   2674					return 0;
   2675				}
   2676				/* Check against end time */
   2677				continue;
   2678			}
   2679			/* Before start time, so fast forward */
   2680			ptq->have_sample = false;
   2681			if (ptq->sel_timestamp > *ff_timestamp) {
   2682				if (ptq->sync_switch) {
   2683					intel_pt_next_tid(ptq->pt, ptq);
   2684					ptq->switch_state = INTEL_PT_SS_UNKNOWN;
   2685				}
   2686				*ff_timestamp = ptq->sel_timestamp;
   2687				err = intel_pt_fast_forward(ptq->decoder,
   2688							    ptq->sel_timestamp);
   2689				if (err)
   2690					return err;
   2691			}
   2692			return 0;
   2693		} else if (ptq->timestamp > ptq->sel_timestamp) {
   2694			/* After end time, so consider next time */
   2695			if (!intel_pt_next_time(ptq)) {
   2696				/* No next time range, so stop decoding */
   2697				ptq->have_sample = false;
   2698				ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
   2699				return 1;
   2700			}
   2701			/* Check against next start time */
   2702			continue;
   2703		} else {
   2704			/* Before end time */
   2705			return 0;
   2706		}
   2707	}
   2708}
   2709
   2710static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
   2711{
   2712	const struct intel_pt_state *state = ptq->state;
   2713	struct intel_pt *pt = ptq->pt;
   2714	u64 ff_timestamp = 0;
   2715	int err;
   2716
   2717	if (!pt->kernel_start) {
   2718		pt->kernel_start = machine__kernel_start(pt->machine);
   2719		if (pt->per_cpu_mmaps &&
   2720		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
   2721		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
   2722		    !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) {
   2723			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
   2724			if (pt->switch_ip) {
   2725				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
   2726					     pt->switch_ip, pt->ptss_ip);
   2727				intel_pt_enable_sync_switch(pt);
   2728			}
   2729		}
   2730	}
   2731
   2732	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
   2733		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
   2734	while (1) {
   2735		err = intel_pt_sample(ptq);
   2736		if (err)
   2737			return err;
   2738
   2739		state = intel_pt_decode(ptq->decoder);
   2740		if (state->err) {
   2741			if (state->err == INTEL_PT_ERR_NODATA)
   2742				return 1;
   2743			if (ptq->sync_switch &&
   2744			    state->from_ip >= pt->kernel_start) {
   2745				ptq->sync_switch = false;
   2746				intel_pt_next_tid(pt, ptq);
   2747			}
   2748			ptq->timestamp = state->est_timestamp;
   2749			if (pt->synth_opts.errors) {
   2750				err = intel_ptq_synth_error(ptq, state);
   2751				if (err)
   2752					return err;
   2753			}
   2754			continue;
   2755		}
   2756
   2757		ptq->state = state;
   2758		ptq->have_sample = true;
   2759		intel_pt_sample_flags(ptq);
   2760
   2761		/* Use estimated TSC upon return to user space */
   2762		if (pt->est_tsc &&
   2763		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
   2764		    state->to_ip && state->to_ip < pt->kernel_start) {
   2765			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
   2766				     state->timestamp, state->est_timestamp);
   2767			ptq->timestamp = state->est_timestamp;
   2768		/* Use estimated TSC in unknown switch state */
   2769		} else if (ptq->sync_switch &&
   2770			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
   2771			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
   2772			   ptq->next_tid == -1) {
   2773			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
   2774				     state->timestamp, state->est_timestamp);
   2775			ptq->timestamp = state->est_timestamp;
   2776		} else if (state->timestamp > ptq->timestamp) {
   2777			ptq->timestamp = state->timestamp;
   2778		}
   2779
   2780		if (ptq->sel_timestamp) {
   2781			err = intel_pt_time_filter(ptq, &ff_timestamp);
   2782			if (err)
   2783				return err;
   2784		}
   2785
   2786		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
   2787			*timestamp = ptq->timestamp;
   2788			return 0;
   2789		}
   2790	}
   2791	return 0;
   2792}
   2793
   2794static inline int intel_pt_update_queues(struct intel_pt *pt)
   2795{
   2796	if (pt->queues.new_data) {
   2797		pt->queues.new_data = false;
   2798		return intel_pt_setup_queues(pt);
   2799	}
   2800	return 0;
   2801}
   2802
   2803static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
   2804{
   2805	unsigned int queue_nr;
   2806	u64 ts;
   2807	int ret;
   2808
   2809	while (1) {
   2810		struct auxtrace_queue *queue;
   2811		struct intel_pt_queue *ptq;
   2812
   2813		if (!pt->heap.heap_cnt)
   2814			return 0;
   2815
   2816		if (pt->heap.heap_array[0].ordinal >= timestamp)
   2817			return 0;
   2818
   2819		queue_nr = pt->heap.heap_array[0].queue_nr;
   2820		queue = &pt->queues.queue_array[queue_nr];
   2821		ptq = queue->priv;
   2822
   2823		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
   2824			     queue_nr, pt->heap.heap_array[0].ordinal,
   2825			     timestamp);
   2826
   2827		auxtrace_heap__pop(&pt->heap);
   2828
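        		/*
        		 * Decode this queue only up to just past the next queue's
        		 * timestamp (or the overall limit), keeping the queues in
        		 * timestamp order.
        		 */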
   2829		if (pt->heap.heap_cnt) {
   2830			ts = pt->heap.heap_array[0].ordinal + 1;
   2831			if (ts > timestamp)
   2832				ts = timestamp;
   2833		} else {
   2834			ts = timestamp;
   2835		}
   2836
   2837		intel_pt_set_pid_tid_cpu(pt, queue);
   2838
   2839		ret = intel_pt_run_decoder(ptq, &ts);
   2840
   2841		if (ret < 0) {
   2842			auxtrace_heap__add(&pt->heap, queue_nr, ts);
   2843			return ret;
   2844		}
   2845
   2846		if (!ret) {
   2847			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
   2848			if (ret < 0)
   2849				return ret;
   2850		} else {
   2851			ptq->on_heap = false;
   2852		}
   2853	}
   2854
   2855	return 0;
   2856}
   2857
   2858static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
   2859					    u64 time_)
   2860{
   2861	struct auxtrace_queues *queues = &pt->queues;
   2862	unsigned int i;
   2863	u64 ts = 0;
   2864
   2865	for (i = 0; i < queues->nr_queues; i++) {
   2866		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
   2867		struct intel_pt_queue *ptq = queue->priv;
   2868
   2869		if (ptq && (tid == -1 || ptq->tid == tid)) {
   2870			ptq->time = time_;
   2871			intel_pt_set_pid_tid_cpu(pt, queue);
   2872			intel_pt_run_decoder(ptq, &ts);
   2873		}
   2874	}
   2875	return 0;
   2876}
   2877
   2878static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq,
   2879					    struct auxtrace_queue *queue,
   2880					    struct perf_sample *sample)
   2881{
   2882	struct machine *m = ptq->pt->machine;
   2883
   2884	ptq->pid = sample->pid;
   2885	ptq->tid = sample->tid;
   2886	ptq->cpu = queue->cpu;
   2887
   2888	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
   2889		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
   2890
   2891	thread__zput(ptq->thread);
   2892
   2893	if (ptq->tid == -1)
   2894		return;
   2895
   2896	if (ptq->pid == -1) {
   2897		ptq->thread = machine__find_thread(m, -1, ptq->tid);
   2898		if (ptq->thread)
   2899			ptq->pid = ptq->thread->pid_;
   2900		return;
   2901	}
   2902
   2903	ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid);
   2904}
   2905
   2906static int intel_pt_process_timeless_sample(struct intel_pt *pt,
   2907					    struct perf_sample *sample)
   2908{
   2909	struct auxtrace_queue *queue;
   2910	struct intel_pt_queue *ptq;
   2911	u64 ts = 0;
   2912
   2913	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
   2914	if (!queue)
   2915		return -EINVAL;
   2916
   2917	ptq = queue->priv;
   2918	if (!ptq)
   2919		return 0;
   2920
   2921	ptq->stop = false;
   2922	ptq->time = sample->time;
   2923	intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample);
   2924	intel_pt_run_decoder(ptq, &ts);
   2925	return 0;
   2926}
   2927
   2928static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
   2929{
   2930	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
   2931				    sample->pid, sample->tid, 0, sample->time);
   2932}
   2933
   2934static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
   2935{
   2936	unsigned i, j;
   2937
   2938	if (cpu < 0 || !pt->queues.nr_queues)
   2939		return NULL;
   2940
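        	/*
        	 * Queues are usually created per-CPU in CPU order, so try the
        	 * queue with the same index first, then search downwards, then
        	 * scan the remaining queues.
        	 */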
   2941	if ((unsigned)cpu >= pt->queues.nr_queues)
   2942		i = pt->queues.nr_queues - 1;
   2943	else
   2944		i = cpu;
   2945
   2946	if (pt->queues.queue_array[i].cpu == cpu)
   2947		return pt->queues.queue_array[i].priv;
   2948
   2949	for (j = 0; i > 0; j++) {
   2950		if (pt->queues.queue_array[--i].cpu == cpu)
   2951			return pt->queues.queue_array[i].priv;
   2952	}
   2953
   2954	for (; j < pt->queues.nr_queues; j++) {
   2955		if (pt->queues.queue_array[j].cpu == cpu)
   2956			return pt->queues.queue_array[j].priv;
   2957	}
   2958
   2959	return NULL;
   2960}
   2961
   2962static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
   2963				u64 timestamp)
   2964{
   2965	struct intel_pt_queue *ptq;
   2966	int err;
   2967
   2968	if (!pt->sync_switch)
   2969		return 1;
   2970
   2971	ptq = intel_pt_cpu_to_ptq(pt, cpu);
   2972	if (!ptq || !ptq->sync_switch)
   2973		return 1;
   2974
   2975	switch (ptq->switch_state) {
   2976	case INTEL_PT_SS_NOT_TRACING:
   2977		break;
   2978	case INTEL_PT_SS_UNKNOWN:
   2979	case INTEL_PT_SS_TRACING:
   2980		ptq->next_tid = tid;
   2981		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
   2982		return 0;
   2983	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
   2984		if (!ptq->on_heap) {
   2985			ptq->timestamp = perf_time_to_tsc(timestamp,
   2986							  &pt->tc);
   2987			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
   2988						 ptq->timestamp);
   2989			if (err)
   2990				return err;
   2991			ptq->on_heap = true;
   2992		}
   2993		ptq->switch_state = INTEL_PT_SS_TRACING;
   2994		break;
   2995	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
   2996		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
   2997		break;
   2998	default:
   2999		break;
   3000	}
   3001
   3002	ptq->next_tid = -1;
   3003
   3004	return 1;
   3005}
   3006
   3007static int intel_pt_process_switch(struct intel_pt *pt,
   3008				   struct perf_sample *sample)
   3009{
   3010	pid_t tid;
   3011	int cpu, ret;
   3012	struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);
   3013
   3014	if (evsel != pt->switch_evsel)
   3015		return 0;
   3016
   3017	tid = evsel__intval(evsel, sample, "next_pid");
   3018	cpu = sample->cpu;
   3019
   3020	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
   3021		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
   3022		     &pt->tc));
   3023
   3024	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
   3025	if (ret <= 0)
   3026		return ret;
   3027
   3028	return machine__set_current_tid(pt->machine, cpu, -1, tid);
   3029}
   3030
   3031static int intel_pt_context_switch_in(struct intel_pt *pt,
   3032				      struct perf_sample *sample)
   3033{
   3034	pid_t pid = sample->pid;
   3035	pid_t tid = sample->tid;
   3036	int cpu = sample->cpu;
   3037
   3038	if (pt->sync_switch) {
   3039		struct intel_pt_queue *ptq;
   3040
   3041		ptq = intel_pt_cpu_to_ptq(pt, cpu);
   3042		if (ptq && ptq->sync_switch) {
   3043			ptq->next_tid = -1;
   3044			switch (ptq->switch_state) {
   3045			case INTEL_PT_SS_NOT_TRACING:
   3046			case INTEL_PT_SS_UNKNOWN:
   3047			case INTEL_PT_SS_TRACING:
   3048				break;
   3049			case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
   3050			case INTEL_PT_SS_EXPECTING_SWITCH_IP:
   3051				ptq->switch_state = INTEL_PT_SS_TRACING;
   3052				break;
   3053			default:
   3054				break;
   3055			}
   3056		}
   3057	}
   3058
   3059	/*
    3060	 * If the current tid has not been updated yet, ensure it is updated now
    3061	 * that a "switch in" event has occurred.
   3062	 */
   3063	if (machine__get_current_tid(pt->machine, cpu) == tid)
   3064		return 0;
   3065
   3066	return machine__set_current_tid(pt->machine, cpu, pid, tid);
   3067}
   3068
   3069static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
   3070				   struct perf_sample *sample)
   3071{
   3072	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
   3073	pid_t pid, tid;
   3074	int cpu, ret;
   3075
   3076	cpu = sample->cpu;
   3077
   3078	if (pt->have_sched_switch == 3) {
   3079		if (!out)
   3080			return intel_pt_context_switch_in(pt, sample);
   3081		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
   3082			pr_err("Expecting CPU-wide context switch event\n");
   3083			return -EINVAL;
   3084		}
   3085		pid = event->context_switch.next_prev_pid;
   3086		tid = event->context_switch.next_prev_tid;
   3087	} else {
   3088		if (out)
   3089			return 0;
   3090		pid = sample->pid;
   3091		tid = sample->tid;
   3092	}
   3093
   3094	if (tid == -1)
   3095		intel_pt_log("context_switch event has no tid\n");
   3096
   3097	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
   3098	if (ret <= 0)
   3099		return ret;
   3100
   3101	return machine__set_current_tid(pt->machine, cpu, pid, tid);
   3102}
   3103
   3104static int intel_pt_process_itrace_start(struct intel_pt *pt,
   3105					 union perf_event *event,
   3106					 struct perf_sample *sample)
   3107{
   3108	if (!pt->per_cpu_mmaps)
   3109		return 0;
   3110
   3111	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
   3112		     sample->cpu, event->itrace_start.pid,
   3113		     event->itrace_start.tid, sample->time,
   3114		     perf_time_to_tsc(sample->time, &pt->tc));
   3115
   3116	return machine__set_current_tid(pt->machine, sample->cpu,
   3117					event->itrace_start.pid,
   3118					event->itrace_start.tid);
   3119}
   3120
   3121static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
   3122					     union perf_event *event,
   3123					     struct perf_sample *sample)
   3124{
   3125	u64 hw_id = event->aux_output_hw_id.hw_id;
   3126	struct auxtrace_queue *queue;
   3127	struct intel_pt_queue *ptq;
   3128	struct evsel *evsel;
   3129
   3130	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
   3131	evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id);
    3132	if (!queue || !queue->priv || !evsel || hw_id >= INTEL_PT_MAX_PEBS) {
   3133		pr_err("Bad AUX output hardware ID\n");
   3134		return -EINVAL;
   3135	}
   3136
   3137	ptq = queue->priv;
   3138
   3139	ptq->pebs[hw_id].evsel = evsel;
   3140	ptq->pebs[hw_id].id = sample->id;
   3141
   3142	return 0;
   3143}
   3144
   3145static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
   3146			     struct addr_location *al)
   3147{
   3148	if (!al->map || addr < al->map->start || addr >= al->map->end) {
   3149		if (!thread__find_map(thread, cpumode, addr, al))
   3150			return -1;
   3151	}
   3152
   3153	return 0;
   3154}
   3155
   3156/* Invalidate all instruction cache entries that overlap the text poke */
   3157static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
   3158{
   3159	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
   3160	u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
    3161	/* Assume the text poke's basic block begins no more than 4096 bytes earlier */
   3162	int cnt = 4096 + event->text_poke.new_len;
   3163	struct thread *thread = pt->unknown_thread;
   3164	struct addr_location al = { .map = NULL };
   3165	struct machine *machine = pt->machine;
   3166	struct intel_pt_cache_entry *e;
   3167	u64 offset;
   3168
   3169	if (!event->text_poke.new_len)
   3170		return 0;
   3171
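        	/*
        	 * Walk backwards from the last poked byte, invalidating cached
        	 * decoded blocks that overlap the poke. It is safe to stop once a
        	 * non-overlapping cached block ending in a branch is found.
        	 */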
   3172	for (; cnt; cnt--, addr--) {
   3173		if (intel_pt_find_map(thread, cpumode, addr, &al)) {
   3174			if (addr < event->text_poke.addr)
   3175				return 0;
   3176			continue;
   3177		}
   3178
   3179		if (!al.map->dso || !al.map->dso->auxtrace_cache)
   3180			continue;
   3181
   3182		offset = al.map->map_ip(al.map, addr);
   3183
   3184		e = intel_pt_cache_lookup(al.map->dso, machine, offset);
   3185		if (!e)
   3186			continue;
   3187
   3188		if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
   3189			/*
    3190			 * No overlap. Working backwards, there cannot be another
   3191			 * basic block that overlaps the text poke if there is a
   3192			 * branch instruction before the text poke address.
   3193			 */
   3194			if (e->branch != INTEL_PT_BR_NO_BRANCH)
   3195				return 0;
   3196		} else {
   3197			intel_pt_cache_invalidate(al.map->dso, machine, offset);
   3198			intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
   3199				     al.map->dso->long_name, addr);
   3200		}
   3201	}
   3202
   3203	return 0;
   3204}
   3205
   3206static int intel_pt_process_event(struct perf_session *session,
   3207				  union perf_event *event,
   3208				  struct perf_sample *sample,
   3209				  struct perf_tool *tool)
   3210{
   3211	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
   3212					   auxtrace);
   3213	u64 timestamp;
   3214	int err = 0;
   3215
   3216	if (dump_trace)
   3217		return 0;
   3218
   3219	if (!tool->ordered_events) {
   3220		pr_err("Intel Processor Trace requires ordered events\n");
   3221		return -EINVAL;
   3222	}
   3223
   3224	if (sample->time && sample->time != (u64)-1)
   3225		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
   3226	else
   3227		timestamp = 0;
   3228
   3229	if (timestamp || pt->timeless_decoding) {
   3230		err = intel_pt_update_queues(pt);
   3231		if (err)
   3232			return err;
   3233	}
   3234
   3235	if (pt->timeless_decoding) {
   3236		if (pt->sampling_mode) {
   3237			if (sample->aux_sample.size)
   3238				err = intel_pt_process_timeless_sample(pt,
   3239								       sample);
   3240		} else if (event->header.type == PERF_RECORD_EXIT) {
   3241			err = intel_pt_process_timeless_queues(pt,
   3242							       event->fork.tid,
   3243							       sample->time);
   3244		}
   3245	} else if (timestamp) {
   3246		if (!pt->first_timestamp)
   3247			intel_pt_first_timestamp(pt, timestamp);
   3248		err = intel_pt_process_queues(pt, timestamp);
   3249	}
   3250	if (err)
   3251		return err;
   3252
   3253	if (event->header.type == PERF_RECORD_SAMPLE) {
   3254		if (pt->synth_opts.add_callchain && !sample->callchain)
   3255			intel_pt_add_callchain(pt, sample);
   3256		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
   3257			intel_pt_add_br_stack(pt, sample);
   3258	}
   3259
   3260	if (event->header.type == PERF_RECORD_AUX &&
   3261	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
   3262	    pt->synth_opts.errors) {
   3263		err = intel_pt_lost(pt, sample);
   3264		if (err)
   3265			return err;
   3266	}
   3267
   3268	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
   3269		err = intel_pt_process_switch(pt, sample);
   3270	else if (event->header.type == PERF_RECORD_ITRACE_START)
   3271		err = intel_pt_process_itrace_start(pt, event, sample);
   3272	else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID)
   3273		err = intel_pt_process_aux_output_hw_id(pt, event, sample);
   3274	else if (event->header.type == PERF_RECORD_SWITCH ||
   3275		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
   3276		err = intel_pt_context_switch(pt, event, sample);
   3277
   3278	if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
   3279		err = intel_pt_text_poke(pt, event);
   3280
   3281	if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
   3282		intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
   3283			     event->header.type, sample->cpu, sample->time, timestamp);
   3284		intel_pt_log_event(event);
   3285	}
   3286
   3287	return err;
   3288}
   3289
   3290static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
   3291{
   3292	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
   3293					   auxtrace);
   3294	int ret;
   3295
   3296	if (dump_trace)
   3297		return 0;
   3298
   3299	if (!tool->ordered_events)
   3300		return -EINVAL;
   3301
   3302	ret = intel_pt_update_queues(pt);
   3303	if (ret < 0)
   3304		return ret;
   3305
   3306	if (pt->timeless_decoding)
   3307		return intel_pt_process_timeless_queues(pt, -1,
   3308							MAX_TIMESTAMP - 1);
   3309
   3310	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
   3311}
   3312
   3313static void intel_pt_free_events(struct perf_session *session)
   3314{
   3315	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
   3316					   auxtrace);
   3317	struct auxtrace_queues *queues = &pt->queues;
   3318	unsigned int i;
   3319
   3320	for (i = 0; i < queues->nr_queues; i++) {
   3321		intel_pt_free_queue(queues->queue_array[i].priv);
   3322		queues->queue_array[i].priv = NULL;
   3323	}
   3324	intel_pt_log_disable();
   3325	auxtrace_queues__free(queues);
   3326}
   3327
   3328static void intel_pt_free(struct perf_session *session)
   3329{
   3330	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
   3331					   auxtrace);
   3332
   3333	auxtrace_heap__free(&pt->heap);
   3334	intel_pt_free_events(session);
   3335	session->auxtrace = NULL;
   3336	intel_pt_free_vmcs_info(pt);
   3337	thread__put(pt->unknown_thread);
   3338	addr_filters__exit(&pt->filts);
   3339	zfree(&pt->chain);
   3340	zfree(&pt->filter);
   3341	zfree(&pt->time_ranges);
   3342	free(pt);
   3343}
   3344
   3345static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
   3346				       struct evsel *evsel)
   3347{
   3348	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
   3349					   auxtrace);
   3350
   3351	return evsel->core.attr.type == pt->pmu_type;
   3352}
   3353
   3354static int intel_pt_process_auxtrace_event(struct perf_session *session,
   3355					   union perf_event *event,
   3356					   struct perf_tool *tool __maybe_unused)
   3357{
   3358	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
   3359					   auxtrace);
   3360
   3361	if (!pt->data_queued) {
   3362		struct auxtrace_buffer *buffer;
   3363		off_t data_offset;
   3364		int fd = perf_data__fd(session->data);
   3365		int err;
   3366
   3367		if (perf_data__is_pipe(session->data)) {
   3368			data_offset = 0;
   3369		} else {
   3370			data_offset = lseek(fd, 0, SEEK_CUR);
   3371			if (data_offset == -1)
   3372				return -errno;
   3373		}
   3374
   3375		err = auxtrace_queues__add_event(&pt->queues, session, event,
   3376						 data_offset, &buffer);
   3377		if (err)
   3378			return err;
   3379
    3380		/* Dump here, now that we have copied a piped trace out of the pipe */
   3381		if (dump_trace) {
   3382			if (auxtrace_buffer__get_data(buffer, fd)) {
   3383				intel_pt_dump_event(pt, buffer->data,
   3384						    buffer->size);
   3385				auxtrace_buffer__put_data(buffer);
   3386			}
   3387		}
   3388	}
   3389
   3390	return 0;
   3391}
   3392
   3393static int intel_pt_queue_data(struct perf_session *session,
   3394			       struct perf_sample *sample,
   3395			       union perf_event *event, u64 data_offset)
   3396{
   3397	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
   3398					   auxtrace);
   3399	u64 timestamp;
   3400
   3401	if (event) {
   3402		return auxtrace_queues__add_event(&pt->queues, session, event,
   3403						  data_offset, NULL);
   3404	}
   3405
   3406	if (sample->time && sample->time != (u64)-1)
   3407		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
   3408	else
   3409		timestamp = 0;
   3410
   3411	return auxtrace_queues__add_sample(&pt->queues, session, sample,
   3412					   data_offset, timestamp);
   3413}
   3414
   3415struct intel_pt_synth {
   3416	struct perf_tool dummy_tool;
   3417	struct perf_session *session;
   3418};
   3419
   3420static int intel_pt_event_synth(struct perf_tool *tool,
   3421				union perf_event *event,
   3422				struct perf_sample *sample __maybe_unused,
   3423				struct machine *machine __maybe_unused)
   3424{
   3425	struct intel_pt_synth *intel_pt_synth =
   3426			container_of(tool, struct intel_pt_synth, dummy_tool);
   3427
   3428	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
   3429						 NULL);
   3430}
   3431
   3432static int intel_pt_synth_event(struct perf_session *session, const char *name,
   3433				struct perf_event_attr *attr, u64 id)
   3434{
   3435	struct intel_pt_synth intel_pt_synth;
   3436	int err;
   3437
   3438	pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
   3439		 name, id, (u64)attr->sample_type);
   3440
   3441	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
   3442	intel_pt_synth.session = session;
   3443
   3444	err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
   3445					  &id, intel_pt_event_synth);
   3446	if (err)
   3447		pr_err("%s: failed to synthesize '%s' event type\n",
   3448		       __func__, name);
   3449
   3450	return err;
   3451}
   3452
   3453static void intel_pt_set_event_name(struct evlist *evlist, u64 id,
   3454				    const char *name)
   3455{
   3456	struct evsel *evsel;
   3457
   3458	evlist__for_each_entry(evlist, evsel) {
   3459		if (evsel->core.id && evsel->core.id[0] == id) {
   3460			if (evsel->name)
   3461				zfree(&evsel->name);
   3462			evsel->name = strdup(name);
   3463			break;
   3464		}
   3465	}
   3466}
   3467
   3468static struct evsel *intel_pt_evsel(struct intel_pt *pt,
   3469					 struct evlist *evlist)
   3470{
   3471	struct evsel *evsel;
   3472
   3473	evlist__for_each_entry(evlist, evsel) {
   3474		if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids)
   3475			return evsel;
   3476	}
   3477
   3478	return NULL;
   3479}
   3480
   3481static int intel_pt_synth_events(struct intel_pt *pt,
   3482				 struct perf_session *session)
   3483{
   3484	struct evlist *evlist = session->evlist;
   3485	struct evsel *evsel = intel_pt_evsel(pt, evlist);
   3486	struct perf_event_attr attr;
   3487	u64 id;
   3488	int err;
   3489
   3490	if (!evsel) {
   3491		pr_debug("There are no selected events with Intel Processor Trace data\n");
   3492		return 0;
   3493	}
   3494
   3495	memset(&attr, 0, sizeof(struct perf_event_attr));
   3496	attr.size = sizeof(struct perf_event_attr);
   3497	attr.type = PERF_TYPE_HARDWARE;
   3498	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
   3499	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
   3500			    PERF_SAMPLE_PERIOD;
   3501	if (pt->timeless_decoding)
   3502		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
   3503	else
   3504		attr.sample_type |= PERF_SAMPLE_TIME;
   3505	if (!pt->per_cpu_mmaps)
   3506		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
   3507	attr.exclude_user = evsel->core.attr.exclude_user;
   3508	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
   3509	attr.exclude_hv = evsel->core.attr.exclude_hv;
   3510	attr.exclude_host = evsel->core.attr.exclude_host;
   3511	attr.exclude_guest = evsel->core.attr.exclude_guest;
   3512	attr.sample_id_all = evsel->core.attr.sample_id_all;
   3513	attr.read_format = evsel->core.attr.read_format;
   3514
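        	/* Pick ids well away from the existing ids so they are unlikely to clash */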
   3515	id = evsel->core.id[0] + 1000000000;
   3516	if (!id)
   3517		id = 1;
   3518
   3519	if (pt->synth_opts.branches) {
   3520		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
   3521		attr.sample_period = 1;
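        		/* sample.addr carries the branch target for synthesized branch samples */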
   3522		attr.sample_type |= PERF_SAMPLE_ADDR;
   3523		err = intel_pt_synth_event(session, "branches", &attr, id);
   3524		if (err)
   3525			return err;
   3526		pt->sample_branches = true;
   3527		pt->branches_sample_type = attr.sample_type;
   3528		pt->branches_id = id;
   3529		id += 1;
   3530		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
   3531	}
   3532
   3533	if (pt->synth_opts.callchain)
   3534		attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
   3535	if (pt->synth_opts.last_branch) {
   3536		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
   3537		/*
   3538		 * We don't use the hardware index, but the sample generation
   3539		 * code uses the new format branch_stack with this field,
   3540		 * so the event attributes must indicate that it's present.
   3541		 */
   3542		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
   3543	}
   3544
   3545	if (pt->synth_opts.instructions) {
   3546		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
   3547		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
   3548			attr.sample_period =
   3549				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
   3550		else
   3551			attr.sample_period = pt->synth_opts.period;
   3552		err = intel_pt_synth_event(session, "instructions", &attr, id);
   3553		if (err)
   3554			return err;
   3555		pt->sample_instructions = true;
   3556		pt->instructions_sample_type = attr.sample_type;
   3557		pt->instructions_id = id;
   3558		id += 1;
   3559	}
   3560
   3561	attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
   3562	attr.sample_period = 1;
   3563
   3564	if (pt->synth_opts.transactions) {
   3565		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
   3566		err = intel_pt_synth_event(session, "transactions", &attr, id);
   3567		if (err)
   3568			return err;
   3569		pt->sample_transactions = true;
   3570		pt->transactions_sample_type = attr.sample_type;
   3571		pt->transactions_id = id;
   3572		intel_pt_set_event_name(evlist, id, "transactions");
   3573		id += 1;
   3574	}
   3575
   3576	attr.type = PERF_TYPE_SYNTH;
   3577	attr.sample_type |= PERF_SAMPLE_RAW;
   3578
   3579	if (pt->synth_opts.ptwrites) {
   3580		attr.config = PERF_SYNTH_INTEL_PTWRITE;
   3581		err = intel_pt_synth_event(session, "ptwrite", &attr, id);
   3582		if (err)
   3583			return err;
   3584		pt->sample_ptwrites = true;
   3585		pt->ptwrites_sample_type = attr.sample_type;
   3586		pt->ptwrites_id = id;
   3587		intel_pt_set_event_name(evlist, id, "ptwrite");
   3588		id += 1;
   3589	}
   3590
   3591	if (pt->synth_opts.pwr_events) {
   3592		pt->sample_pwr_events = true;
   3593		pt->pwr_events_sample_type = attr.sample_type;
   3594
   3595		attr.config = PERF_SYNTH_INTEL_CBR;
   3596		err = intel_pt_synth_event(session, "cbr", &attr, id);
   3597		if (err)
   3598			return err;
   3599		pt->cbr_id = id;
   3600		intel_pt_set_event_name(evlist, id, "cbr");
   3601		id += 1;
   3602
   3603		attr.config = PERF_SYNTH_INTEL_PSB;
   3604		err = intel_pt_synth_event(session, "psb", &attr, id);
   3605		if (err)
   3606			return err;
   3607		pt->psb_id = id;
   3608		intel_pt_set_event_name(evlist, id, "psb");
   3609		id += 1;
   3610	}
   3611
   3612	if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) {
   3613		attr.config = PERF_SYNTH_INTEL_MWAIT;
   3614		err = intel_pt_synth_event(session, "mwait", &attr, id);
   3615		if (err)
   3616			return err;
   3617		pt->mwait_id = id;
   3618		intel_pt_set_event_name(evlist, id, "mwait");
   3619		id += 1;
   3620
   3621		attr.config = PERF_SYNTH_INTEL_PWRE;
   3622		err = intel_pt_synth_event(session, "pwre", &attr, id);
   3623		if (err)
   3624			return err;
   3625		pt->pwre_id = id;
   3626		intel_pt_set_event_name(evlist, id, "pwre");
   3627		id += 1;
   3628
   3629		attr.config = PERF_SYNTH_INTEL_EXSTOP;
   3630		err = intel_pt_synth_event(session, "exstop", &attr, id);
   3631		if (err)
   3632			return err;
   3633		pt->exstop_id = id;
   3634		intel_pt_set_event_name(evlist, id, "exstop");
   3635		id += 1;
   3636
   3637		attr.config = PERF_SYNTH_INTEL_PWRX;
   3638		err = intel_pt_synth_event(session, "pwrx", &attr, id);
   3639		if (err)
   3640			return err;
   3641		pt->pwrx_id = id;
   3642		intel_pt_set_event_name(evlist, id, "pwrx");
   3643		id += 1;
   3644	}
   3645
   3646	if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) {
   3647		attr.config = PERF_SYNTH_INTEL_EVT;
   3648		err = intel_pt_synth_event(session, "evt", &attr, id);
   3649		if (err)
   3650			return err;
   3651		pt->evt_sample_type = attr.sample_type;
   3652		pt->evt_id = id;
   3653		intel_pt_set_event_name(evlist, id, "evt");
   3654		id += 1;
   3655	}
   3656
   3657	if (pt->synth_opts.intr_events && pt->cap_event_trace) {
   3658		attr.config = PERF_SYNTH_INTEL_IFLAG_CHG;
   3659		err = intel_pt_synth_event(session, "iflag", &attr, id);
   3660		if (err)
   3661			return err;
   3662		pt->iflag_chg_sample_type = attr.sample_type;
   3663		pt->iflag_chg_id = id;
   3664		intel_pt_set_event_name(evlist, id, "iflag");
   3665		id += 1;
   3666	}
   3667
   3668	return 0;
   3669}
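/*
 * Summary of intel_pt_synth_events(): for each sample type requested via
 * the itrace options (branches, instructions, transactions, ptwrite,
 * power events, event trace, iflag changes), a perf_event_attr is built
 * and handed to intel_pt_synth_event(), which adds it to the session with
 * its own sample id so decoded trace can later be emitted as ordinary perf
 * samples under names such as "branches" and "instructions". As an
 * illustrative example, an instruction period given in nanoseconds (e.g.
 * an option like --itrace=i100ns) is converted to TSC ticks with
 * intel_pt_ns_to_ticks() before being used as attr.sample_period.
 */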
   3670
   3671static void intel_pt_setup_pebs_events(struct intel_pt *pt)
   3672{
   3673	struct evsel *evsel;
   3674
   3675	if (!pt->synth_opts.other_events)
   3676		return;
   3677
   3678	evlist__for_each_entry(pt->session->evlist, evsel) {
   3679		if (evsel->core.attr.aux_output && evsel->core.id) {
   3680			if (pt->single_pebs) {
   3681				pt->single_pebs = false;
   3682				return;
   3683			}
   3684			pt->single_pebs = true;
   3685			pt->sample_pebs = true;
   3686			pt->pebs_evsel = evsel;
   3687		}
   3688	}
   3689}
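/*
 * Note on the loop above: PEBS-via-PT sampling is enabled for evsels that
 * have aux_output set. single_pebs stays true only while exactly one such
 * evsel has been seen; as soon as a second one is found it is cleared and
 * the scan stops, so later code cannot assume every PEBS sample belongs to
 * pt->pebs_evsel.
 */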
   3690
   3691static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
   3692{
   3693	struct evsel *evsel;
   3694
   3695	evlist__for_each_entry_reverse(evlist, evsel) {
   3696		const char *name = evsel__name(evsel);
   3697
   3698		if (!strcmp(name, "sched:sched_switch"))
   3699			return evsel;
   3700	}
   3701
   3702	return NULL;
   3703}
   3704
   3705static bool intel_pt_find_switch(struct evlist *evlist)
   3706{
   3707	struct evsel *evsel;
   3708
   3709	evlist__for_each_entry(evlist, evsel) {
   3710		if (evsel->core.attr.context_switch)
   3711			return true;
   3712	}
   3713
   3714	return false;
   3715}
   3716
   3717static int intel_pt_perf_config(const char *var, const char *value, void *data)
   3718{
   3719	struct intel_pt *pt = data;
   3720
   3721	if (!strcmp(var, "intel-pt.mispred-all"))
   3722		pt->mispred_all = perf_config_bool(var, value);
   3723
   3724	if (!strcmp(var, "intel-pt.max-loops"))
   3725		perf_config_int(&pt->max_loops, var, value);
   3726
   3727	return 0;
   3728}
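/*
 * The two variables above come from perfconfig. A purely illustrative
 * ~/.perfconfig fragment (not taken from this source) that would set them:
 *
 *	[intel-pt]
 *		mispred-all = true
 *		max-loops = 100000
 *
 * Judging by the flag names, mispred-all marks every branch in synthesized
 * branch stacks as mispredicted, and max-loops is passed on to the decoder
 * as a loop limit. Values shown are examples only.
 */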
   3729
    3730/* Find the smallest TSC value that converts to a perf time of ns or later */
   3731static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
   3732{
   3733	u64 tsc, tm;
   3734
   3735	tsc = perf_time_to_tsc(ns, &pt->tc);
   3736
   3737	while (1) {
   3738		tm = tsc_to_perf_time(tsc, &pt->tc);
   3739		if (tm < ns)
   3740			break;
   3741		tsc -= 1;
   3742	}
   3743
   3744	while (tm < ns)
   3745		tm = tsc_to_perf_time(++tsc, &pt->tc);
   3746
   3747	return tsc;
   3748}
   3749
    3750/* Find the largest TSC value that converts to a perf time of ns or earlier */
   3751static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
   3752{
   3753	u64 tsc, tm;
   3754
   3755	tsc = perf_time_to_tsc(ns, &pt->tc);
   3756
   3757	while (1) {
   3758		tm = tsc_to_perf_time(tsc, &pt->tc);
   3759		if (tm > ns)
   3760			break;
   3761		tsc += 1;
   3762	}
   3763
   3764	while (tm > ns)
   3765		tm = tsc_to_perf_time(--tsc, &pt->tc);
   3766
   3767	return tsc;
   3768}
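/*
 * Why the extra loops in intel_pt_tsc_start()/intel_pt_tsc_end():
 * perf_time_to_tsc() and tsc_to_perf_time() use a multiply-and-shift
 * conversion, so converting ns to TSC and back does not always land on the
 * same nanosecond. The first loop steps the TSC value past the requested
 * time, the second steps it back to the boundary, guaranteeing the
 * returned TSC converts to a perf time >= ns (start) or <= ns (end).
 */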
   3769
   3770static int intel_pt_setup_time_ranges(struct intel_pt *pt,
   3771				      struct itrace_synth_opts *opts)
   3772{
   3773	struct perf_time_interval *p = opts->ptime_range;
   3774	int n = opts->range_num;
   3775	int i;
   3776
   3777	if (!n || !p || pt->timeless_decoding)
   3778		return 0;
   3779
   3780	pt->time_ranges = calloc(n, sizeof(struct range));
   3781	if (!pt->time_ranges)
   3782		return -ENOMEM;
   3783
   3784	pt->range_cnt = n;
   3785
   3786	intel_pt_log("%s: %u range(s)\n", __func__, n);
   3787
   3788	for (i = 0; i < n; i++) {
   3789		struct range *r = &pt->time_ranges[i];
   3790		u64 ts = p[i].start;
   3791		u64 te = p[i].end;
   3792
   3793		/*
   3794		 * Take care to ensure the TSC range matches the perf-time range
   3795		 * when converted back to perf-time.
   3796		 */
   3797		r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
   3798		r->end   = te ? intel_pt_tsc_end(te, pt) : 0;
   3799
   3800		intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
   3801			     i, ts, te);
   3802		intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
   3803			     i, r->start, r->end);
   3804	}
   3805
   3806	return 0;
   3807}
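/*
 * The ranges built above are typically populated from the --time option:
 * each perf-time interval in opts->ptime_range is translated into a TSC
 * interval so the decoder can compare raw trace timestamps directly. A
 * start or end of 0 is kept as 0, which the later range checks appear to
 * treat as an open-ended bound.
 */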
   3808
   3809static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
   3810{
   3811	struct intel_pt_vmcs_info *vmcs_info;
   3812	u64 tsc_offset, vmcs;
   3813	char *p = *args;
   3814
   3815	errno = 0;
   3816
   3817	p = skip_spaces(p);
   3818	if (!*p)
   3819		return 1;
   3820
   3821	tsc_offset = strtoull(p, &p, 0);
   3822	if (errno)
   3823		return -errno;
   3824	p = skip_spaces(p);
   3825	if (*p != ':') {
   3826		pt->dflt_tsc_offset = tsc_offset;
   3827		*args = p;
   3828		return 0;
   3829	}
   3830	p += 1;
   3831	while (1) {
   3832		vmcs = strtoull(p, &p, 0);
   3833		if (errno)
   3834			return -errno;
   3835		if (!vmcs)
   3836			return -EINVAL;
   3837		vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset);
   3838		if (!vmcs_info)
   3839			return -ENOMEM;
   3840		p = skip_spaces(p);
   3841		if (*p != ',')
   3842			break;
   3843		p += 1;
   3844	}
   3845	*args = p;
   3846	return 0;
   3847}
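/*
 * Judging from the parser above, each argument has the form
 *
 *	<tsc offset> [ : <vmcs> [ , <vmcs> ] ... ]
 *
 * i.e. a default TSC offset on its own, or a TSC offset followed by a
 * colon and a comma-separated list of VMCS addresses it applies to.
 * Numbers are parsed with strtoull(.., 0), so hex is accepted. Multiple
 * space-separated arguments may follow; parsing stops when the string is
 * exhausted (return value 1) or on a malformed number. A purely
 * illustrative value: "0x1000:0xffffe000,0xffffd000".
 */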
   3848
   3849static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt)
   3850{
   3851	char *args = pt->synth_opts.vm_tm_corr_args;
   3852	int ret;
   3853
   3854	if (!args)
   3855		return 0;
   3856
   3857	do {
   3858		ret = intel_pt_parse_vm_tm_corr_arg(pt, &args);
   3859	} while (!ret);
   3860
   3861	if (ret < 0) {
   3862		pr_err("Failed to parse VM Time Correlation options\n");
   3863		return ret;
   3864	}
   3865
   3866	return 0;
   3867}
   3868
   3869static const char * const intel_pt_info_fmts[] = {
   3870	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
   3871	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
    3872	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
   3873	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
   3874	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
   3875	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
   3876	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
   3877	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
   3878	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
   3879	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
   3880	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
   3881	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
   3882	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
   3883	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
   3884	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
   3885	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
   3886};
   3887
   3888static void intel_pt_print_info(__u64 *arr, int start, int finish)
   3889{
   3890	int i;
   3891
   3892	if (!dump_trace)
   3893		return;
   3894
   3895	for (i = start; i <= finish; i++)
   3896		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
   3897}
   3898
   3899static void intel_pt_print_info_str(const char *name, const char *str)
   3900{
   3901	if (!dump_trace)
   3902		return;
   3903
   3904	fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
   3905}
   3906
   3907static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos)
   3908{
   3909	return auxtrace_info->header.size >=
   3910		sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1));
   3911}
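/*
 * intel_pt_has() is a backward-compatibility check: older perf.data files
 * carry fewer priv[] entries in the auxtrace info event, so a field at
 * index 'pos' is only read if the recorded header size shows that entry
 * was actually written.
 */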
   3912
   3913int intel_pt_process_auxtrace_info(union perf_event *event,
   3914				   struct perf_session *session)
   3915{
   3916	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
   3917	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
   3918	struct intel_pt *pt;
   3919	void *info_end;
   3920	__u64 *info;
   3921	int err;
   3922
   3923	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
   3924					min_sz)
   3925		return -EINVAL;
   3926
   3927	pt = zalloc(sizeof(struct intel_pt));
   3928	if (!pt)
   3929		return -ENOMEM;
   3930
   3931	pt->vmcs_info = RB_ROOT;
   3932
   3933	addr_filters__init(&pt->filts);
   3934
   3935	err = perf_config(intel_pt_perf_config, pt);
   3936	if (err)
   3937		goto err_free;
   3938
   3939	err = auxtrace_queues__init(&pt->queues);
   3940	if (err)
   3941		goto err_free;
   3942
   3943	if (session->itrace_synth_opts->set) {
   3944		pt->synth_opts = *session->itrace_synth_opts;
   3945	} else {
   3946		struct itrace_synth_opts *opts = session->itrace_synth_opts;
   3947
   3948		itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample);
   3949		if (!opts->default_no_sample && !opts->inject) {
   3950			pt->synth_opts.branches = false;
   3951			pt->synth_opts.callchain = true;
   3952			pt->synth_opts.add_callchain = true;
   3953		}
   3954		pt->synth_opts.thread_stack = opts->thread_stack;
   3955	}
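	/*
	 * Net effect of the block above: if the user gave itrace options
	 * they are used as-is; otherwise defaults are taken, and in the
	 * normal sampling case (not default_no_sample, not inject) branch
	 * samples are turned off in favour of synthesizing call chains.
	 */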
   3956
   3957	if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT))
   3958		intel_pt_log_set_name(INTEL_PT_PMU_NAME);
   3959
   3960	pt->session = session;
   3961	pt->machine = &session->machines.host; /* No kvm support */
   3962	pt->auxtrace_type = auxtrace_info->type;
   3963	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
   3964	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
   3965	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
   3966	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
   3967	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
   3968	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
   3969	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
   3970	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
   3971	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
   3972	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
   3973	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
   3974			    INTEL_PT_PER_CPU_MMAPS);
   3975
   3976	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
   3977		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
   3978		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
   3979		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
   3980		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
   3981		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
   3982		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
   3983				    INTEL_PT_CYC_BIT);
   3984	}
   3985
   3986	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
   3987		pt->max_non_turbo_ratio =
   3988			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
   3989		intel_pt_print_info(&auxtrace_info->priv[0],
   3990				    INTEL_PT_MAX_NONTURBO_RATIO,
   3991				    INTEL_PT_MAX_NONTURBO_RATIO);
   3992	}
   3993
   3994	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
   3995	info_end = (void *)auxtrace_info + auxtrace_info->header.size;
   3996
   3997	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
   3998		size_t len;
   3999
   4000		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
   4001		intel_pt_print_info(&auxtrace_info->priv[0],
   4002				    INTEL_PT_FILTER_STR_LEN,
   4003				    INTEL_PT_FILTER_STR_LEN);
   4004		if (len) {
   4005			const char *filter = (const char *)info;
   4006
   4007			len = roundup(len + 1, 8);
   4008			info += len >> 3;
   4009			if ((void *)info > info_end) {
   4010				pr_err("%s: bad filter string length\n", __func__);
   4011				err = -EINVAL;
   4012				goto err_free_queues;
   4013			}
   4014			pt->filter = memdup(filter, len);
   4015			if (!pt->filter) {
   4016				err = -ENOMEM;
   4017				goto err_free_queues;
   4018			}
   4019			if (session->header.needs_swap)
   4020				mem_bswap_64(pt->filter, len);
   4021			if (pt->filter[len - 1]) {
   4022				pr_err("%s: filter string not null terminated\n", __func__);
   4023				err = -EINVAL;
   4024				goto err_free_queues;
   4025			}
   4026			err = addr_filters__parse_bare_filter(&pt->filts,
   4027							      filter);
   4028			if (err)
   4029				goto err_free_queues;
   4030		}
   4031		intel_pt_print_info_str("Filter string", pt->filter);
   4032	}
   4033
   4034	if ((void *)info < info_end) {
   4035		pt->cap_event_trace = *info++;
   4036		if (dump_trace)
   4037			fprintf(stdout, "  Cap Event Trace     %d\n",
   4038				pt->cap_event_trace);
   4039	}
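	/*
	 * Layout note: after the fixed priv[] entries the auxtrace info
	 * event may carry a variable-length tail - first the address
	 * filter string handled above, then, if there is still room before
	 * info_end, a "Cap Event Trace" flag. Absent fields simply mean an
	 * older perf.data file.
	 */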
   4040
   4041	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
   4042	if (pt->timeless_decoding && !pt->tc.time_mult)
   4043		pt->tc.time_mult = 1;
   4044	pt->have_tsc = intel_pt_have_tsc(pt);
   4045	pt->sampling_mode = intel_pt_sampling_mode(pt);
   4046	pt->est_tsc = !pt->timeless_decoding;
   4047
   4048	if (pt->synth_opts.vm_time_correlation) {
   4049		if (pt->timeless_decoding) {
   4050			pr_err("Intel PT has no time information for VM Time Correlation\n");
   4051			err = -EINVAL;
   4052			goto err_free_queues;
   4053		}
   4054		if (session->itrace_synth_opts->ptime_range) {
   4055			pr_err("Time ranges cannot be specified with VM Time Correlation\n");
   4056			err = -EINVAL;
   4057			goto err_free_queues;
   4058		}
   4059		/* Currently TSC Offset is calculated using MTC packets */
   4060		if (!intel_pt_have_mtc(pt)) {
   4061			pr_err("MTC packets must have been enabled for VM Time Correlation\n");
   4062			err = -EINVAL;
   4063			goto err_free_queues;
   4064		}
   4065		err = intel_pt_parse_vm_tm_corr_args(pt);
   4066		if (err)
   4067			goto err_free_queues;
   4068	}
   4069
   4070	pt->unknown_thread = thread__new(999999999, 999999999);
   4071	if (!pt->unknown_thread) {
   4072		err = -ENOMEM;
   4073		goto err_free_queues;
   4074	}
   4075
   4076	/*
    4077	 * Since this thread will not be kept in any rbtree nor in a
   4078	 * list, initialize its list node so that at thread__put() the
   4079	 * current thread lifetime assumption is kept and we don't segfault
   4080	 * at list_del_init().
   4081	 */
   4082	INIT_LIST_HEAD(&pt->unknown_thread->node);
   4083
   4084	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
   4085	if (err)
   4086		goto err_delete_thread;
   4087	if (thread__init_maps(pt->unknown_thread, pt->machine)) {
   4088		err = -ENOMEM;
   4089		goto err_delete_thread;
   4090	}
   4091
   4092	pt->auxtrace.process_event = intel_pt_process_event;
   4093	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
   4094	pt->auxtrace.queue_data = intel_pt_queue_data;
   4095	pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample;
   4096	pt->auxtrace.flush_events = intel_pt_flush;
   4097	pt->auxtrace.free_events = intel_pt_free_events;
   4098	pt->auxtrace.free = intel_pt_free;
   4099	pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
   4100	session->auxtrace = &pt->auxtrace;
   4101
   4102	if (dump_trace)
   4103		return 0;
   4104
   4105	if (pt->have_sched_switch == 1) {
   4106		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
   4107		if (!pt->switch_evsel) {
   4108			pr_err("%s: missing sched_switch event\n", __func__);
   4109			err = -EINVAL;
   4110			goto err_delete_thread;
   4111		}
   4112	} else if (pt->have_sched_switch == 2 &&
   4113		   !intel_pt_find_switch(session->evlist)) {
   4114		pr_err("%s: missing context_switch attribute flag\n", __func__);
   4115		err = -EINVAL;
   4116		goto err_delete_thread;
   4117	}
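	/*
	 * From the checks above: have_sched_switch == 1 means context
	 * switches were recorded via the sched:sched_switch tracepoint, so
	 * that evsel must be present; have_sched_switch == 2 means they
	 * rely on context-switch records, so at least one evsel must have
	 * attr.context_switch set.
	 */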
   4118
   4119	if (pt->synth_opts.log)
   4120		intel_pt_log_enable();
   4121
   4122	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
   4123	if (pt->tc.time_mult) {
   4124		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
   4125
   4126		if (!pt->max_non_turbo_ratio)
   4127			pt->max_non_turbo_ratio =
   4128					(tsc_freq + 50000000) / 100000000;
   4129		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
   4130		intel_pt_log("Maximum non-turbo ratio %u\n",
   4131			     pt->max_non_turbo_ratio);
   4132		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
   4133	}
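	/*
	 * Worked example for the block above (values illustrative): with
	 * tsc_freq = 2,600,000,000 Hz, the derived max non-turbo ratio is
	 * (2.6e9 + 5e7) / 1e8 = 26, and cbr2khz = 2.6e9 / 26 / 1000 =
	 * 100000, i.e. one core-to-bus ratio (CBR) unit corresponds to
	 * 100000 kHz, matching the 100 MHz bus clock assumed above.
	 */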
   4134
   4135	err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
   4136	if (err)
   4137		goto err_delete_thread;
   4138
   4139	if (pt->synth_opts.calls)
   4140		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
   4141				       PERF_IP_FLAG_TRACE_END;
   4142	if (pt->synth_opts.returns)
   4143		pt->branches_filter |= PERF_IP_FLAG_RETURN |
   4144				       PERF_IP_FLAG_TRACE_BEGIN;
   4145
   4146	if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
   4147	    !symbol_conf.use_callchain) {
   4148		symbol_conf.use_callchain = true;
   4149		if (callchain_register_param(&callchain_param) < 0) {
   4150			symbol_conf.use_callchain = false;
   4151			pt->synth_opts.callchain = false;
   4152			pt->synth_opts.add_callchain = false;
   4153		}
   4154	}
   4155
   4156	if (pt->synth_opts.add_callchain) {
   4157		err = intel_pt_callchain_init(pt);
   4158		if (err)
   4159			goto err_delete_thread;
   4160	}
   4161
   4162	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
   4163		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
   4164		pt->br_stack_sz_plus = pt->br_stack_sz;
   4165	}
   4166
   4167	if (pt->synth_opts.add_last_branch) {
   4168		err = intel_pt_br_stack_init(pt);
   4169		if (err)
   4170			goto err_delete_thread;
   4171		/*
   4172		 * Additional branch stack size to cater for tracing from the
   4173		 * actual sample ip to where the sample time is recorded.
   4174		 * Measured at about 200 branches, but generously set to 1024.
   4175		 * If kernel space is not being traced, then add just 1 for the
   4176		 * branch to kernel space.
   4177		 */
   4178		if (intel_pt_tracing_kernel(pt))
   4179			pt->br_stack_sz_plus += 1024;
   4180		else
   4181			pt->br_stack_sz_plus += 1;
   4182	}
   4183
   4184	pt->use_thread_stack = pt->synth_opts.callchain ||
   4185			       pt->synth_opts.add_callchain ||
   4186			       pt->synth_opts.thread_stack ||
   4187			       pt->synth_opts.last_branch ||
   4188			       pt->synth_opts.add_last_branch;
   4189
   4190	pt->callstack = pt->synth_opts.callchain ||
   4191			pt->synth_opts.add_callchain ||
   4192			pt->synth_opts.thread_stack;
   4193
   4194	err = intel_pt_synth_events(pt, session);
   4195	if (err)
   4196		goto err_delete_thread;
   4197
   4198	intel_pt_setup_pebs_events(pt);
   4199
   4200	if (pt->sampling_mode || list_empty(&session->auxtrace_index))
   4201		err = auxtrace_queue_data(session, true, true);
   4202	else
   4203		err = auxtrace_queues__process_index(&pt->queues, session);
   4204	if (err)
   4205		goto err_delete_thread;
   4206
   4207	if (pt->queues.populated)
   4208		pt->data_queued = true;
   4209
   4210	if (pt->timeless_decoding)
   4211		pr_debug2("Intel PT decoding without timestamps\n");
   4212
   4213	return 0;
   4214
   4215err_delete_thread:
   4216	zfree(&pt->chain);
   4217	thread__zput(pt->unknown_thread);
   4218err_free_queues:
   4219	intel_pt_log_disable();
   4220	auxtrace_queues__free(&pt->queues);
   4221	session->auxtrace = NULL;
   4222err_free:
   4223	addr_filters__exit(&pt->filts);
   4224	zfree(&pt->filter);
   4225	zfree(&pt->time_ranges);
   4226	free(pt);
   4227	return err;
   4228}