cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

thread-stack.c (30236B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * thread-stack.c: Synthesize a thread's stack using call / return events
      4 * Copyright (c) 2014, Intel Corporation.
      5 */
      6
      7#include <linux/rbtree.h>
      8#include <linux/list.h>
      9#include <linux/log2.h>
     10#include <linux/zalloc.h>
     11#include <errno.h>
     12#include <stdlib.h>
     13#include <string.h>
     14#include "thread.h"
     15#include "event.h"
     16#include "machine.h"
     17#include "env.h"
     18#include "debug.h"
     19#include "symbol.h"
     20#include "comm.h"
     21#include "call-path.h"
     22#include "thread-stack.h"
     23
     24#define STACK_GROWTH 2048
     25
     26/*
     27 * State of retpoline detection.
     28 *
     29 * RETPOLINE_NONE: no retpoline detection
     30 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
     31 * X86_RETPOLINE_DETECTED: x86 retpoline detected
     32 */
     33enum retpoline_state_t {
     34	RETPOLINE_NONE,
     35	X86_RETPOLINE_POSSIBLE,
     36	X86_RETPOLINE_DETECTED,
     37};
     38
     39/**
     40 * struct thread_stack_entry - thread stack entry.
     41 * @ret_addr: return address
     42 * @timestamp: timestamp (if known)
     43 * @ref: external reference (e.g. db_id of sample)
     44 * @branch_count: the branch count when the entry was created
     45 * @insn_count: the instruction count when the entry was created
      46 * @cyc_count: the cycle count when the entry was created
     47 * @db_id: id used for db-export
     48 * @cp: call path
     49 * @no_call: a 'call' was not seen
     50 * @trace_end: a 'call' but trace ended
     51 * @non_call: a branch but not a 'call' to the start of a different symbol
     52 */
     53struct thread_stack_entry {
     54	u64 ret_addr;
     55	u64 timestamp;
     56	u64 ref;
     57	u64 branch_count;
     58	u64 insn_count;
     59	u64 cyc_count;
     60	u64 db_id;
     61	struct call_path *cp;
     62	bool no_call;
     63	bool trace_end;
     64	bool non_call;
     65};
     66
     67/**
     68 * struct thread_stack - thread stack constructed from 'call' and 'return'
     69 *                       branch samples.
     70 * @stack: array that holds the stack
     71 * @cnt: number of entries in the stack
     72 * @sz: current maximum stack size
     73 * @trace_nr: current trace number
     74 * @branch_count: running branch count
      75 * @insn_count: running instruction count
      76 * @cyc_count: running cycle count
     77 * @kernel_start: kernel start address
     78 * @last_time: last timestamp
     79 * @crp: call/return processor
     80 * @comm: current comm
     81 * @arr_sz: size of array if this is the first element of an array
     82 * @rstate: used to detect retpolines
     83 * @br_stack_rb: branch stack (ring buffer)
     84 * @br_stack_sz: maximum branch stack size
     85 * @br_stack_pos: current position in @br_stack_rb
     86 * @mispred_all: mark all branches as mispredicted
     87 */
     88struct thread_stack {
     89	struct thread_stack_entry *stack;
     90	size_t cnt;
     91	size_t sz;
     92	u64 trace_nr;
     93	u64 branch_count;
     94	u64 insn_count;
     95	u64 cyc_count;
     96	u64 kernel_start;
     97	u64 last_time;
     98	struct call_return_processor *crp;
     99	struct comm *comm;
    100	unsigned int arr_sz;
    101	enum retpoline_state_t rstate;
    102	struct branch_stack *br_stack_rb;
    103	unsigned int br_stack_sz;
    104	unsigned int br_stack_pos;
    105	bool mispred_all;
    106};
    107
    108/*
    109 * Assume pid == tid == 0 identifies the idle task as defined by
    110 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
    111 * and therefore requires a stack for each cpu.
    112 */
    113static inline bool thread_stack__per_cpu(struct thread *thread)
    114{
    115	return !(thread->tid || thread->pid_);
    116}
    117
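        /*
         * Enlarge the stack by STACK_GROWTH entries. The first call allocates
         * the initial array, since realloc() of a NULL pointer behaves like
         * malloc() and ts->sz starts out zero.
         */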
    118static int thread_stack__grow(struct thread_stack *ts)
    119{
    120	struct thread_stack_entry *new_stack;
    121	size_t sz, new_sz;
    122
    123	new_sz = ts->sz + STACK_GROWTH;
    124	sz = new_sz * sizeof(struct thread_stack_entry);
    125
    126	new_stack = realloc(ts->stack, sz);
    127	if (!new_stack)
    128		return -ENOMEM;
    129
    130	ts->stack = new_stack;
    131	ts->sz = new_sz;
    132
    133	return 0;
    134}
    135
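        /*
         * One-time setup for a thread_stack: allocate the call stack and/or
         * the branch stack ring buffer as requested, record the kernel start
         * address, and enable retpoline detection on x86.
         */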
    136static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
    137			      struct call_return_processor *crp,
    138			      bool callstack, unsigned int br_stack_sz)
    139{
    140	int err;
    141
    142	if (callstack) {
    143		err = thread_stack__grow(ts);
    144		if (err)
    145			return err;
    146	}
    147
    148	if (br_stack_sz) {
    149		size_t sz = sizeof(struct branch_stack);
    150
    151		sz += br_stack_sz * sizeof(struct branch_entry);
    152		ts->br_stack_rb = zalloc(sz);
    153		if (!ts->br_stack_rb)
    154			return -ENOMEM;
    155		ts->br_stack_sz = br_stack_sz;
    156	}
    157
    158	if (thread->maps && thread->maps->machine) {
    159		struct machine *machine = thread->maps->machine;
    160		const char *arch = perf_env__arch(machine->env);
    161
    162		ts->kernel_start = machine__kernel_start(machine);
    163		if (!strcmp(arch, "x86"))
    164			ts->rstate = X86_RETPOLINE_POSSIBLE;
    165	} else {
    166		ts->kernel_start = 1ULL << 63;
    167	}
    168	ts->crp = crp;
    169
    170	return 0;
    171}
    172
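        /*
         * Get or create the thread_stack for @thread. The idle task gets one
         * thread_stack per cpu, so its array is reallocated (sized to a power
         * of two) as larger cpu numbers are seen; other threads use a
         * single-element array.
         */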
    173static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
    174					      struct call_return_processor *crp,
    175					      bool callstack,
    176					      unsigned int br_stack_sz)
    177{
    178	struct thread_stack *ts = thread->ts, *new_ts;
    179	unsigned int old_sz = ts ? ts->arr_sz : 0;
    180	unsigned int new_sz = 1;
    181
    182	if (thread_stack__per_cpu(thread) && cpu > 0)
    183		new_sz = roundup_pow_of_two(cpu + 1);
    184
    185	if (!ts || new_sz > old_sz) {
    186		new_ts = calloc(new_sz, sizeof(*ts));
    187		if (!new_ts)
    188			return NULL;
    189		if (ts)
    190			memcpy(new_ts, ts, old_sz * sizeof(*ts));
    191		new_ts->arr_sz = new_sz;
    192		zfree(&thread->ts);
    193		thread->ts = new_ts;
    194		ts = new_ts;
    195	}
    196
    197	if (thread_stack__per_cpu(thread) && cpu > 0 &&
    198	    (unsigned int)cpu < ts->arr_sz)
    199		ts += cpu;
    200
    201	if (!ts->stack &&
    202	    thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
    203		return NULL;
    204
    205	return ts;
    206}
    207
    208static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
    209{
    210	struct thread_stack *ts = thread->ts;
    211
    212	if (cpu < 0)
    213		cpu = 0;
    214
    215	if (!ts || (unsigned int)cpu >= ts->arr_sz)
    216		return NULL;
    217
    218	ts += cpu;
    219
    220	if (!ts->stack)
    221		return NULL;
    222
    223	return ts;
    224}
    225
    226static inline struct thread_stack *thread__stack(struct thread *thread,
    227						    int cpu)
    228{
    229	if (!thread)
    230		return NULL;
    231
    232	if (thread_stack__per_cpu(thread))
    233		return thread__cpu_stack(thread, cpu);
    234
    235	return thread->ts;
    236}
    237
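        /*
         * Push a return address, growing the stack if it is full. If growing
         * fails, the existing entries are discarded but the entry is still
         * recorded and the error returned.
         */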
    238static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
    239			      bool trace_end)
    240{
    241	int err = 0;
    242
    243	if (ts->cnt == ts->sz) {
    244		err = thread_stack__grow(ts);
    245		if (err) {
    246			pr_warning("Out of memory: discarding thread stack\n");
    247			ts->cnt = 0;
    248		}
    249	}
    250
    251	ts->stack[ts->cnt].trace_end = trace_end;
    252	ts->stack[ts->cnt++].ret_addr = ret_addr;
    253
    254	return err;
    255}
    256
    257static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
    258{
    259	size_t i;
    260
    261	/*
    262	 * In some cases there may be functions which are not seen to return.
    263	 * For example when setjmp / longjmp has been used.  Or the perf context
    264	 * switch in the kernel which doesn't stop and start tracing in exactly
    265	 * the same code path.  When that happens the return address will be
    266	 * further down the stack.  If the return address is not found at all,
    267	 * we assume the opposite (i.e. this is a return for a call that wasn't
    268	 * seen for some reason) and leave the stack alone.
    269	 */
    270	for (i = ts->cnt; i; ) {
    271		if (ts->stack[--i].ret_addr == ret_addr) {
    272			ts->cnt = i;
    273			return;
    274		}
    275	}
    276}
    277
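        /* Pop consecutive 'trace end' entries from the top of the stack */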
    278static void thread_stack__pop_trace_end(struct thread_stack *ts)
    279{
    280	size_t i;
    281
    282	for (i = ts->cnt; i; ) {
    283		if (ts->stack[--i].trace_end)
    284			ts->cnt = i;
    285		else
    286			return;
    287	}
    288}
    289
    290static bool thread_stack__in_kernel(struct thread_stack *ts)
    291{
    292	if (!ts->cnt)
    293		return false;
    294
    295	return ts->stack[ts->cnt - 1].cp->in_kernel;
    296}
    297
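        /*
         * Report one call/return pair to the call/return processor, including
         * the branch, instruction and cycle counts accumulated since the entry
         * was pushed. A pointer to the parent entry's db_id is passed so the
         * processor can assign it before the child is exported.
         */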
    298static int thread_stack__call_return(struct thread *thread,
    299				     struct thread_stack *ts, size_t idx,
    300				     u64 timestamp, u64 ref, bool no_return)
    301{
    302	struct call_return_processor *crp = ts->crp;
    303	struct thread_stack_entry *tse;
    304	struct call_return cr = {
    305		.thread = thread,
    306		.comm = ts->comm,
    307		.db_id = 0,
    308	};
    309	u64 *parent_db_id;
    310
    311	tse = &ts->stack[idx];
    312	cr.cp = tse->cp;
    313	cr.call_time = tse->timestamp;
    314	cr.return_time = timestamp;
    315	cr.branch_count = ts->branch_count - tse->branch_count;
    316	cr.insn_count = ts->insn_count - tse->insn_count;
    317	cr.cyc_count = ts->cyc_count - tse->cyc_count;
    318	cr.db_id = tse->db_id;
    319	cr.call_ref = tse->ref;
    320	cr.return_ref = ref;
    321	if (tse->no_call)
    322		cr.flags |= CALL_RETURN_NO_CALL;
    323	if (no_return)
    324		cr.flags |= CALL_RETURN_NO_RETURN;
    325	if (tse->non_call)
    326		cr.flags |= CALL_RETURN_NON_CALL;
    327
    328	/*
    329	 * The parent db_id must be assigned before exporting the child. Note
    330	 * it is not possible to export the parent first because its information
    331	 * is not yet complete because its 'return' has not yet been processed.
    332	 */
    333	parent_db_id = idx ? &(tse - 1)->db_id : NULL;
    334
    335	return crp->process(&cr, parent_db_id, crp->data);
    336}
    337
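        /*
         * Empty one thread_stack. Without a call/return processor the entries
         * (and any branch stack contents) are simply discarded; with one, each
         * remaining entry is reported as a call that was not seen to return.
         */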
    338static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
    339{
    340	struct call_return_processor *crp = ts->crp;
    341	int err;
    342
    343	if (!crp) {
    344		ts->cnt = 0;
    345		ts->br_stack_pos = 0;
    346		if (ts->br_stack_rb)
    347			ts->br_stack_rb->nr = 0;
    348		return 0;
    349	}
    350
    351	while (ts->cnt) {
    352		err = thread_stack__call_return(thread, ts, --ts->cnt,
    353						ts->last_time, 0, true);
    354		if (err) {
    355			pr_err("Error flushing thread stack!\n");
    356			ts->cnt = 0;
    357			return err;
    358		}
    359	}
    360
    361	return 0;
    362}
    363
    364int thread_stack__flush(struct thread *thread)
    365{
    366	struct thread_stack *ts = thread->ts;
    367	unsigned int pos;
    368	int err = 0;
    369
    370	if (ts) {
    371		for (pos = 0; pos < ts->arr_sz; pos++) {
    372			int ret = __thread_stack__flush(thread, ts + pos);
    373
    374			if (ret)
    375				err = ret;
    376		}
    377	}
    378
    379	return err;
    380}
    381
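        /*
         * Record a branch in the ring buffer. @br_stack_pos moves backwards,
         * wrapping at zero, so the newest entry is always at @br_stack_pos and
         * reading forward from there yields entries in newest-to-oldest order,
         * as expected for a perf branch stack.
         */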
    382static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
    383					  u64 from_ip, u64 to_ip)
    384{
    385	struct branch_stack *bs = ts->br_stack_rb;
    386	struct branch_entry *be;
    387
    388	if (!ts->br_stack_pos)
    389		ts->br_stack_pos = ts->br_stack_sz;
    390
    391	ts->br_stack_pos -= 1;
    392
    393	be              = &bs->entries[ts->br_stack_pos];
    394	be->from        = from_ip;
    395	be->to          = to_ip;
    396	be->flags.value = 0;
    397	be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
    398	be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
    399	/* No support for mispredict */
    400	be->flags.mispred = ts->mispred_all;
    401
    402	if (bs->nr < ts->br_stack_sz)
    403		bs->nr += 1;
    404}
    405
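        /*
         * Feed one branch event into the thread stack. This path is used when
         * call/return information is kept only for later sampling (i.e. there
         * is no call/return processor): calls push their return address,
         * returns and trace boundaries pop matching entries, and the branch is
         * added to the branch stack ring buffer if one was requested.
         *
         * Illustrative call, with no branch stack (the variable names are
         * assumptions, not taken from an actual caller):
         *
         *      err = thread_stack__event(thread, cpu, flags, from_ip, to_ip,
         *                                insn_len, trace_nr, true, 0, false);
         */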
    406int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
    407			u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
    408			unsigned int br_stack_sz, bool mispred_all)
    409{
    410	struct thread_stack *ts = thread__stack(thread, cpu);
    411
    412	if (!thread)
    413		return -EINVAL;
    414
    415	if (!ts) {
    416		ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
    417		if (!ts) {
    418			pr_warning("Out of memory: no thread stack\n");
    419			return -ENOMEM;
    420		}
    421		ts->trace_nr = trace_nr;
    422		ts->mispred_all = mispred_all;
    423	}
    424
    425	/*
    426	 * When the trace is discontinuous, the trace_nr changes.  In that case
    427	 * the stack might be completely invalid.  Better to report nothing than
    428	 * to report something misleading, so flush the stack.
    429	 */
    430	if (trace_nr != ts->trace_nr) {
    431		if (ts->trace_nr)
    432			__thread_stack__flush(thread, ts);
    433		ts->trace_nr = trace_nr;
    434	}
    435
    436	if (br_stack_sz)
    437		thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
    438
    439	/*
    440	 * Stop here if thread_stack__process() is in use, or not recording call
    441	 * stack.
    442	 */
    443	if (ts->crp || !callstack)
    444		return 0;
    445
    446	if (flags & PERF_IP_FLAG_CALL) {
    447		u64 ret_addr;
    448
    449		if (!to_ip)
    450			return 0;
    451		ret_addr = from_ip + insn_len;
    452		if (ret_addr == to_ip)
    453			return 0; /* Zero-length calls are excluded */
    454		return thread_stack__push(ts, ret_addr,
    455					  flags & PERF_IP_FLAG_TRACE_END);
    456	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
    457		/*
    458		 * If the caller did not change the trace number (which would
    459		 * have flushed the stack) then try to make sense of the stack.
    460		 * Possibly, tracing began after returning to the current
    461		 * address, so try to pop that. Also, do not expect a call made
    462		 * when the trace ended, to return, so pop that.
    463		 */
    464		thread_stack__pop(ts, to_ip);
    465		thread_stack__pop_trace_end(ts);
    466	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
    467		thread_stack__pop(ts, to_ip);
    468	}
    469
    470	return 0;
    471}
    472
    473void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
    474{
    475	struct thread_stack *ts = thread__stack(thread, cpu);
    476
    477	if (!ts)
    478		return;
    479
    480	if (trace_nr != ts->trace_nr) {
    481		if (ts->trace_nr)
    482			__thread_stack__flush(thread, ts);
    483		ts->trace_nr = trace_nr;
    484	}
    485}
    486
    487static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
    488{
    489	__thread_stack__flush(thread, ts);
    490	zfree(&ts->stack);
    491	zfree(&ts->br_stack_rb);
    492}
    493
    494static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
    495{
    496	unsigned int arr_sz = ts->arr_sz;
    497
    498	__thread_stack__free(thread, ts);
    499	memset(ts, 0, sizeof(*ts));
    500	ts->arr_sz = arr_sz;
    501}
    502
    503void thread_stack__free(struct thread *thread)
    504{
    505	struct thread_stack *ts = thread->ts;
    506	unsigned int pos;
    507
    508	if (ts) {
    509		for (pos = 0; pos < ts->arr_sz; pos++)
    510			__thread_stack__free(thread, ts + pos);
    511		zfree(&thread->ts);
    512	}
    513}
    514
    515static inline u64 callchain_context(u64 ip, u64 kernel_start)
    516{
    517	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
    518}
    519
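        /*
         * Synthesize a callchain into @chain: a PERF_CONTEXT_USER or
         * PERF_CONTEXT_KERNEL marker, the sampled ip, then the stacked return
         * addresses from innermost to outermost, inserting a new context
         * marker whenever the addresses cross the kernel boundary.
         */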
    520void thread_stack__sample(struct thread *thread, int cpu,
    521			  struct ip_callchain *chain,
    522			  size_t sz, u64 ip, u64 kernel_start)
    523{
    524	struct thread_stack *ts = thread__stack(thread, cpu);
    525	u64 context = callchain_context(ip, kernel_start);
    526	u64 last_context;
    527	size_t i, j;
    528
    529	if (sz < 2) {
    530		chain->nr = 0;
    531		return;
    532	}
    533
    534	chain->ips[0] = context;
    535	chain->ips[1] = ip;
    536
    537	if (!ts) {
    538		chain->nr = 2;
    539		return;
    540	}
    541
    542	last_context = context;
    543
    544	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
    545		ip = ts->stack[ts->cnt - j].ret_addr;
    546		context = callchain_context(ip, kernel_start);
    547		if (context != last_context) {
    548			if (i >= sz - 1)
    549				break;
    550			chain->ips[i++] = context;
    551			last_context = context;
    552		}
    553		chain->ips[i] = ip;
    554	}
    555
    556	chain->nr = i;
    557}
    558
    559/*
    560 * Hardware sample records, created some time after the event occurred, need to
    561 * have subsequent addresses removed from the call chain.
    562 */
    563void thread_stack__sample_late(struct thread *thread, int cpu,
    564			       struct ip_callchain *chain, size_t sz,
    565			       u64 sample_ip, u64 kernel_start)
    566{
    567	struct thread_stack *ts = thread__stack(thread, cpu);
    568	u64 sample_context = callchain_context(sample_ip, kernel_start);
    569	u64 last_context, context, ip;
    570	size_t nr = 0, j;
    571
    572	if (sz < 2) {
    573		chain->nr = 0;
    574		return;
    575	}
    576
    577	if (!ts)
    578		goto out;
    579
    580	/*
    581	 * When tracing kernel space, kernel addresses occur at the top of the
    582	 * call chain after the event occurred but before tracing stopped.
    583	 * Skip them.
    584	 */
    585	for (j = 1; j <= ts->cnt; j++) {
    586		ip = ts->stack[ts->cnt - j].ret_addr;
    587		context = callchain_context(ip, kernel_start);
    588		if (context == PERF_CONTEXT_USER ||
    589		    (context == sample_context && ip == sample_ip))
    590			break;
    591	}
    592
    593	last_context = sample_ip; /* Use sample_ip as an invalid context */
    594
    595	for (; nr < sz && j <= ts->cnt; nr++, j++) {
    596		ip = ts->stack[ts->cnt - j].ret_addr;
    597		context = callchain_context(ip, kernel_start);
    598		if (context != last_context) {
    599			if (nr >= sz - 1)
    600				break;
    601			chain->ips[nr++] = context;
    602			last_context = context;
    603		}
    604		chain->ips[nr] = ip;
    605	}
    606out:
    607	if (nr) {
    608		chain->nr = nr;
    609	} else {
    610		chain->ips[0] = sample_context;
    611		chain->ips[1] = sample_ip;
    612		chain->nr = 2;
    613	}
    614}
    615
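        /*
         * Copy the branch stack ring buffer into @dst, newest entry first: one
         * chunk from @br_stack_pos to the end of the buffer and, if the buffer
         * has wrapped, a second chunk from the start.
         */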
    616void thread_stack__br_sample(struct thread *thread, int cpu,
    617			     struct branch_stack *dst, unsigned int sz)
    618{
    619	struct thread_stack *ts = thread__stack(thread, cpu);
    620	const size_t bsz = sizeof(struct branch_entry);
    621	struct branch_stack *src;
    622	struct branch_entry *be;
    623	unsigned int nr;
    624
    625	dst->nr = 0;
    626
    627	if (!ts)
    628		return;
    629
    630	src = ts->br_stack_rb;
    631	if (!src->nr)
    632		return;
    633
    634	dst->nr = min((unsigned int)src->nr, sz);
    635
    636	be = &dst->entries[0];
    637	nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
    638	memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
    639
    640	if (src->nr >= ts->br_stack_sz) {
    641		sz -= nr;
    642		be = &dst->entries[nr];
    643		nr = min(ts->br_stack_pos, sz);
     644		memcpy(be, &src->entries[0], bsz * nr);
    645	}
    646}
    647
    648/* Start of user space branch entries */
    649static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
    650{
    651	if (!*start)
    652		*start = be->to && be->to < kernel_start;
    653
    654	return *start;
    655}
    656
    657/*
    658 * Start of branch entries after the ip fell in between 2 branches, or user
    659 * space branch entries.
    660 */
    661static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
    662		     bool *start, struct branch_entry *nb)
    663{
    664	if (!*start) {
    665		*start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
    666			 be->from < kernel_start ||
    667			 (be->to && be->to < kernel_start);
    668	}
    669
    670	return *start;
    671}
    672
    673/*
    674 * Hardware sample records, created some time after the event occurred, need to
    675 * have subsequent addresses removed from the branch stack.
    676 */
    677void thread_stack__br_sample_late(struct thread *thread, int cpu,
    678				  struct branch_stack *dst, unsigned int sz,
    679				  u64 ip, u64 kernel_start)
    680{
    681	struct thread_stack *ts = thread__stack(thread, cpu);
    682	struct branch_entry *d, *s, *spos, *ssz;
    683	struct branch_stack *src;
    684	unsigned int nr = 0;
    685	bool start = false;
    686
    687	dst->nr = 0;
    688
    689	if (!ts)
    690		return;
    691
    692	src = ts->br_stack_rb;
    693	if (!src->nr)
    694		return;
    695
    696	spos = &src->entries[ts->br_stack_pos];
    697	ssz  = &src->entries[ts->br_stack_sz];
    698
    699	d = &dst->entries[0];
    700	s = spos;
    701
    702	if (ip < kernel_start) {
    703		/*
    704		 * User space sample: start copying branch entries when the
    705		 * branch is in user space.
    706		 */
    707		for (s = spos; s < ssz && nr < sz; s++) {
    708			if (us_start(s, kernel_start, &start)) {
    709				*d++ = *s;
    710				nr += 1;
    711			}
    712		}
    713
    714		if (src->nr >= ts->br_stack_sz) {
    715			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
    716				if (us_start(s, kernel_start, &start)) {
    717					*d++ = *s;
    718					nr += 1;
    719				}
    720			}
    721		}
    722	} else {
    723		struct branch_entry *nb = NULL;
    724
    725		/*
    726		 * Kernel space sample: start copying branch entries when the ip
    727		 * falls in between 2 branches (or the branch is in user space
    728		 * because then the start must have been missed).
    729		 */
    730		for (s = spos; s < ssz && nr < sz; s++) {
    731			if (ks_start(s, ip, kernel_start, &start, nb)) {
    732				*d++ = *s;
    733				nr += 1;
    734			}
    735			nb = s;
    736		}
    737
    738		if (src->nr >= ts->br_stack_sz) {
    739			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
    740				if (ks_start(s, ip, kernel_start, &start, nb)) {
    741					*d++ = *s;
    742					nr += 1;
    743				}
    744				nb = s;
    745			}
    746		}
    747	}
    748
    749	dst->nr = nr;
    750}
    751
    752struct call_return_processor *
    753call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
    754			   void *data)
    755{
    756	struct call_return_processor *crp;
    757
    758	crp = zalloc(sizeof(struct call_return_processor));
    759	if (!crp)
    760		return NULL;
    761	crp->cpr = call_path_root__new();
    762	if (!crp->cpr)
    763		goto out_free;
    764	crp->process = process;
    765	crp->data = data;
    766	return crp;
    767
    768out_free:
    769	free(crp);
    770	return NULL;
    771}
    772
    773void call_return_processor__free(struct call_return_processor *crp)
    774{
    775	if (crp) {
    776		call_path_root__free(crp->cpr);
    777		free(crp);
    778	}
    779}
    780
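        /*
         * Push an entry annotated with its call path and snapshots of the
         * running branch, instruction and cycle counts, for use with the
         * call/return processor.
         */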
    781static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
    782				 u64 timestamp, u64 ref, struct call_path *cp,
    783				 bool no_call, bool trace_end)
    784{
    785	struct thread_stack_entry *tse;
    786	int err;
    787
    788	if (!cp)
    789		return -ENOMEM;
    790
    791	if (ts->cnt == ts->sz) {
    792		err = thread_stack__grow(ts);
    793		if (err)
    794			return err;
    795	}
    796
    797	tse = &ts->stack[ts->cnt++];
    798	tse->ret_addr = ret_addr;
    799	tse->timestamp = timestamp;
    800	tse->ref = ref;
    801	tse->branch_count = ts->branch_count;
    802	tse->insn_count = ts->insn_count;
    803	tse->cyc_count = ts->cyc_count;
    804	tse->cp = cp;
    805	tse->no_call = no_call;
    806	tse->trace_end = trace_end;
    807	tse->non_call = false;
    808	tse->db_id = 0;
    809
    810	return 0;
    811}
    812
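        /*
         * Pop entries to match a return to @ret_addr. If the return address is
         * found further down the stack, the entries above it are reported as
         * calls that were not seen to return. If it is not found at all, the
         * stack is left unchanged and 1 is returned.
         */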
    813static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
    814				u64 ret_addr, u64 timestamp, u64 ref,
    815				struct symbol *sym)
    816{
    817	int err;
    818
    819	if (!ts->cnt)
    820		return 1;
    821
    822	if (ts->cnt == 1) {
    823		struct thread_stack_entry *tse = &ts->stack[0];
    824
    825		if (tse->cp->sym == sym)
    826			return thread_stack__call_return(thread, ts, --ts->cnt,
    827							 timestamp, ref, false);
    828	}
    829
    830	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
    831	    !ts->stack[ts->cnt - 1].non_call) {
    832		return thread_stack__call_return(thread, ts, --ts->cnt,
    833						 timestamp, ref, false);
    834	} else {
    835		size_t i = ts->cnt - 1;
    836
    837		while (i--) {
    838			if (ts->stack[i].ret_addr != ret_addr ||
    839			    ts->stack[i].non_call)
    840				continue;
    841			i += 1;
    842			while (ts->cnt > i) {
    843				err = thread_stack__call_return(thread, ts,
    844								--ts->cnt,
    845								timestamp, ref,
    846								true);
    847				if (err)
    848					return err;
    849			}
    850			return thread_stack__call_return(thread, ts, --ts->cnt,
    851							 timestamp, ref, false);
    852		}
    853	}
    854
    855	return 1;
    856}
    857
    858static int thread_stack__bottom(struct thread_stack *ts,
    859				struct perf_sample *sample,
    860				struct addr_location *from_al,
    861				struct addr_location *to_al, u64 ref)
    862{
    863	struct call_path_root *cpr = ts->crp->cpr;
    864	struct call_path *cp;
    865	struct symbol *sym;
    866	u64 ip;
    867
    868	if (sample->ip) {
    869		ip = sample->ip;
    870		sym = from_al->sym;
    871	} else if (sample->addr) {
    872		ip = sample->addr;
    873		sym = to_al->sym;
    874	} else {
    875		return 0;
    876	}
    877
    878	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
    879				ts->kernel_start);
    880
    881	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
    882				     true, false);
    883}
    884
    885static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
    886				struct perf_sample *sample, u64 ref)
    887{
    888	u64 tm = sample->time;
    889	int err;
    890
    891	/* Return to userspace, so pop all kernel addresses */
    892	while (thread_stack__in_kernel(ts)) {
    893		err = thread_stack__call_return(thread, ts, --ts->cnt,
    894						tm, ref, true);
    895		if (err)
    896			return err;
    897	}
    898
    899	return 0;
    900}
    901
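        /*
         * Handle a 'return' that does not match anything on the stack,
         * typically because the corresponding 'call' happened before tracing
         * started, or because the 'return' is really being used as a jump
         * (e.g. a retpoline).
         */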
    902static int thread_stack__no_call_return(struct thread *thread,
    903					struct thread_stack *ts,
    904					struct perf_sample *sample,
    905					struct addr_location *from_al,
    906					struct addr_location *to_al, u64 ref)
    907{
    908	struct call_path_root *cpr = ts->crp->cpr;
    909	struct call_path *root = &cpr->call_path;
    910	struct symbol *fsym = from_al->sym;
    911	struct symbol *tsym = to_al->sym;
    912	struct call_path *cp, *parent;
    913	u64 ks = ts->kernel_start;
    914	u64 addr = sample->addr;
    915	u64 tm = sample->time;
    916	u64 ip = sample->ip;
    917	int err;
    918
    919	if (ip >= ks && addr < ks) {
    920		/* Return to userspace, so pop all kernel addresses */
    921		err = thread_stack__pop_ks(thread, ts, sample, ref);
    922		if (err)
    923			return err;
    924
    925		/* If the stack is empty, push the userspace address */
    926		if (!ts->cnt) {
    927			cp = call_path__findnew(cpr, root, tsym, addr, ks);
    928			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
    929						     false);
    930		}
    931	} else if (thread_stack__in_kernel(ts) && ip < ks) {
    932		/* Return to userspace, so pop all kernel addresses */
    933		err = thread_stack__pop_ks(thread, ts, sample, ref);
    934		if (err)
    935			return err;
    936	}
    937
    938	if (ts->cnt)
    939		parent = ts->stack[ts->cnt - 1].cp;
    940	else
    941		parent = root;
    942
    943	if (parent->sym == from_al->sym) {
    944		/*
    945		 * At the bottom of the stack, assume the missing 'call' was
    946		 * before the trace started. So, pop the current symbol and push
    947		 * the 'to' symbol.
    948		 */
    949		if (ts->cnt == 1) {
    950			err = thread_stack__call_return(thread, ts, --ts->cnt,
    951							tm, ref, false);
    952			if (err)
    953				return err;
    954		}
    955
    956		if (!ts->cnt) {
    957			cp = call_path__findnew(cpr, root, tsym, addr, ks);
    958
    959			return thread_stack__push_cp(ts, addr, tm, ref, cp,
    960						     true, false);
    961		}
    962
    963		/*
    964		 * Otherwise assume the 'return' is being used as a jump (e.g.
    965		 * retpoline) and just push the 'to' symbol.
    966		 */
    967		cp = call_path__findnew(cpr, parent, tsym, addr, ks);
    968
    969		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
    970		if (!err)
    971			ts->stack[ts->cnt - 1].non_call = true;
    972
    973		return err;
    974	}
    975
    976	/*
    977	 * Assume 'parent' has not yet returned, so push 'to', and then push and
    978	 * pop 'from'.
    979	 */
    980
    981	cp = call_path__findnew(cpr, parent, tsym, addr, ks);
    982
    983	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
    984	if (err)
    985		return err;
    986
    987	cp = call_path__findnew(cpr, cp, fsym, ip, ks);
    988
    989	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
    990	if (err)
    991		return err;
    992
    993	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
    994}
    995
    996static int thread_stack__trace_begin(struct thread *thread,
    997				     struct thread_stack *ts, u64 timestamp,
    998				     u64 ref)
    999{
   1000	struct thread_stack_entry *tse;
   1001	int err;
   1002
   1003	if (!ts->cnt)
   1004		return 0;
   1005
   1006	/* Pop trace end */
   1007	tse = &ts->stack[ts->cnt - 1];
   1008	if (tse->trace_end) {
   1009		err = thread_stack__call_return(thread, ts, --ts->cnt,
   1010						timestamp, ref, false);
   1011		if (err)
   1012			return err;
   1013	}
   1014
   1015	return 0;
   1016}
   1017
   1018static int thread_stack__trace_end(struct thread_stack *ts,
   1019				   struct perf_sample *sample, u64 ref)
   1020{
   1021	struct call_path_root *cpr = ts->crp->cpr;
   1022	struct call_path *cp;
   1023	u64 ret_addr;
   1024
   1025	/* No point having 'trace end' on the bottom of the stack */
   1026	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
   1027		return 0;
   1028
   1029	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
   1030				ts->kernel_start);
   1031
   1032	ret_addr = sample->ip + sample->insn_len;
   1033
   1034	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
   1035				     false, true);
   1036}
   1037
   1038static bool is_x86_retpoline(const char *name)
   1039{
   1040	const char *p = strstr(name, "__x86_indirect_thunk_");
   1041
   1042	return p == name || !strcmp(name, "__indirect_thunk_start");
   1043}
   1044
   1045/*
   1046 * x86 retpoline functions pollute the call graph. This function removes them.
   1047 * This does not handle function return thunks, nor is there any improvement
   1048 * for the handling of inline thunks or extern thunks.
   1049 */
   1050static int thread_stack__x86_retpoline(struct thread_stack *ts,
   1051				       struct perf_sample *sample,
   1052				       struct addr_location *to_al)
   1053{
   1054	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
   1055	struct call_path_root *cpr = ts->crp->cpr;
   1056	struct symbol *sym = tse->cp->sym;
   1057	struct symbol *tsym = to_al->sym;
   1058	struct call_path *cp;
   1059
   1060	if (sym && is_x86_retpoline(sym->name)) {
   1061		/*
   1062		 * This is a x86 retpoline fn. It pollutes the call graph by
   1063		 * showing up everywhere there is an indirect branch, but does
   1064		 * not itself mean anything. Here the top-of-stack is removed,
   1065		 * by decrementing the stack count, and then further down, the
   1066		 * resulting top-of-stack is replaced with the actual target.
   1067		 * The result is that the retpoline functions will no longer
   1068		 * appear in the call graph. Note this only affects the call
   1069		 * graph, since all the original branches are left unchanged.
   1070		 */
   1071		ts->cnt -= 1;
   1072		sym = ts->stack[ts->cnt - 2].cp->sym;
   1073		if (sym && sym == tsym && to_al->addr != tsym->start) {
   1074			/*
   1075			 * Target is back to the middle of the symbol we came
   1076			 * from so assume it is an indirect jmp and forget it
   1077			 * altogether.
   1078			 */
   1079			ts->cnt -= 1;
   1080			return 0;
   1081		}
   1082	} else if (sym && sym == tsym) {
   1083		/*
   1084		 * Target is back to the symbol we came from so assume it is an
   1085		 * indirect jmp and forget it altogether.
   1086		 */
   1087		ts->cnt -= 1;
   1088		return 0;
   1089	}
   1090
   1091	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
   1092				sample->addr, ts->kernel_start);
   1093	if (!cp)
   1094		return -ENOMEM;
   1095
   1096	/* Replace the top-of-stack with the actual target */
   1097	ts->stack[ts->cnt - 1].cp = cp;
   1098
   1099	return 0;
   1100}
   1101
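        /*
         * Process one branch sample with a call/return processor attached
         * (the db-export path): calls and returns are matched into call_return
         * records handed to crp->process(), and jumps to the start of another
         * symbol are recorded as 'non call' entries so the call graph stays
         * consistent.
         *
         * Minimal usage sketch (process_call_return and data are hypothetical,
         * shown only to illustrate the API):
         *
         *      struct call_return_processor *crp;
         *
         *      crp = call_return_processor__new(process_call_return, data);
         *      if (crp)
         *              err = thread_stack__process(thread, comm, sample,
         *                                          from_al, to_al, ref, crp);
         */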
   1102int thread_stack__process(struct thread *thread, struct comm *comm,
   1103			  struct perf_sample *sample,
   1104			  struct addr_location *from_al,
   1105			  struct addr_location *to_al, u64 ref,
   1106			  struct call_return_processor *crp)
   1107{
   1108	struct thread_stack *ts = thread__stack(thread, sample->cpu);
   1109	enum retpoline_state_t rstate;
   1110	int err = 0;
   1111
   1112	if (ts && !ts->crp) {
   1113		/* Supersede thread_stack__event() */
   1114		thread_stack__reset(thread, ts);
   1115		ts = NULL;
   1116	}
   1117
   1118	if (!ts) {
   1119		ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
   1120		if (!ts)
   1121			return -ENOMEM;
   1122		ts->comm = comm;
   1123	}
   1124
   1125	rstate = ts->rstate;
   1126	if (rstate == X86_RETPOLINE_DETECTED)
   1127		ts->rstate = X86_RETPOLINE_POSSIBLE;
   1128
   1129	/* Flush stack on exec */
   1130	if (ts->comm != comm && thread->pid_ == thread->tid) {
   1131		err = __thread_stack__flush(thread, ts);
   1132		if (err)
   1133			return err;
   1134		ts->comm = comm;
   1135	}
   1136
   1137	/* If the stack is empty, put the current symbol on the stack */
   1138	if (!ts->cnt) {
   1139		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
   1140		if (err)
   1141			return err;
   1142	}
   1143
   1144	ts->branch_count += 1;
   1145	ts->insn_count += sample->insn_cnt;
   1146	ts->cyc_count += sample->cyc_cnt;
   1147	ts->last_time = sample->time;
   1148
   1149	if (sample->flags & PERF_IP_FLAG_CALL) {
   1150		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
   1151		struct call_path_root *cpr = ts->crp->cpr;
   1152		struct call_path *cp;
   1153		u64 ret_addr;
   1154
   1155		if (!sample->ip || !sample->addr)
   1156			return 0;
   1157
   1158		ret_addr = sample->ip + sample->insn_len;
   1159		if (ret_addr == sample->addr)
   1160			return 0; /* Zero-length calls are excluded */
   1161
   1162		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
   1163					to_al->sym, sample->addr,
   1164					ts->kernel_start);
   1165		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
   1166					    cp, false, trace_end);
   1167
   1168		/*
   1169		 * A call to the same symbol but not the start of the symbol,
   1170		 * may be the start of a x86 retpoline.
   1171		 */
   1172		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
   1173		    from_al->sym == to_al->sym &&
   1174		    to_al->addr != to_al->sym->start)
   1175			ts->rstate = X86_RETPOLINE_DETECTED;
   1176
   1177	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
   1178		if (!sample->addr) {
   1179			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
   1180						 PERF_IP_FLAG_INTERRUPT;
   1181
   1182			if (!(sample->flags & return_from_kernel))
   1183				return 0;
   1184
   1185			/* Pop kernel stack */
   1186			return thread_stack__pop_ks(thread, ts, sample, ref);
   1187		}
   1188
   1189		if (!sample->ip)
   1190			return 0;
   1191
   1192		/* x86 retpoline 'return' doesn't match the stack */
   1193		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
   1194		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
   1195			return thread_stack__x86_retpoline(ts, sample, to_al);
   1196
   1197		err = thread_stack__pop_cp(thread, ts, sample->addr,
   1198					   sample->time, ref, from_al->sym);
   1199		if (err) {
   1200			if (err < 0)
   1201				return err;
   1202			err = thread_stack__no_call_return(thread, ts, sample,
   1203							   from_al, to_al, ref);
   1204		}
   1205	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
   1206		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
   1207	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
   1208		err = thread_stack__trace_end(ts, sample, ref);
   1209	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
   1210		   from_al->sym != to_al->sym && to_al->sym &&
   1211		   to_al->addr == to_al->sym->start) {
   1212		struct call_path_root *cpr = ts->crp->cpr;
   1213		struct call_path *cp;
   1214
   1215		/*
   1216		 * The compiler might optimize a call/ret combination by making
   1217		 * it a jmp. Make that visible by recording on the stack a
   1218		 * branch to the start of a different symbol. Note, that means
   1219		 * when a ret pops the stack, all jmps must be popped off first.
   1220		 */
   1221		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
   1222					to_al->sym, sample->addr,
   1223					ts->kernel_start);
   1224		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
   1225					    false);
   1226		if (!err)
   1227			ts->stack[ts->cnt - 1].non_call = true;
   1228	}
   1229
   1230	return err;
   1231}
   1232
   1233size_t thread_stack__depth(struct thread *thread, int cpu)
   1234{
   1235	struct thread_stack *ts = thread__stack(thread, cpu);
   1236
   1237	if (!ts)
   1238		return 0;
   1239	return ts->cnt;
   1240}