cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

stat-shadow.c (40150B)


      1// SPDX-License-Identifier: GPL-2.0
      2#include <math.h>
      3#include <stdio.h>
      4#include "evsel.h"
      5#include "stat.h"
      6#include "color.h"
      7#include "debug.h"
      8#include "pmu.h"
      9#include "rblist.h"
     10#include "evlist.h"
     11#include "expr.h"
     12#include "metricgroup.h"
     13#include "cgroup.h"
     14#include "units.h"
     15#include <linux/zalloc.h>
     16#include "iostat.h"
     17
     18/*
     19 * AGGR_GLOBAL: Use CPU 0
     20 * AGGR_SOCKET: Use first CPU of socket
     21 * AGGR_DIE: Use first CPU of die
     22 * AGGR_CORE: Use first CPU of core
     23 * AGGR_NONE: Use matching CPU
     24 * AGGR_THREAD: Not supported?
     25 */
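/*
 * Illustrative example (hypothetical topology, not part of the original
 * comment): on a two-socket machine with CPUs 0-3 on socket 0 and CPUs 4-7
 * on socket 1, AGGR_SOCKET would read the shadow values from CPUs 0 and 4,
 * while AGGR_GLOBAL would read only from CPU 0.
 */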
     26
     27struct runtime_stat rt_stat;
     28struct stats walltime_nsecs_stats;
     29struct rusage_stats ru_stats;
     30
     31struct saved_value {
     32	struct rb_node rb_node;
     33	struct evsel *evsel;
     34	enum stat_type type;
     35	int ctx;
     36	int cpu_map_idx;
     37	struct cgroup *cgrp;
     38	struct runtime_stat *stat;
     39	struct stats stats;
     40	u64 metric_total;
     41	int metric_other;
     42};
     43
     44static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
     45{
     46	struct saved_value *a = container_of(rb_node,
     47					     struct saved_value,
     48					     rb_node);
     49	const struct saved_value *b = entry;
     50
     51	if (a->cpu_map_idx != b->cpu_map_idx)
     52		return a->cpu_map_idx - b->cpu_map_idx;
     53
     54	/*
     55	 * Previously the rbtree was used to link generic metrics.
     56	 * The keys were evsel/cpu. Now the rbtree is extended to support
     57	 * per-thread shadow stats. For shadow stats case, the keys
     58	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
     59	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
     60	 */
     61	if (a->type != b->type)
     62		return a->type - b->type;
     63
     64	if (a->ctx != b->ctx)
     65		return a->ctx - b->ctx;
     66
     67	if (a->cgrp != b->cgrp)
     68		return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;
     69
     70	if (a->evsel == NULL && b->evsel == NULL) {
     71		if (a->stat == b->stat)
     72			return 0;
     73
     74		if ((char *)a->stat < (char *)b->stat)
     75			return -1;
     76
     77		return 1;
     78	}
     79
     80	if (a->evsel == b->evsel)
     81		return 0;
     82	if ((char *)a->evsel < (char *)b->evsel)
     83		return -1;
     84	return +1;
     85}
     86
     87static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
     88				     const void *entry)
     89{
     90	struct saved_value *nd = malloc(sizeof(struct saved_value));
     91
     92	if (!nd)
     93		return NULL;
     94	memcpy(nd, entry, sizeof(struct saved_value));
     95	return &nd->rb_node;
     96}
     97
     98static void saved_value_delete(struct rblist *rblist __maybe_unused,
     99			       struct rb_node *rb_node)
    100{
    101	struct saved_value *v;
    102
    103	BUG_ON(!rb_node);
    104	v = container_of(rb_node, struct saved_value, rb_node);
    105	free(v);
    106}
    107
    108static struct saved_value *saved_value_lookup(struct evsel *evsel,
    109					      int cpu_map_idx,
    110					      bool create,
    111					      enum stat_type type,
    112					      int ctx,
    113					      struct runtime_stat *st,
    114					      struct cgroup *cgrp)
    115{
    116	struct rblist *rblist;
    117	struct rb_node *nd;
    118	struct saved_value dm = {
    119		.cpu_map_idx = cpu_map_idx,
    120		.evsel = evsel,
    121		.type = type,
    122		.ctx = ctx,
    123		.stat = st,
    124		.cgrp = cgrp,
    125	};
    126
    127	rblist = &st->value_list;
    128
    129	/* don't use context info for clock events */
    130	if (type == STAT_NSECS)
    131		dm.ctx = 0;
    132
    133	nd = rblist__find(rblist, &dm);
    134	if (nd)
    135		return container_of(nd, struct saved_value, rb_node);
    136	if (create) {
    137		rblist__add_node(rblist, &dm);
    138		nd = rblist__find(rblist, &dm);
    139		if (nd)
    140			return container_of(nd, struct saved_value, rb_node);
    141	}
    142	return NULL;
    143}
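/*
 * Usage sketch, drawn from the callers further down in this file: shadow
 * stats are stored with a NULL evsel and keyed on cpu/type/ctx, e.g.
 *
 *	v = saved_value_lookup(NULL, cpu_map_idx, true, STAT_CYCLES,
 *			       rsd->ctx, st, rsd->cgrp);
 *
 * whereas generic metrics keep the original evsel/cpu keys and pass
 * type == STAT_NONE and ctx == 0.
 */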
    144
    145void runtime_stat__init(struct runtime_stat *st)
    146{
    147	struct rblist *rblist = &st->value_list;
    148
    149	rblist__init(rblist);
    150	rblist->node_cmp = saved_value_cmp;
    151	rblist->node_new = saved_value_new;
    152	rblist->node_delete = saved_value_delete;
    153}
    154
    155void runtime_stat__exit(struct runtime_stat *st)
    156{
    157	rblist__exit(&st->value_list);
    158}
    159
    160void perf_stat__init_shadow_stats(void)
    161{
    162	runtime_stat__init(&rt_stat);
    163}
    164
    165static int evsel_context(struct evsel *evsel)
    166{
    167	int ctx = 0;
    168
    169	if (evsel->core.attr.exclude_kernel)
    170		ctx |= CTX_BIT_KERNEL;
    171	if (evsel->core.attr.exclude_user)
    172		ctx |= CTX_BIT_USER;
    173	if (evsel->core.attr.exclude_hv)
    174		ctx |= CTX_BIT_HV;
    175	if (evsel->core.attr.exclude_host)
    176		ctx |= CTX_BIT_HOST;
    177	if (evsel->core.attr.exclude_idle)
    178		ctx |= CTX_BIT_IDLE;
    179
    180	return ctx;
    181}
    182
    183static void reset_stat(struct runtime_stat *st)
    184{
    185	struct rblist *rblist;
    186	struct rb_node *pos, *next;
    187
    188	rblist = &st->value_list;
    189	next = rb_first_cached(&rblist->entries);
    190	while (next) {
    191		pos = next;
    192		next = rb_next(pos);
    193		memset(&container_of(pos, struct saved_value, rb_node)->stats,
    194		       0,
    195		       sizeof(struct stats));
    196	}
    197}
    198
    199void perf_stat__reset_shadow_stats(void)
    200{
    201	reset_stat(&rt_stat);
    202	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
    203	memset(&ru_stats, 0, sizeof(ru_stats));
    204}
    205
    206void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
    207{
    208	reset_stat(st);
    209}
    210
    211struct runtime_stat_data {
    212	int ctx;
    213	struct cgroup *cgrp;
    214};
    215
    216static void update_runtime_stat(struct runtime_stat *st,
    217				enum stat_type type,
    218				int cpu_map_idx, u64 count,
    219				struct runtime_stat_data *rsd)
    220{
    221	struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
    222						   rsd->ctx, st, rsd->cgrp);
    223
    224	if (v)
    225		update_stats(&v->stats, count);
    226}
    227
    228/*
    229 * Update various tracking values we maintain to print
    230 * more semantic information such as miss/hit ratios,
    231 * instruction rates, etc:
    232 */
    233void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
    234				    int cpu_map_idx, struct runtime_stat *st)
    235{
    236	u64 count_ns = count;
    237	struct saved_value *v;
    238	struct runtime_stat_data rsd = {
    239		.ctx = evsel_context(counter),
    240		.cgrp = counter->cgrp,
    241	};
    242
    243	count *= counter->scale;
    244
    245	if (evsel__is_clock(counter))
    246		update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
    247	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
    248		update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
    249	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
    250		update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
    251	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
    252		update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
    253	else if (perf_stat_evsel__is(counter, ELISION_START))
    254		update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
    255	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
    256		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
    257				    cpu_map_idx, count, &rsd);
    258	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
    259		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
    260				    cpu_map_idx, count, &rsd);
    261	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
    262		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
    263				    cpu_map_idx, count, &rsd);
    264	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
    265		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
    266				    cpu_map_idx, count, &rsd);
    267	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
    268		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
    269				    cpu_map_idx, count, &rsd);
    270	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
    271		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
    272				    cpu_map_idx, count, &rsd);
    273	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
    274		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
    275				    cpu_map_idx, count, &rsd);
    276	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
    277		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
    278				    cpu_map_idx, count, &rsd);
    279	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
    280		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
    281				    cpu_map_idx, count, &rsd);
    282	else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
    283		update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
    284				    cpu_map_idx, count, &rsd);
    285	else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
    286		update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
    287				    cpu_map_idx, count, &rsd);
    288	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
    289		update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
    290				    cpu_map_idx, count, &rsd);
    291	else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
    292		update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
    293				    cpu_map_idx, count, &rsd);
    294	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
    295		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
    296				    cpu_map_idx, count, &rsd);
    297	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
    298		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
    299				    cpu_map_idx, count, &rsd);
    300	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
    301		update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
    302	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
    303		update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
    304	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
    305		update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
    306	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
    307		update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
    308	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
    309		update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
    310	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
    311		update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
    312	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
    313		update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
    314	else if (perf_stat_evsel__is(counter, SMI_NUM))
    315		update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
    316	else if (perf_stat_evsel__is(counter, APERF))
    317		update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);
    318
    319	if (counter->collect_stat) {
    320		v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
    321				       rsd.cgrp);
    322		update_stats(&v->stats, count);
    323		if (counter->metric_leader)
    324			v->metric_total += count;
    325	} else if (counter->metric_leader) {
    326		v = saved_value_lookup(counter->metric_leader,
    327				       cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
    328		v->metric_total += count;
    329		v->metric_other++;
    330	}
    331}
    332
    333/* used for get_ratio_color() */
    334enum grc_type {
    335	GRC_STALLED_CYCLES_FE,
    336	GRC_STALLED_CYCLES_BE,
    337	GRC_CACHE_MISSES,
    338	GRC_MAX_NR
    339};
    340
    341static const char *get_ratio_color(enum grc_type type, double ratio)
    342{
    343	static const double grc_table[GRC_MAX_NR][3] = {
    344		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
    345		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
    346		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
    347	};
    348	const char *color = PERF_COLOR_NORMAL;
    349
    350	if (ratio > grc_table[type][0])
    351		color = PERF_COLOR_RED;
    352	else if (ratio > grc_table[type][1])
    353		color = PERF_COLOR_MAGENTA;
    354	else if (ratio > grc_table[type][2])
    355		color = PERF_COLOR_YELLOW;
    356
    357	return color;
    358}
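/*
 * Worked example (illustrative numbers): with the GRC_CACHE_MISSES
 * thresholds of 20/10/5 above, a miss ratio of 12.0% exceeds 10.0 but not
 * 20.0, so the metric is printed in PERF_COLOR_MAGENTA; a ratio of 3.0%
 * stays PERF_COLOR_NORMAL.
 */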
    359
    360static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
    361						const char *name)
    362{
    363	struct evsel *c2;
    364
    365	evlist__for_each_entry (evsel_list, c2) {
    366		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
    367			return c2;
    368	}
    369	return NULL;
    370}
    371
     372/* Mark the events referenced by MetricExpr expressions and link the events that use them to those targets. */
    373void perf_stat__collect_metric_expr(struct evlist *evsel_list)
    374{
    375	struct evsel *counter, *leader, **metric_events, *oc;
    376	bool found;
    377	struct expr_parse_ctx *ctx;
    378	struct hashmap_entry *cur;
    379	size_t bkt;
    380	int i;
    381
    382	ctx = expr__ctx_new();
    383	if (!ctx) {
    384		pr_debug("expr__ctx_new failed");
    385		return;
    386	}
    387	evlist__for_each_entry(evsel_list, counter) {
    388		bool invalid = false;
    389
    390		leader = evsel__leader(counter);
    391		if (!counter->metric_expr)
    392			continue;
    393
    394		expr__ctx_clear(ctx);
    395		metric_events = counter->metric_events;
    396		if (!metric_events) {
    397			if (expr__find_ids(counter->metric_expr,
    398					   counter->name,
    399					   ctx) < 0)
    400				continue;
    401
    402			metric_events = calloc(sizeof(struct evsel *),
    403					       hashmap__size(ctx->ids) + 1);
    404			if (!metric_events) {
    405				expr__ctx_free(ctx);
    406				return;
    407			}
    408			counter->metric_events = metric_events;
    409		}
    410
    411		i = 0;
    412		hashmap__for_each_entry(ctx->ids, cur, bkt) {
    413			const char *metric_name = (const char *)cur->key;
    414
    415			found = false;
    416			if (leader) {
    417				/* Search in group */
    418				for_each_group_member (oc, leader) {
    419					if (!strcasecmp(oc->name,
    420							metric_name) &&
    421						!oc->collect_stat) {
    422						found = true;
    423						break;
    424					}
    425				}
    426			}
    427			if (!found) {
    428				/* Search ignoring groups */
    429				oc = perf_stat__find_event(evsel_list,
    430							   metric_name);
    431			}
    432			if (!oc) {
    433				/* Deduping one is good enough to handle duplicated PMUs. */
    434				static char *printed;
    435
    436				/*
    437				 * Adding events automatically would be difficult, because
    438				 * it would risk creating groups that are not schedulable.
    439				 * perf stat doesn't understand all the scheduling constraints
    440				 * of events. So we ask the user instead to add the missing
    441				 * events.
    442				 */
    443				if (!printed ||
    444				    strcasecmp(printed, metric_name)) {
    445					fprintf(stderr,
    446						"Add %s event to groups to get metric expression for %s\n",
    447						metric_name,
    448						counter->name);
    449					free(printed);
    450					printed = strdup(metric_name);
    451				}
    452				invalid = true;
    453				continue;
    454			}
    455			metric_events[i++] = oc;
    456			oc->collect_stat = true;
    457		}
    458		metric_events[i] = NULL;
    459		if (invalid) {
    460			free(metric_events);
    461			counter->metric_events = NULL;
    462			counter->metric_expr = NULL;
    463		}
    464	}
    465	expr__ctx_free(ctx);
    466}
    467
    468static double runtime_stat_avg(struct runtime_stat *st,
    469			       enum stat_type type, int cpu_map_idx,
    470			       struct runtime_stat_data *rsd)
    471{
    472	struct saved_value *v;
    473
    474	v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
    475	if (!v)
    476		return 0.0;
    477
    478	return avg_stats(&v->stats);
    479}
    480
    481static double runtime_stat_n(struct runtime_stat *st,
    482			     enum stat_type type, int cpu_map_idx,
    483			     struct runtime_stat_data *rsd)
    484{
    485	struct saved_value *v;
    486
    487	v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
    488	if (!v)
    489		return 0.0;
    490
    491	return v->stats.n;
    492}
    493
    494static void print_stalled_cycles_frontend(struct perf_stat_config *config,
    495					  int cpu_map_idx, double avg,
    496					  struct perf_stat_output_ctx *out,
    497					  struct runtime_stat *st,
    498					  struct runtime_stat_data *rsd)
    499{
    500	double total, ratio = 0.0;
    501	const char *color;
    502
    503	total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
    504
    505	if (total)
    506		ratio = avg / total * 100.0;
    507
    508	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
    509
    510	if (ratio)
    511		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
    512				  ratio);
    513	else
    514		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
    515}
    516
    517static void print_stalled_cycles_backend(struct perf_stat_config *config,
    518					 int cpu_map_idx, double avg,
    519					 struct perf_stat_output_ctx *out,
    520					 struct runtime_stat *st,
    521					 struct runtime_stat_data *rsd)
    522{
    523	double total, ratio = 0.0;
    524	const char *color;
    525
    526	total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
    527
    528	if (total)
    529		ratio = avg / total * 100.0;
    530
    531	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
    532
    533	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
    534}
    535
    536static void print_branch_misses(struct perf_stat_config *config,
    537				int cpu_map_idx, double avg,
    538				struct perf_stat_output_ctx *out,
    539				struct runtime_stat *st,
    540				struct runtime_stat_data *rsd)
    541{
    542	double total, ratio = 0.0;
    543	const char *color;
    544
    545	total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);
    546
    547	if (total)
    548		ratio = avg / total * 100.0;
    549
    550	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    551
    552	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
    553}
    554
    555static void print_l1_dcache_misses(struct perf_stat_config *config,
    556				   int cpu_map_idx, double avg,
    557				   struct perf_stat_output_ctx *out,
    558				   struct runtime_stat *st,
    559				   struct runtime_stat_data *rsd)
    560{
    561	double total, ratio = 0.0;
    562	const char *color;
    563
    564	total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);
    565
    566	if (total)
    567		ratio = avg / total * 100.0;
    568
    569	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    570
    571	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
    572}
    573
    574static void print_l1_icache_misses(struct perf_stat_config *config,
    575				   int cpu_map_idx, double avg,
    576				   struct perf_stat_output_ctx *out,
    577				   struct runtime_stat *st,
    578				   struct runtime_stat_data *rsd)
    579{
    580	double total, ratio = 0.0;
    581	const char *color;
    582
    583	total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);
    584
    585	if (total)
    586		ratio = avg / total * 100.0;
    587
    588	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    589	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
    590}
    591
    592static void print_dtlb_cache_misses(struct perf_stat_config *config,
    593				    int cpu_map_idx, double avg,
    594				    struct perf_stat_output_ctx *out,
    595				    struct runtime_stat *st,
    596				    struct runtime_stat_data *rsd)
    597{
    598	double total, ratio = 0.0;
    599	const char *color;
    600
    601	total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);
    602
    603	if (total)
    604		ratio = avg / total * 100.0;
    605
    606	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    607	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
    608}
    609
    610static void print_itlb_cache_misses(struct perf_stat_config *config,
    611				    int cpu_map_idx, double avg,
    612				    struct perf_stat_output_ctx *out,
    613				    struct runtime_stat *st,
    614				    struct runtime_stat_data *rsd)
    615{
    616	double total, ratio = 0.0;
    617	const char *color;
    618
    619	total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);
    620
    621	if (total)
    622		ratio = avg / total * 100.0;
    623
    624	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    625	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
    626}
    627
    628static void print_ll_cache_misses(struct perf_stat_config *config,
    629				  int cpu_map_idx, double avg,
    630				  struct perf_stat_output_ctx *out,
    631				  struct runtime_stat *st,
    632				  struct runtime_stat_data *rsd)
    633{
    634	double total, ratio = 0.0;
    635	const char *color;
    636
    637	total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);
    638
    639	if (total)
    640		ratio = avg / total * 100.0;
    641
    642	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    643	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
    644}
    645
    646/*
     648 * High level "TopDown" CPU core pipeline bottleneck breakdown.
    648 *
    649 * Basic concept following
    650 * Yasin, A Top Down Method for Performance analysis and Counter architecture
    651 * ISPASS14
    652 *
    653 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
    654 *
    655 * Frontend -> Backend -> Retiring
    656 * BadSpeculation in addition means out of order execution that is thrown away
    657 * (for example branch mispredictions)
    658 * Frontend is instruction decoding.
    659 * Backend is execution, like computation and accessing data in memory
    660 * Retiring is good execution that is not directly bottlenecked
    661 *
    662 * The formulas are computed in slots.
    663 * A slot is an entry in the pipeline each for the pipeline width
    664 * (for example a 4-wide pipeline has 4 slots for each cycle)
    665 *
    666 * Formulas:
    667 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
    668 *			TotalSlots
    669 * Retiring = SlotsRetired / TotalSlots
    670 * FrontendBound = FetchBubbles / TotalSlots
    671 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
    672 *
    673 * The kernel provides the mapping to the low level CPU events and any scaling
    674 * needed for the CPU pipeline width, for example:
    675 *
    676 * TotalSlots = Cycles * 4
    677 *
    678 * The scaling factor is communicated in the sysfs unit.
    679 *
    680 * In some cases the CPU may not be able to measure all the formulas due to
    681 * missing events. In this case multiple formulas are combined, as possible.
    682 *
    683 * Full TopDown supports more levels to sub-divide each area: for example
    684 * BackendBound into computing bound and memory bound. For now we only
    685 * support Level 1 TopDown.
    686 */
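/*
 * Worked example (illustrative numbers, not part of the original comment):
 * assume a 4-wide pipeline that ran for 1000 cycles, so TotalSlots = 4000.
 * With SlotsIssued = 2600, SlotsRetired = 2000, RecoveryBubbles = 200 and
 * FetchBubbles = 800 the Level 1 breakdown is:
 *
 *	BadSpeculation = ((2600 - 2000) + 200) / 4000 = 0.20
 *	Retiring       = 2000 / 4000                  = 0.50
 *	FrontendBound  = 800 / 4000                   = 0.20
 *	BackendBound   = 1.0 - 0.20 - 0.50 - 0.20     = 0.10
 */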
    687
    688static double sanitize_val(double x)
    689{
    690	if (x < 0 && x >= -0.02)
    691		return 0.0;
    692	return x;
    693}
    694
    695static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
    696			     struct runtime_stat_data *rsd)
    697{
    698	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
    699}
    700
    701static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
    702			  struct runtime_stat_data *rsd)
    703{
    704	double bad_spec = 0;
    705	double total_slots;
    706	double total;
    707
    708	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
    709		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
    710		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);
    711
    712	total_slots = td_total_slots(cpu_map_idx, st, rsd);
    713	if (total_slots)
    714		bad_spec = total / total_slots;
    715	return sanitize_val(bad_spec);
    716}
    717
    718static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
    719			  struct runtime_stat_data *rsd)
    720{
    721	double retiring = 0;
    722	double total_slots = td_total_slots(cpu_map_idx, st, rsd);
    723	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
    724					    cpu_map_idx, rsd);
    725
    726	if (total_slots)
    727		retiring = ret_slots / total_slots;
    728	return retiring;
    729}
    730
    731static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
    732			  struct runtime_stat_data *rsd)
    733{
    734	double fe_bound = 0;
    735	double total_slots = td_total_slots(cpu_map_idx, st, rsd);
    736	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
    737					    cpu_map_idx, rsd);
    738
    739	if (total_slots)
    740		fe_bound = fetch_bub / total_slots;
    741	return fe_bound;
    742}
    743
    744static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
    745			  struct runtime_stat_data *rsd)
    746{
    747	double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
    748		      td_bad_spec(cpu_map_idx, st, rsd) +
    749		      td_retiring(cpu_map_idx, st, rsd));
    750	if (sum == 0)
    751		return 0;
    752	return sanitize_val(1.0 - sum);
    753}
    754
    755/*
    756 * Kernel reports metrics multiplied with slots. To get back
    757 * the ratios we need to recreate the sum.
    758 */
    759
    760static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
    761			      struct runtime_stat *stat,
    762			      struct runtime_stat_data *rsd)
    763{
    764	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
    765		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
    766		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
    767		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
    768	double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);
    769
    770	if (sum)
    771		return d / sum;
    772	return 0;
    773}
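/*
 * Example with illustrative numbers: if the averaged slots-scaled values
 * are retiring = 500, fe_bound = 200, be_bound = 200 and bad_spec = 100,
 * the sum is 1000 and td_metric_ratio() returns 0.5, 0.2, 0.2 and 0.1 for
 * the respective metrics.
 */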
    774
    775/*
    776 * ... but only if most of the values are actually available.
    777 * We allow two missing.
    778 */
    779
    780static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
    781		    struct runtime_stat_data *rsd)
    782{
    783	int c = 0;
    784
    785	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
    786		c++;
    787	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
    788		c++;
    789	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
    790		c++;
    791	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
    792		c++;
    793	return c >= 2;
    794}
    795
    796static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
    797			   struct perf_stat_output_ctx *out,
    798			   struct runtime_stat *st,
    799			   struct runtime_stat_data *rsd)
    800{
    801	double smi_num, aperf, cycles, cost = 0.0;
    802	const char *color = NULL;
    803
    804	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
    805	aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
    806	cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);
    807
    808	if ((cycles == 0) || (aperf == 0))
    809		return;
    810
    811	if (smi_num)
    812		cost = (aperf - cycles) / aperf * 100.00;
    813
    814	if (cost > 10)
    815		color = PERF_COLOR_RED;
    816	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
    817	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
    818}
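/*
 * Worked example (made-up values): with aperf = 1000000, cycles = 900000
 * and smi_num > 0, cost = (1000000 - 900000) / 1000000 * 100.0 = 10.0%,
 * which does not exceed the 10% threshold, so the metric is not
 * highlighted in PERF_COLOR_RED.
 */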
    819
    820static int prepare_metric(struct evsel **metric_events,
    821			  struct metric_ref *metric_refs,
    822			  struct expr_parse_ctx *pctx,
    823			  int cpu_map_idx,
    824			  struct runtime_stat *st)
    825{
    826	double scale;
    827	char *n;
    828	int i, j, ret;
    829
    830	for (i = 0; metric_events[i]; i++) {
    831		struct saved_value *v;
    832		struct stats *stats;
    833		u64 metric_total = 0;
    834		int source_count;
    835
    836		if (evsel__is_tool(metric_events[i])) {
    837			source_count = 1;
    838			switch (metric_events[i]->tool_event) {
    839			case PERF_TOOL_DURATION_TIME:
    840				stats = &walltime_nsecs_stats;
    841				scale = 1e-9;
    842				break;
    843			case PERF_TOOL_USER_TIME:
    844				stats = &ru_stats.ru_utime_usec_stat;
    845				scale = 1e-6;
    846				break;
    847			case PERF_TOOL_SYSTEM_TIME:
    848				stats = &ru_stats.ru_stime_usec_stat;
    849				scale = 1e-6;
    850				break;
    851			case PERF_TOOL_NONE:
    852				pr_err("Invalid tool event 'none'");
    853				abort();
    854			case PERF_TOOL_MAX:
    855				pr_err("Invalid tool event 'max'");
    856				abort();
    857			default:
    858				pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
    859				abort();
    860			}
    861		} else {
    862			v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
    863					       STAT_NONE, 0, st,
    864					       metric_events[i]->cgrp);
    865			if (!v)
    866				break;
    867			stats = &v->stats;
    868			scale = 1.0;
    869			source_count = evsel__source_count(metric_events[i]);
    870
    871			if (v->metric_other)
    872				metric_total = v->metric_total;
    873		}
    874		n = strdup(evsel__metric_id(metric_events[i]));
    875		if (!n)
    876			return -ENOMEM;
    877
    878		expr__add_id_val_source_count(pctx, n,
    879					metric_total ? : avg_stats(stats) * scale,
    880					source_count);
    881	}
    882
    883	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
    884		ret = expr__add_ref(pctx, &metric_refs[j]);
    885		if (ret)
    886			return ret;
    887	}
    888
    889	return i;
    890}
    891
    892static void generic_metric(struct perf_stat_config *config,
    893			   const char *metric_expr,
    894			   struct evsel **metric_events,
    895			   struct metric_ref *metric_refs,
    896			   char *name,
    897			   const char *metric_name,
    898			   const char *metric_unit,
    899			   int runtime,
    900			   int cpu_map_idx,
    901			   struct perf_stat_output_ctx *out,
    902			   struct runtime_stat *st)
    903{
    904	print_metric_t print_metric = out->print_metric;
    905	struct expr_parse_ctx *pctx;
    906	double ratio, scale;
    907	int i;
    908	void *ctxp = out->ctx;
    909
    910	pctx = expr__ctx_new();
    911	if (!pctx)
    912		return;
    913
    914	pctx->runtime = runtime;
    915	i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
    916	if (i < 0) {
    917		expr__ctx_free(pctx);
    918		return;
    919	}
    920	if (!metric_events[i]) {
    921		if (expr__parse(&ratio, pctx, metric_expr) == 0) {
    922			char *unit;
    923			char metric_bf[64];
    924
    925			if (metric_unit && metric_name) {
    926				if (perf_pmu__convert_scale(metric_unit,
    927					&unit, &scale) >= 0) {
    928					ratio *= scale;
    929				}
    930				if (strstr(metric_expr, "?"))
    931					scnprintf(metric_bf, sizeof(metric_bf),
    932					  "%s  %s_%d", unit, metric_name, runtime);
    933				else
    934					scnprintf(metric_bf, sizeof(metric_bf),
    935					  "%s  %s", unit, metric_name);
    936
    937				print_metric(config, ctxp, NULL, "%8.1f",
    938					     metric_bf, ratio);
    939			} else {
    940				print_metric(config, ctxp, NULL, "%8.2f",
    941					metric_name ?
    942					metric_name :
    943					out->force_header ?  name : "",
    944					ratio);
    945			}
    946		} else {
    947			print_metric(config, ctxp, NULL, NULL,
    948				     out->force_header ?
    949				     (metric_name ? metric_name : name) : "", 0);
    950		}
    951	} else {
    952		print_metric(config, ctxp, NULL, NULL,
    953			     out->force_header ?
    954			     (metric_name ? metric_name : name) : "", 0);
    955	}
    956
    957	expr__ctx_free(pctx);
    958}
    959
    960double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
    961{
    962	struct expr_parse_ctx *pctx;
    963	double ratio = 0.0;
    964
    965	pctx = expr__ctx_new();
    966	if (!pctx)
    967		return NAN;
    968
    969	if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
    970		goto out;
    971
    972	if (expr__parse(&ratio, pctx, mexp->metric_expr))
    973		ratio = 0.0;
    974
    975out:
    976	expr__ctx_free(pctx);
    977	return ratio;
    978}
    979
    980void perf_stat__print_shadow_stats(struct perf_stat_config *config,
    981				   struct evsel *evsel,
    982				   double avg, int cpu_map_idx,
    983				   struct perf_stat_output_ctx *out,
    984				   struct rblist *metric_events,
    985				   struct runtime_stat *st)
    986{
    987	void *ctxp = out->ctx;
    988	print_metric_t print_metric = out->print_metric;
    989	double total, ratio = 0.0, total2;
    990	const char *color = NULL;
    991	struct runtime_stat_data rsd = {
    992		.ctx = evsel_context(evsel),
    993		.cgrp = evsel->cgrp,
    994	};
    995	struct metric_event *me;
    996	int num = 1;
    997
    998	if (config->iostat_run) {
    999		iostat_print_metric(config, evsel, out);
   1000	} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
   1001		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
   1002
   1003		if (total) {
   1004			ratio = avg / total;
   1005			print_metric(config, ctxp, NULL, "%7.2f ",
   1006					"insn per cycle", ratio);
   1007		} else {
   1008			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
   1009		}
   1010
   1011		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);
   1012
   1013		total = max(total, runtime_stat_avg(st,
   1014						    STAT_STALLED_CYCLES_BACK,
   1015						    cpu_map_idx, &rsd));
   1016
   1017		if (total && avg) {
   1018			out->new_line(config, ctxp);
   1019			ratio = total / avg;
   1020			print_metric(config, ctxp, NULL, "%7.2f ",
   1021					"stalled cycles per insn",
   1022					ratio);
   1023		}
   1024	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
   1025		if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
   1026			print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
   1027		else
   1028			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
   1029	} else if (
   1030		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
   1031		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
   1032					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
   1033					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
   1034
   1035		if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
   1036			print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
   1037		else
   1038			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
   1039	} else if (
   1040		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
   1041		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
   1042					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
   1043					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
   1044
   1045		if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
   1046			print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
   1047		else
   1048			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
   1049	} else if (
   1050		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
   1051		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
   1052					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
   1053					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
   1054
   1055		if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
   1056			print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
   1057		else
   1058			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
   1059	} else if (
   1060		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
   1061		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
   1062					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
   1063					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
   1064
   1065		if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
   1066			print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
   1067		else
   1068			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
   1069	} else if (
   1070		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
   1071		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
   1072					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
   1073					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
   1074
   1075		if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
   1076			print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
   1077		else
   1078			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
   1079	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
   1080		total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);
   1081
   1082		if (total)
   1083			ratio = avg * 100 / total;
   1084
   1085		if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
   1086			print_metric(config, ctxp, NULL, "%8.3f %%",
   1087				     "of all cache refs", ratio);
   1088		else
   1089			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
   1090	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
   1091		print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
   1092	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
   1093		print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
   1094	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
   1095		total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
   1096
   1097		if (total) {
   1098			ratio = avg / total;
   1099			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
   1100		} else {
    1101			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
   1102		}
   1103	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
   1104		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
   1105
   1106		if (total)
   1107			print_metric(config, ctxp, NULL,
   1108					"%7.2f%%", "transactional cycles",
   1109					100.0 * (avg / total));
   1110		else
   1111			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
   1112				     0);
   1113	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
   1114		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
   1115		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
   1116
   1117		if (total2 < avg)
   1118			total2 = avg;
   1119		if (total)
   1120			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
   1121				100.0 * ((total2-avg) / total));
   1122		else
   1123			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
   1124	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
   1125		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
   1126
   1127		if (avg)
   1128			ratio = total / avg;
   1129
   1130		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
   1131			print_metric(config, ctxp, NULL, "%8.0f",
   1132				     "cycles / transaction", ratio);
   1133		else
   1134			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
   1135				      0);
   1136	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
   1137		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);
   1138
   1139		if (avg)
   1140			ratio = total / avg;
   1141
   1142		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
   1143	} else if (evsel__is_clock(evsel)) {
   1144		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
   1145			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
   1146				     avg / (ratio * evsel->scale));
   1147		else
   1148			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
   1149	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
   1150		double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);
   1151
   1152		if (fe_bound > 0.2)
   1153			color = PERF_COLOR_RED;
   1154		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
   1155				fe_bound * 100.);
   1156	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
   1157		double retiring = td_retiring(cpu_map_idx, st, &rsd);
   1158
   1159		if (retiring > 0.7)
   1160			color = PERF_COLOR_GREEN;
   1161		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
   1162				retiring * 100.);
   1163	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
   1164		double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);
   1165
   1166		if (bad_spec > 0.1)
   1167			color = PERF_COLOR_RED;
   1168		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
   1169				bad_spec * 100.);
   1170	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
   1171		double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
   1172		const char *name = "backend bound";
   1173		static int have_recovery_bubbles = -1;
   1174
   1175		/* In case the CPU does not support topdown-recovery-bubbles */
   1176		if (have_recovery_bubbles < 0)
   1177			have_recovery_bubbles = pmu_have_event("cpu",
   1178					"topdown-recovery-bubbles");
   1179		if (!have_recovery_bubbles)
   1180			name = "backend bound/bad spec";
   1181
   1182		if (be_bound > 0.2)
   1183			color = PERF_COLOR_RED;
   1184		if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
   1185			print_metric(config, ctxp, color, "%8.1f%%", name,
   1186					be_bound * 100.);
   1187		else
   1188			print_metric(config, ctxp, NULL, NULL, name, 0);
   1189	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
   1190		   full_td(cpu_map_idx, st, &rsd)) {
   1191		double retiring = td_metric_ratio(cpu_map_idx,
   1192						  STAT_TOPDOWN_RETIRING, st,
   1193						  &rsd);
   1194		if (retiring > 0.7)
   1195			color = PERF_COLOR_GREEN;
   1196		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
   1197				retiring * 100.);
   1198	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
   1199		   full_td(cpu_map_idx, st, &rsd)) {
   1200		double fe_bound = td_metric_ratio(cpu_map_idx,
   1201						  STAT_TOPDOWN_FE_BOUND, st,
   1202						  &rsd);
   1203		if (fe_bound > 0.2)
   1204			color = PERF_COLOR_RED;
   1205		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
   1206				fe_bound * 100.);
   1207	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
   1208		   full_td(cpu_map_idx, st, &rsd)) {
   1209		double be_bound = td_metric_ratio(cpu_map_idx,
   1210						  STAT_TOPDOWN_BE_BOUND, st,
   1211						  &rsd);
   1212		if (be_bound > 0.2)
   1213			color = PERF_COLOR_RED;
   1214		print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
   1215				be_bound * 100.);
   1216	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
   1217		   full_td(cpu_map_idx, st, &rsd)) {
   1218		double bad_spec = td_metric_ratio(cpu_map_idx,
   1219						  STAT_TOPDOWN_BAD_SPEC, st,
   1220						  &rsd);
   1221		if (bad_spec > 0.1)
   1222			color = PERF_COLOR_RED;
   1223		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
   1224				bad_spec * 100.);
   1225	} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
   1226			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
   1227		double retiring = td_metric_ratio(cpu_map_idx,
   1228						  STAT_TOPDOWN_RETIRING, st,
   1229						  &rsd);
   1230		double heavy_ops = td_metric_ratio(cpu_map_idx,
   1231						   STAT_TOPDOWN_HEAVY_OPS, st,
   1232						   &rsd);
   1233		double light_ops = retiring - heavy_ops;
   1234
   1235		if (retiring > 0.7 && heavy_ops > 0.1)
   1236			color = PERF_COLOR_GREEN;
   1237		print_metric(config, ctxp, color, "%8.1f%%", "heavy operations",
   1238				heavy_ops * 100.);
   1239		if (retiring > 0.7 && light_ops > 0.6)
   1240			color = PERF_COLOR_GREEN;
   1241		else
   1242			color = NULL;
   1243		print_metric(config, ctxp, color, "%8.1f%%", "light operations",
   1244				light_ops * 100.);
   1245	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
   1246			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
   1247		double bad_spec = td_metric_ratio(cpu_map_idx,
   1248						  STAT_TOPDOWN_BAD_SPEC, st,
   1249						  &rsd);
   1250		double br_mis = td_metric_ratio(cpu_map_idx,
   1251						STAT_TOPDOWN_BR_MISPREDICT, st,
   1252						&rsd);
   1253		double m_clears = bad_spec - br_mis;
   1254
   1255		if (bad_spec > 0.1 && br_mis > 0.05)
   1256			color = PERF_COLOR_RED;
   1257		print_metric(config, ctxp, color, "%8.1f%%", "branch mispredict",
   1258				br_mis * 100.);
   1259		if (bad_spec > 0.1 && m_clears > 0.05)
   1260			color = PERF_COLOR_RED;
   1261		else
   1262			color = NULL;
   1263		print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
   1264				m_clears * 100.);
   1265	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
   1266			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
   1267		double fe_bound = td_metric_ratio(cpu_map_idx,
   1268						  STAT_TOPDOWN_FE_BOUND, st,
   1269						  &rsd);
   1270		double fetch_lat = td_metric_ratio(cpu_map_idx,
   1271						   STAT_TOPDOWN_FETCH_LAT, st,
   1272						   &rsd);
   1273		double fetch_bw = fe_bound - fetch_lat;
   1274
   1275		if (fe_bound > 0.2 && fetch_lat > 0.15)
   1276			color = PERF_COLOR_RED;
   1277		print_metric(config, ctxp, color, "%8.1f%%", "fetch latency",
   1278				fetch_lat * 100.);
   1279		if (fe_bound > 0.2 && fetch_bw > 0.1)
   1280			color = PERF_COLOR_RED;
   1281		else
   1282			color = NULL;
   1283		print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
   1284				fetch_bw * 100.);
   1285	} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
   1286			full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
   1287		double be_bound = td_metric_ratio(cpu_map_idx,
   1288						  STAT_TOPDOWN_BE_BOUND, st,
   1289						  &rsd);
   1290		double mem_bound = td_metric_ratio(cpu_map_idx,
   1291						   STAT_TOPDOWN_MEM_BOUND, st,
   1292						   &rsd);
   1293		double core_bound = be_bound - mem_bound;
   1294
   1295		if (be_bound > 0.2 && mem_bound > 0.2)
   1296			color = PERF_COLOR_RED;
   1297		print_metric(config, ctxp, color, "%8.1f%%", "memory bound",
   1298				mem_bound * 100.);
   1299		if (be_bound > 0.2 && core_bound > 0.1)
   1300			color = PERF_COLOR_RED;
   1301		else
   1302			color = NULL;
   1303		print_metric(config, ctxp, color, "%8.1f%%", "Core bound",
   1304				core_bound * 100.);
   1305	} else if (evsel->metric_expr) {
   1306		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
   1307				evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
   1308	} else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
   1309		char unit = ' ';
   1310		char unit_buf[10] = "/sec";
   1311
   1312		total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
   1313		if (total)
   1314			ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
   1315
   1316		if (unit != ' ')
   1317			snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
   1318		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
   1319	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
   1320		print_smi_cost(config, cpu_map_idx, out, st, &rsd);
   1321	} else {
   1322		num = 0;
   1323	}
   1324
   1325	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
   1326		struct metric_expr *mexp;
   1327
   1328		list_for_each_entry (mexp, &me->head, nd) {
   1329			if (num++ > 0)
   1330				out->new_line(config, ctxp);
   1331			generic_metric(config, mexp->metric_expr, mexp->metric_events,
   1332					mexp->metric_refs, evsel->name, mexp->metric_name,
   1333					mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
   1334		}
   1335	}
   1336	if (num == 0)
   1337		print_metric(config, ctxp, NULL, NULL, NULL, 0);
   1338}