cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

builtin-stat.c (72876B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * builtin-stat.c
      4 *
      5 * Builtin stat command: give a precise summary overview of performance
      6 * counters for any workload, CPU or specific PID.
      7 *
      8 * Sample output:
      9
     10   $ perf stat ./hackbench 10
     11
     12  Time: 0.118
     13
     14  Performance counter stats for './hackbench 10':
     15
     16       1708.761321 task-clock                #   11.037 CPUs utilized
     17            41,190 context-switches          #    0.024 M/sec
     18             6,735 CPU-migrations            #    0.004 M/sec
     19            17,318 page-faults               #    0.010 M/sec
     20     5,205,202,243 cycles                    #    3.046 GHz
     21     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     22     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     23     2,603,501,247 instructions              #    0.50  insns per cycle
     24                                             #    1.48  stalled cycles per insn
     25       484,357,498 branches                  #  283.455 M/sec
     26         6,388,934 branch-misses             #    1.32% of all branches
     27
     28        0.154822978  seconds time elapsed
     29
     30 *
     31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
     32 *
     33 * Improvements and fixes by:
     34 *
     35 *   Arjan van de Ven <arjan@linux.intel.com>
     36 *   Yanmin Zhang <yanmin.zhang@intel.com>
     37 *   Wu Fengguang <fengguang.wu@intel.com>
     38 *   Mike Galbraith <efault@gmx.de>
     39 *   Paul Mackerras <paulus@samba.org>
     40 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
     41 */
     42
     43#include "builtin.h"
     44#include "perf.h"
     45#include "util/cgroup.h"
     46#include <subcmd/parse-options.h>
     47#include "util/parse-events.h"
     48#include "util/pmu.h"
     49#include "util/event.h"
     50#include "util/evlist.h"
     51#include "util/evlist-hybrid.h"
     52#include "util/evsel.h"
     53#include "util/debug.h"
     54#include "util/color.h"
     55#include "util/stat.h"
     56#include "util/header.h"
     57#include "util/cpumap.h"
     58#include "util/thread_map.h"
     59#include "util/counts.h"
     60#include "util/topdown.h"
     61#include "util/session.h"
     62#include "util/tool.h"
     63#include "util/string2.h"
     64#include "util/metricgroup.h"
     65#include "util/synthetic-events.h"
     66#include "util/target.h"
     67#include "util/time-utils.h"
     68#include "util/top.h"
     69#include "util/affinity.h"
     70#include "util/pfm.h"
     71#include "util/bpf_counter.h"
     72#include "util/iostat.h"
     73#include "util/pmu-hybrid.h"
     74#include "asm/bug.h"
     75
     76#include <linux/time64.h>
     77#include <linux/zalloc.h>
     78#include <api/fs/fs.h>
     79#include <errno.h>
     80#include <signal.h>
     81#include <stdlib.h>
     82#include <sys/prctl.h>
     83#include <inttypes.h>
     84#include <locale.h>
     85#include <math.h>
     86#include <sys/types.h>
     87#include <sys/stat.h>
     88#include <sys/wait.h>
     89#include <unistd.h>
     90#include <sys/time.h>
     91#include <sys/resource.h>
     92#include <linux/err.h>
     93
     94#include <linux/ctype.h>
     95#include <perf/evlist.h>
     96
     97#define DEFAULT_SEPARATOR	" "
     98#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
     99
    100static void print_counters(struct timespec *ts, int argc, const char **argv);
    101
    102/* Default events used for perf stat -T */
    103static const char *transaction_attrs = {
    104	"task-clock,"
    105	"{"
    106	"instructions,"
    107	"cycles,"
    108	"cpu/cycles-t/,"
    109	"cpu/tx-start/,"
    110	"cpu/el-start/,"
    111	"cpu/cycles-ct/"
    112	"}"
    113};
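/*
 * For reference, the adjacent string literals above concatenate into one
 * event list string:
 *   "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}"
 * i.e. task-clock plus a single group of transaction-related events.
 */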
    114
    115/* More limited version when the CPU does not have all events. */
    116static const char * transaction_limited_attrs = {
    117	"task-clock,"
    118	"{"
    119	"instructions,"
    120	"cycles,"
    121	"cpu/cycles-t/,"
    122	"cpu/tx-start/"
    123	"}"
    124};
    125
    126static const char * topdown_attrs[] = {
    127	"topdown-total-slots",
    128	"topdown-slots-retired",
    129	"topdown-recovery-bubbles",
    130	"topdown-fetch-bubbles",
    131	"topdown-slots-issued",
    132	NULL,
    133};
    134
    135static const char *topdown_metric_attrs[] = {
    136	"slots",
    137	"topdown-retiring",
    138	"topdown-bad-spec",
    139	"topdown-fe-bound",
    140	"topdown-be-bound",
    141	NULL,
    142};
    143
    144static const char *topdown_metric_L2_attrs[] = {
    145	"slots",
    146	"topdown-retiring",
    147	"topdown-bad-spec",
    148	"topdown-fe-bound",
    149	"topdown-be-bound",
    150	"topdown-heavy-ops",
    151	"topdown-br-mispredict",
    152	"topdown-fetch-lat",
    153	"topdown-mem-bound",
    154	NULL,
    155};
    156
    157#define TOPDOWN_MAX_LEVEL			2
    158
    159static const char *smi_cost_attrs = {
    160	"{"
    161	"msr/aperf/,"
    162	"msr/smi/,"
    163	"cycles"
    164	"}"
    165};
    166
    167static struct evlist	*evsel_list;
    168static bool all_counters_use_bpf = true;
    169
    170static struct target target = {
    171	.uid	= UINT_MAX,
    172};
    173
    174#define METRIC_ONLY_LEN 20
    175
    176static volatile pid_t		child_pid			= -1;
    177static int			detailed_run			=  0;
    178static bool			transaction_run;
    179static bool			topdown_run			= false;
    180static bool			smi_cost			= false;
    181static bool			smi_reset			= false;
    182static int			big_num_opt			=  -1;
    183static bool			group				= false;
    184static const char		*pre_cmd			= NULL;
    185static const char		*post_cmd			= NULL;
    186static bool			sync_run			= false;
    187static bool			forever				= false;
    188static bool			force_metric_only		= false;
    189static struct timespec		ref_time;
    190static bool			append_file;
    191static bool			interval_count;
    192static const char		*output_name;
    193static int			output_fd;
    194
    195struct perf_stat {
    196	bool			 record;
    197	struct perf_data	 data;
    198	struct perf_session	*session;
    199	u64			 bytes_written;
    200	struct perf_tool	 tool;
    201	bool			 maps_allocated;
    202	struct perf_cpu_map	*cpus;
    203	struct perf_thread_map *threads;
    204	enum aggr_mode		 aggr_mode;
    205};
    206
    207static struct perf_stat		perf_stat;
    208#define STAT_RECORD		perf_stat.record
    209
    210static volatile int done = 0;
    211
    212static struct perf_stat_config stat_config = {
    213	.aggr_mode		= AGGR_GLOBAL,
    214	.scale			= true,
    215	.unit_width		= 4, /* strlen("unit") */
    216	.run_count		= 1,
    217	.metric_only_len	= METRIC_ONLY_LEN,
    218	.walltime_nsecs_stats	= &walltime_nsecs_stats,
    219	.ru_stats		= &ru_stats,
    220	.big_num		= true,
    221	.ctl_fd			= -1,
    222	.ctl_fd_ack		= -1,
    223	.iostat_run		= false,
    224};
    225
    226static bool cpus_map_matched(struct evsel *a, struct evsel *b)
    227{
    228	if (!a->core.cpus && !b->core.cpus)
    229		return true;
    230
    231	if (!a->core.cpus || !b->core.cpus)
    232		return false;
    233
    234	if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
    235		return false;
    236
    237	for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
    238		if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
    239		    perf_cpu_map__cpu(b->core.cpus, i).cpu)
    240			return false;
    241	}
    242
    243	return true;
    244}
    245
    246static void evlist__check_cpu_maps(struct evlist *evlist)
    247{
    248	struct evsel *evsel, *pos, *leader;
    249	char buf[1024];
    250
    251	if (evlist__has_hybrid(evlist))
    252		evlist__warn_hybrid_group(evlist);
    253
    254	evlist__for_each_entry(evlist, evsel) {
    255		leader = evsel__leader(evsel);
    256
    257		/* Check that leader matches cpus with each member. */
    258		if (leader == evsel)
    259			continue;
    260		if (cpus_map_matched(leader, evsel))
    261			continue;
    262
     263		/* If there's a mismatch, disable the group and warn the user. */
    264		WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
    265		evsel__group_desc(leader, buf, sizeof(buf));
    266		pr_warning("  %s\n", buf);
    267
    268		if (verbose) {
    269			cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
    270			pr_warning("     %s: %s\n", leader->name, buf);
    271			cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
    272			pr_warning("     %s: %s\n", evsel->name, buf);
    273		}
    274
    275		for_each_group_evsel(pos, leader)
    276			evsel__remove_from_group(pos, leader);
    277	}
    278}
    279
    280static inline void diff_timespec(struct timespec *r, struct timespec *a,
    281				 struct timespec *b)
    282{
    283	r->tv_sec = a->tv_sec - b->tv_sec;
    284	if (a->tv_nsec < b->tv_nsec) {
    285		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
    286		r->tv_sec--;
    287	} else {
     288		r->tv_nsec = a->tv_nsec - b->tv_nsec;
    289	}
    290}
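/*
 * Worked example of the borrow handling above: with
 *   a = { .tv_sec = 5, .tv_nsec = 200000000 } and
 *   b = { .tv_sec = 3, .tv_nsec = 700000000 },
 * a->tv_nsec < b->tv_nsec, so one second is borrowed and the result is
 *   r = { .tv_sec = 1, .tv_nsec = 500000000 }, i.e. 1.5s.
 */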
    291
    292static void perf_stat__reset_stats(void)
    293{
    294	int i;
    295
    296	evlist__reset_stats(evsel_list);
    297	perf_stat__reset_shadow_stats();
    298
    299	for (i = 0; i < stat_config.stats_num; i++)
    300		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
    301}
    302
    303static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
    304				     union perf_event *event,
    305				     struct perf_sample *sample __maybe_unused,
    306				     struct machine *machine __maybe_unused)
    307{
    308	if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
    309		pr_err("failed to write perf data, error: %m\n");
    310		return -1;
    311	}
    312
    313	perf_stat.bytes_written += event->header.size;
    314	return 0;
    315}
    316
    317static int write_stat_round_event(u64 tm, u64 type)
    318{
    319	return perf_event__synthesize_stat_round(NULL, tm, type,
    320						 process_synthesized_event,
    321						 NULL);
    322}
    323
    324#define WRITE_STAT_ROUND_EVENT(time, interval) \
    325	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
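/*
 * The token-pasting macro above expands, for example,
 *   WRITE_STAT_ROUND_EVENT(ns, INTERVAL)
 * into
 *   write_stat_round_event(ns, PERF_STAT_ROUND_TYPE__INTERVAL)
 */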
    326
    327#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
    328
    329static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread,
    330				   struct perf_counts_values *count)
    331{
    332	struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread);
    333	struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
    334
    335	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
    336					   process_synthesized_event, NULL);
    337}
    338
    339static int read_single_counter(struct evsel *counter, int cpu_map_idx,
    340			       int thread, struct timespec *rs)
    341{
    342	switch(counter->tool_event) {
    343		case PERF_TOOL_DURATION_TIME: {
    344			u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
    345			struct perf_counts_values *count =
    346				perf_counts(counter->counts, cpu_map_idx, thread);
    347			count->ena = count->run = val;
    348			count->val = val;
    349			return 0;
    350		}
    351		case PERF_TOOL_USER_TIME:
    352		case PERF_TOOL_SYSTEM_TIME: {
    353			u64 val;
    354			struct perf_counts_values *count =
    355				perf_counts(counter->counts, cpu_map_idx, thread);
    356			if (counter->tool_event == PERF_TOOL_USER_TIME)
    357				val = ru_stats.ru_utime_usec_stat.mean;
    358			else
    359				val = ru_stats.ru_stime_usec_stat.mean;
    360			count->ena = count->run = val;
    361			count->val = val;
    362			return 0;
    363		}
    364		default:
    365		case PERF_TOOL_NONE:
    366			return evsel__read_counter(counter, cpu_map_idx, thread);
    367		case PERF_TOOL_MAX:
    368			/* This should never be reached */
    369			return 0;
    370	}
    371}
    372
    373/*
    374 * Read out the results of a single counter:
    375 * do not aggregate counts across CPUs in system-wide mode
    376 */
    377static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx)
    378{
    379	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
    380	int thread;
    381
    382	if (!counter->supported)
    383		return -ENOENT;
    384
    385	for (thread = 0; thread < nthreads; thread++) {
    386		struct perf_counts_values *count;
    387
    388		count = perf_counts(counter->counts, cpu_map_idx, thread);
    389
    390		/*
    391		 * The leader's group read loads data into its group members
    392		 * (via evsel__read_counter()) and sets their count->loaded.
    393		 */
    394		if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
    395		    read_single_counter(counter, cpu_map_idx, thread, rs)) {
    396			counter->counts->scaled = -1;
    397			perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
    398			perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
    399			return -1;
    400		}
    401
    402		perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
    403
    404		if (STAT_RECORD) {
    405			if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) {
    406				pr_err("failed to write stat event\n");
    407				return -1;
    408			}
    409		}
    410
    411		if (verbose > 1) {
    412			fprintf(stat_config.output,
    413				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
    414					evsel__name(counter),
    415					perf_cpu_map__cpu(evsel__cpus(counter),
    416							  cpu_map_idx).cpu,
    417					count->val, count->ena, count->run);
    418		}
    419	}
    420
    421	return 0;
    422}
    423
    424static int read_affinity_counters(struct timespec *rs)
    425{
    426	struct evlist_cpu_iterator evlist_cpu_itr;
    427	struct affinity saved_affinity, *affinity;
    428
    429	if (all_counters_use_bpf)
    430		return 0;
    431
    432	if (!target__has_cpu(&target) || target__has_per_thread(&target))
    433		affinity = NULL;
    434	else if (affinity__setup(&saved_affinity) < 0)
    435		return -1;
    436	else
    437		affinity = &saved_affinity;
    438
    439	evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
    440		struct evsel *counter = evlist_cpu_itr.evsel;
    441
    442		if (evsel__is_bpf(counter))
    443			continue;
    444
    445		if (!counter->err) {
    446			counter->err = read_counter_cpu(counter, rs,
    447							evlist_cpu_itr.cpu_map_idx);
    448		}
    449	}
    450	if (affinity)
    451		affinity__cleanup(&saved_affinity);
    452
    453	return 0;
    454}
    455
    456static int read_bpf_map_counters(void)
    457{
    458	struct evsel *counter;
    459	int err;
    460
    461	evlist__for_each_entry(evsel_list, counter) {
    462		if (!evsel__is_bpf(counter))
    463			continue;
    464
    465		err = bpf_counter__read(counter);
    466		if (err)
    467			return err;
    468	}
    469	return 0;
    470}
    471
    472static void read_counters(struct timespec *rs)
    473{
    474	struct evsel *counter;
    475
    476	if (!stat_config.stop_read_counter) {
    477		if (read_bpf_map_counters() ||
    478		    read_affinity_counters(rs))
    479			return;
    480	}
    481
    482	evlist__for_each_entry(evsel_list, counter) {
    483		if (counter->err)
    484			pr_debug("failed to read counter %s\n", counter->name);
    485		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
    486			pr_warning("failed to process counter %s\n", counter->name);
    487		counter->err = 0;
    488	}
    489}
    490
    491static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
    492{
    493	int i;
    494
    495	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
    496	if (!config->stats)
    497		return -1;
    498
    499	config->stats_num = nthreads;
    500
    501	for (i = 0; i < nthreads; i++)
    502		runtime_stat__init(&config->stats[i]);
    503
    504	return 0;
    505}
    506
    507static void runtime_stat_delete(struct perf_stat_config *config)
    508{
    509	int i;
    510
    511	if (!config->stats)
    512		return;
    513
    514	for (i = 0; i < config->stats_num; i++)
    515		runtime_stat__exit(&config->stats[i]);
    516
    517	zfree(&config->stats);
    518}
    519
    520static void runtime_stat_reset(struct perf_stat_config *config)
    521{
    522	int i;
    523
    524	if (!config->stats)
    525		return;
    526
    527	for (i = 0; i < config->stats_num; i++)
    528		perf_stat__reset_shadow_per_stat(&config->stats[i]);
    529}
    530
    531static void process_interval(void)
    532{
    533	struct timespec ts, rs;
    534
    535	clock_gettime(CLOCK_MONOTONIC, &ts);
    536	diff_timespec(&rs, &ts, &ref_time);
    537
    538	perf_stat__reset_shadow_per_stat(&rt_stat);
    539	runtime_stat_reset(&stat_config);
    540	read_counters(&rs);
    541
    542	if (STAT_RECORD) {
    543		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
    544			pr_err("failed to write stat round event\n");
    545	}
    546
    547	init_stats(&walltime_nsecs_stats);
    548	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
    549	print_counters(&rs, 0, NULL);
    550}
    551
    552static bool handle_interval(unsigned int interval, int *times)
    553{
    554	if (interval) {
    555		process_interval();
    556		if (interval_count && !(--(*times)))
    557			return true;
    558	}
    559	return false;
    560}
    561
    562static int enable_counters(void)
    563{
    564	struct evsel *evsel;
    565	int err;
    566
    567	evlist__for_each_entry(evsel_list, evsel) {
    568		if (!evsel__is_bpf(evsel))
    569			continue;
    570
    571		err = bpf_counter__enable(evsel);
    572		if (err)
    573			return err;
    574	}
    575
    576	if (stat_config.initial_delay < 0) {
    577		pr_info(EVLIST_DISABLED_MSG);
    578		return 0;
    579	}
    580
    581	if (stat_config.initial_delay > 0) {
    582		pr_info(EVLIST_DISABLED_MSG);
    583		usleep(stat_config.initial_delay * USEC_PER_MSEC);
    584	}
    585
    586	/*
    587	 * We need to enable counters only if:
     588	 * - we don't have a tracee (we are attaching to an existing task or cpu)
     589	 * - we have an initial delay configured
    590	 */
    591	if (!target__none(&target) || stat_config.initial_delay) {
    592		if (!all_counters_use_bpf)
    593			evlist__enable(evsel_list);
    594		if (stat_config.initial_delay > 0)
    595			pr_info(EVLIST_ENABLED_MSG);
    596	}
    597	return 0;
    598}
    599
    600static void disable_counters(void)
    601{
    602	struct evsel *counter;
    603
    604	/*
    605	 * If we don't have tracee (attaching to task or cpu), counters may
    606	 * still be running. To get accurate group ratios, we must stop groups
    607	 * from counting before reading their constituent counters.
    608	 */
    609	if (!target__none(&target)) {
    610		evlist__for_each_entry(evsel_list, counter)
    611			bpf_counter__disable(counter);
    612		if (!all_counters_use_bpf)
    613			evlist__disable(evsel_list);
    614	}
    615}
    616
    617static volatile int workload_exec_errno;
    618
    619/*
    620 * evlist__prepare_workload will send a SIGUSR1
     621 * if the fork fails, since we asked for it by setting its
    622 * want_signal to true.
    623 */
    624static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
    625					void *ucontext __maybe_unused)
    626{
    627	workload_exec_errno = info->si_value.sival_int;
    628}
    629
    630static bool evsel__should_store_id(struct evsel *counter)
    631{
    632	return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
    633}
    634
    635static bool is_target_alive(struct target *_target,
    636			    struct perf_thread_map *threads)
    637{
    638	struct stat st;
    639	int i;
    640
    641	if (!target__has_task(_target))
    642		return true;
    643
    644	for (i = 0; i < threads->nr; i++) {
    645		char path[PATH_MAX];
    646
    647		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
    648			  threads->map[i].pid);
    649
    650		if (!stat(path, &st))
    651			return true;
    652	}
    653
    654	return false;
    655}
    656
    657static void process_evlist(struct evlist *evlist, unsigned int interval)
    658{
    659	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
    660
    661	if (evlist__ctlfd_process(evlist, &cmd) > 0) {
    662		switch (cmd) {
    663		case EVLIST_CTL_CMD_ENABLE:
    664			if (interval)
    665				process_interval();
    666			break;
    667		case EVLIST_CTL_CMD_DISABLE:
    668			if (interval)
    669				process_interval();
    670			break;
    671		case EVLIST_CTL_CMD_SNAPSHOT:
    672		case EVLIST_CTL_CMD_ACK:
    673		case EVLIST_CTL_CMD_UNSUPPORTED:
    674		case EVLIST_CTL_CMD_EVLIST:
    675		case EVLIST_CTL_CMD_STOP:
    676		case EVLIST_CTL_CMD_PING:
    677		default:
    678			break;
    679		}
    680	}
    681}
    682
    683static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
    684			int *time_to_sleep)
    685{
    686	int tts = *time_to_sleep;
    687	struct timespec time_diff;
    688
    689	diff_timespec(&time_diff, time_stop, time_start);
    690
    691	tts -= time_diff.tv_sec * MSEC_PER_SEC +
    692	       time_diff.tv_nsec / NSEC_PER_MSEC;
    693
    694	if (tts < 0)
    695		tts = 0;
    696
    697	*time_to_sleep = tts;
    698}
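/*
 * Example of the budget computation above: with sleep_time = 1000 (ms) and a
 * poll that returned after time_diff = 0.3s, the remaining budget is
 *   1000 - (0 * MSEC_PER_SEC + 300000000 / NSEC_PER_MSEC) = 700 ms,
 * and it is clamped to 0 if handling the fd event took longer than the
 * full interval.
 */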
    699
    700static int dispatch_events(bool forks, int timeout, int interval, int *times)
    701{
    702	int child_exited = 0, status = 0;
    703	int time_to_sleep, sleep_time;
    704	struct timespec time_start, time_stop;
    705
    706	if (interval)
    707		sleep_time = interval;
    708	else if (timeout)
    709		sleep_time = timeout;
    710	else
    711		sleep_time = 1000;
    712
    713	time_to_sleep = sleep_time;
    714
    715	while (!done) {
    716		if (forks)
    717			child_exited = waitpid(child_pid, &status, WNOHANG);
    718		else
    719			child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
    720
    721		if (child_exited)
    722			break;
    723
    724		clock_gettime(CLOCK_MONOTONIC, &time_start);
    725		if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
    726			if (timeout || handle_interval(interval, times))
    727				break;
    728			time_to_sleep = sleep_time;
    729		} else { /* fd revent */
    730			process_evlist(evsel_list, interval);
    731			clock_gettime(CLOCK_MONOTONIC, &time_stop);
    732			compute_tts(&time_start, &time_stop, &time_to_sleep);
    733		}
    734	}
    735
    736	return status;
    737}
    738
    739enum counter_recovery {
    740	COUNTER_SKIP,
    741	COUNTER_RETRY,
    742	COUNTER_FATAL,
    743};
    744
    745static enum counter_recovery stat_handle_error(struct evsel *counter)
    746{
    747	char msg[BUFSIZ];
    748	/*
    749	 * PPC returns ENXIO for HW counters until 2.6.37
    750	 * (behavior changed with commit b0a873e).
    751	 */
    752	if (errno == EINVAL || errno == ENOSYS ||
    753	    errno == ENOENT || errno == EOPNOTSUPP ||
    754	    errno == ENXIO) {
    755		if (verbose > 0)
    756			ui__warning("%s event is not supported by the kernel.\n",
    757				    evsel__name(counter));
    758		counter->supported = false;
    759		/*
    760		 * errored is a sticky flag that means one of the counter's
    761		 * cpu event had a problem and needs to be reexamined.
    762		 */
    763		counter->errored = true;
    764
    765		if ((evsel__leader(counter) != counter) ||
    766		    !(counter->core.leader->nr_members > 1))
    767			return COUNTER_SKIP;
    768	} else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
    769		if (verbose > 0)
    770			ui__warning("%s\n", msg);
    771		return COUNTER_RETRY;
    772	} else if (target__has_per_thread(&target) &&
    773		   evsel_list->core.threads &&
    774		   evsel_list->core.threads->err_thread != -1) {
    775		/*
    776		 * For global --per-thread case, skip current
    777		 * error thread.
    778		 */
    779		if (!thread_map__remove(evsel_list->core.threads,
    780					evsel_list->core.threads->err_thread)) {
    781			evsel_list->core.threads->err_thread = -1;
    782			return COUNTER_RETRY;
    783		}
    784	}
    785
    786	evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
    787	ui__error("%s\n", msg);
    788
    789	if (child_pid != -1)
    790		kill(child_pid, SIGTERM);
    791	return COUNTER_FATAL;
    792}
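/*
 * Summary of the recovery paths above (illustrative): an open failure with
 * EINVAL/ENOSYS/ENOENT/EOPNOTSUPP/ENXIO marks the event unsupported and is
 * normally skipped (COUNTER_SKIP); a successful evsel__fallback() or, for the
 * --per-thread case, removal of the erroring thread asks the caller to retry
 * (COUNTER_RETRY); anything else is fatal and the forked workload, if any, is
 * killed (COUNTER_FATAL).
 */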
    793
    794static int __run_perf_stat(int argc, const char **argv, int run_idx)
    795{
    796	int interval = stat_config.interval;
    797	int times = stat_config.times;
    798	int timeout = stat_config.timeout;
    799	char msg[BUFSIZ];
    800	unsigned long long t0, t1;
    801	struct evsel *counter;
    802	size_t l;
    803	int status = 0;
    804	const bool forks = (argc > 0);
    805	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
    806	struct evlist_cpu_iterator evlist_cpu_itr;
    807	struct affinity saved_affinity, *affinity = NULL;
    808	int err;
    809	bool second_pass = false;
    810
    811	if (forks) {
    812		if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
    813			perror("failed to prepare workload");
    814			return -1;
    815		}
    816		child_pid = evsel_list->workload.pid;
    817	}
    818
    819	if (group)
    820		evlist__set_leader(evsel_list);
    821
    822	if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
    823		if (affinity__setup(&saved_affinity) < 0)
    824			return -1;
    825		affinity = &saved_affinity;
    826	}
    827
    828	evlist__for_each_entry(evsel_list, counter) {
    829		if (bpf_counter__load(counter, &target))
    830			return -1;
    831		if (!evsel__is_bpf(counter))
    832			all_counters_use_bpf = false;
    833	}
    834
    835	evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
    836		counter = evlist_cpu_itr.evsel;
    837
    838		/*
    839		 * bperf calls evsel__open_per_cpu() in bperf__load(), so
    840		 * no need to call it again here.
    841		 */
    842		if (target.use_bpf)
    843			break;
    844
    845		if (counter->reset_group || counter->errored)
    846			continue;
    847		if (evsel__is_bpf(counter))
    848			continue;
    849try_again:
    850		if (create_perf_stat_counter(counter, &stat_config, &target,
    851					     evlist_cpu_itr.cpu_map_idx) < 0) {
    852
    853			/*
    854			 * Weak group failed. We cannot just undo this here
    855			 * because earlier CPUs might be in group mode, and the kernel
    856			 * doesn't support mixing group and non group reads. Defer
    857			 * it to later.
    858			 * Don't close here because we're in the wrong affinity.
    859			 */
    860			if ((errno == EINVAL || errno == EBADF) &&
    861				evsel__leader(counter) != counter &&
    862				counter->weak_group) {
    863				evlist__reset_weak_group(evsel_list, counter, false);
    864				assert(counter->reset_group);
    865				second_pass = true;
    866				continue;
    867			}
    868
    869			switch (stat_handle_error(counter)) {
    870			case COUNTER_FATAL:
    871				return -1;
    872			case COUNTER_RETRY:
    873				goto try_again;
    874			case COUNTER_SKIP:
    875				continue;
    876			default:
    877				break;
    878			}
    879
    880		}
    881		counter->supported = true;
    882	}
    883
    884	if (second_pass) {
    885		/*
     886		 * Now redo all the weak groups after closing them,
    887		 * and also close errored counters.
    888		 */
    889
     890		/* First close errored counters or weak-group retries */
    891		evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
    892			counter = evlist_cpu_itr.evsel;
    893
    894			if (!counter->reset_group && !counter->errored)
    895				continue;
    896
    897			perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
    898		}
    899		/* Now reopen weak */
    900		evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
    901			counter = evlist_cpu_itr.evsel;
    902
    903			if (!counter->reset_group && !counter->errored)
    904				continue;
    905			if (!counter->reset_group)
    906				continue;
    907try_again_reset:
    908			pr_debug2("reopening weak %s\n", evsel__name(counter));
    909			if (create_perf_stat_counter(counter, &stat_config, &target,
    910						     evlist_cpu_itr.cpu_map_idx) < 0) {
    911
    912				switch (stat_handle_error(counter)) {
    913				case COUNTER_FATAL:
    914					return -1;
    915				case COUNTER_RETRY:
    916					goto try_again_reset;
    917				case COUNTER_SKIP:
    918					continue;
    919				default:
    920					break;
    921				}
    922			}
    923			counter->supported = true;
    924		}
    925	}
    926	affinity__cleanup(affinity);
    927
    928	evlist__for_each_entry(evsel_list, counter) {
    929		if (!counter->supported) {
    930			perf_evsel__free_fd(&counter->core);
    931			continue;
    932		}
    933
    934		l = strlen(counter->unit);
    935		if (l > stat_config.unit_width)
    936			stat_config.unit_width = l;
    937
    938		if (evsel__should_store_id(counter) &&
    939		    evsel__store_ids(counter, evsel_list))
    940			return -1;
    941	}
    942
    943	if (evlist__apply_filters(evsel_list, &counter)) {
    944		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
    945			counter->filter, evsel__name(counter), errno,
    946			str_error_r(errno, msg, sizeof(msg)));
    947		return -1;
    948	}
    949
    950	if (STAT_RECORD) {
    951		int fd = perf_data__fd(&perf_stat.data);
    952
    953		if (is_pipe) {
    954			err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
    955		} else {
    956			err = perf_session__write_header(perf_stat.session, evsel_list,
    957							 fd, false);
    958		}
    959
    960		if (err < 0)
    961			return err;
    962
    963		err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
    964							 process_synthesized_event, is_pipe);
    965		if (err < 0)
    966			return err;
    967	}
    968
    969	/*
    970	 * Enable counters and exec the command:
    971	 */
    972	if (forks) {
    973		err = enable_counters();
    974		if (err)
    975			return -1;
    976		evlist__start_workload(evsel_list);
    977
    978		t0 = rdclock();
    979		clock_gettime(CLOCK_MONOTONIC, &ref_time);
    980
    981		if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
    982			status = dispatch_events(forks, timeout, interval, &times);
    983		if (child_pid != -1) {
    984			if (timeout)
    985				kill(child_pid, SIGTERM);
    986			wait4(child_pid, &status, 0, &stat_config.ru_data);
    987		}
    988
    989		if (workload_exec_errno) {
    990			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
    991			pr_err("Workload failed: %s\n", emsg);
    992			return -1;
    993		}
    994
    995		if (WIFSIGNALED(status))
    996			psignal(WTERMSIG(status), argv[0]);
    997	} else {
    998		err = enable_counters();
    999		if (err)
   1000			return -1;
   1001
   1002		t0 = rdclock();
   1003		clock_gettime(CLOCK_MONOTONIC, &ref_time);
   1004
   1005		status = dispatch_events(forks, timeout, interval, &times);
   1006	}
   1007
   1008	disable_counters();
   1009
   1010	t1 = rdclock();
   1011
   1012	if (stat_config.walltime_run_table)
   1013		stat_config.walltime_run[run_idx] = t1 - t0;
   1014
   1015	if (interval && stat_config.summary) {
   1016		stat_config.interval = 0;
   1017		stat_config.stop_read_counter = true;
   1018		init_stats(&walltime_nsecs_stats);
   1019		update_stats(&walltime_nsecs_stats, t1 - t0);
   1020
   1021		if (stat_config.aggr_mode == AGGR_GLOBAL)
   1022			evlist__save_aggr_prev_raw_counts(evsel_list);
   1023
   1024		evlist__copy_prev_raw_counts(evsel_list);
   1025		evlist__reset_prev_raw_counts(evsel_list);
   1026		runtime_stat_reset(&stat_config);
   1027		perf_stat__reset_shadow_per_stat(&rt_stat);
   1028	} else {
   1029		update_stats(&walltime_nsecs_stats, t1 - t0);
   1030		update_rusage_stats(&ru_stats, &stat_config.ru_data);
   1031	}
   1032
   1033	/*
   1034	 * Closing a group leader splits the group, and as we only disable
   1035	 * group leaders, results in remaining events becoming enabled. To
   1036	 * avoid arbitrary skew, we must read all counters before closing any
   1037	 * group leaders.
   1038	 */
   1039	read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
   1040
   1041	/*
    1042	 * We need to keep evsel_list alive, because it is processed
    1043	 * later; the evsel_list will be closed after that.
   1044	 */
   1045	if (!STAT_RECORD)
   1046		evlist__close(evsel_list);
   1047
   1048	return WEXITSTATUS(status);
   1049}
   1050
   1051static int run_perf_stat(int argc, const char **argv, int run_idx)
   1052{
   1053	int ret;
   1054
   1055	if (pre_cmd) {
   1056		ret = system(pre_cmd);
   1057		if (ret)
   1058			return ret;
   1059	}
   1060
   1061	if (sync_run)
   1062		sync();
   1063
   1064	ret = __run_perf_stat(argc, argv, run_idx);
   1065	if (ret)
   1066		return ret;
   1067
   1068	if (post_cmd) {
   1069		ret = system(post_cmd);
   1070		if (ret)
   1071			return ret;
   1072	}
   1073
   1074	return ret;
   1075}
   1076
   1077static void print_counters(struct timespec *ts, int argc, const char **argv)
   1078{
   1079	/* Do not print anything if we record to the pipe. */
   1080	if (STAT_RECORD && perf_stat.data.is_pipe)
   1081		return;
   1082	if (stat_config.quiet)
   1083		return;
   1084
   1085	evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
   1086}
   1087
   1088static volatile int signr = -1;
   1089
   1090static void skip_signal(int signo)
   1091{
   1092	if ((child_pid == -1) || stat_config.interval)
   1093		done = 1;
   1094
   1095	signr = signo;
   1096	/*
    1097	 * Render child_pid harmless so we
    1098	 * won't send SIGTERM to a random
    1099	 * process in case of a race condition
    1100	 * and fast PID recycling.
   1101	 */
   1102	child_pid = -1;
   1103}
   1104
   1105static void sig_atexit(void)
   1106{
   1107	sigset_t set, oset;
   1108
   1109	/*
    1110	 * Avoid a race condition with the SIGCHLD handler
    1111	 * in skip_signal(), which modifies child_pid; the
    1112	 * goal is to avoid sending SIGTERM to a random
    1113	 * process.
   1114	 */
   1115	sigemptyset(&set);
   1116	sigaddset(&set, SIGCHLD);
   1117	sigprocmask(SIG_BLOCK, &set, &oset);
   1118
   1119	if (child_pid != -1)
   1120		kill(child_pid, SIGTERM);
   1121
   1122	sigprocmask(SIG_SETMASK, &oset, NULL);
   1123
   1124	if (signr == -1)
   1125		return;
   1126
   1127	signal(signr, SIG_DFL);
   1128	kill(getpid(), signr);
   1129}
   1130
   1131void perf_stat__set_big_num(int set)
   1132{
   1133	stat_config.big_num = (set != 0);
   1134}
   1135
   1136void perf_stat__set_no_csv_summary(int set)
   1137{
   1138	stat_config.no_csv_summary = (set != 0);
   1139}
   1140
   1141static int stat__set_big_num(const struct option *opt __maybe_unused,
   1142			     const char *s __maybe_unused, int unset)
   1143{
   1144	big_num_opt = unset ? 0 : 1;
   1145	perf_stat__set_big_num(!unset);
   1146	return 0;
   1147}
   1148
   1149static int enable_metric_only(const struct option *opt __maybe_unused,
   1150			      const char *s __maybe_unused, int unset)
   1151{
   1152	force_metric_only = true;
   1153	stat_config.metric_only = !unset;
   1154	return 0;
   1155}
   1156
   1157static int parse_metric_groups(const struct option *opt,
   1158			       const char *str,
   1159			       int unset __maybe_unused)
   1160{
   1161	return metricgroup__parse_groups(opt, str,
   1162					 stat_config.metric_no_group,
   1163					 stat_config.metric_no_merge,
   1164					 &stat_config.metric_events);
   1165}
   1166
   1167static int parse_control_option(const struct option *opt,
   1168				const char *str,
   1169				int unset __maybe_unused)
   1170{
   1171	struct perf_stat_config *config = opt->value;
   1172
   1173	return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
   1174}
   1175
   1176static int parse_stat_cgroups(const struct option *opt,
   1177			      const char *str, int unset)
   1178{
   1179	if (stat_config.cgroup_list) {
   1180		pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
   1181		return -1;
   1182	}
   1183
   1184	return parse_cgroups(opt, str, unset);
   1185}
   1186
   1187static int parse_hybrid_type(const struct option *opt,
   1188			     const char *str,
   1189			     int unset __maybe_unused)
   1190{
   1191	struct evlist *evlist = *(struct evlist **)opt->value;
   1192
   1193	if (!list_empty(&evlist->core.entries)) {
   1194		fprintf(stderr, "Must define cputype before events/metrics\n");
   1195		return -1;
   1196	}
   1197
   1198	evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str);
   1199	if (!evlist->hybrid_pmu_name) {
   1200		fprintf(stderr, "--cputype %s is not supported!\n", str);
   1201		return -1;
   1202	}
   1203
   1204	return 0;
   1205}
   1206
   1207static struct option stat_options[] = {
   1208	OPT_BOOLEAN('T', "transaction", &transaction_run,
   1209		    "hardware transaction statistics"),
   1210	OPT_CALLBACK('e', "event", &evsel_list, "event",
   1211		     "event selector. use 'perf list' to list available events",
   1212		     parse_events_option),
   1213	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
   1214		     "event filter", parse_filter),
   1215	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
   1216		    "child tasks do not inherit counters"),
   1217	OPT_STRING('p', "pid", &target.pid, "pid",
   1218		   "stat events on existing process id"),
   1219	OPT_STRING('t', "tid", &target.tid, "tid",
   1220		   "stat events on existing thread id"),
   1221#ifdef HAVE_BPF_SKEL
   1222	OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
   1223		   "stat events on existing bpf program id"),
   1224	OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
   1225		    "use bpf program to count events"),
   1226	OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
   1227		   "path to perf_event_attr map"),
   1228#endif
   1229	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
   1230		    "system-wide collection from all CPUs"),
   1231	OPT_BOOLEAN('g', "group", &group,
   1232		    "put the counters into a counter group"),
   1233	OPT_BOOLEAN(0, "scale", &stat_config.scale,
   1234		    "Use --no-scale to disable counter scaling for multiplexing"),
   1235	OPT_INCR('v', "verbose", &verbose,
   1236		    "be more verbose (show counter open errors, etc)"),
   1237	OPT_INTEGER('r', "repeat", &stat_config.run_count,
   1238		    "repeat command and print average + stddev (max: 100, forever: 0)"),
   1239	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
   1240		    "display details about each run (only with -r option)"),
   1241	OPT_BOOLEAN('n', "null", &stat_config.null_run,
    1242		    "null run - don't start any counters"),
   1243	OPT_INCR('d', "detailed", &detailed_run,
   1244		    "detailed run - start a lot of events"),
   1245	OPT_BOOLEAN('S', "sync", &sync_run,
   1246		    "call sync() before starting a run"),
   1247	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
   1248			   "print large numbers with thousands\' separators",
   1249			   stat__set_big_num),
   1250	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
   1251		    "list of cpus to monitor in system-wide"),
   1252	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
   1253		    "disable CPU count aggregation", AGGR_NONE),
   1254	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
   1255	OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
   1256		    "Merge identical named hybrid events"),
   1257	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
   1258		   "print counts with custom separator"),
   1259	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
   1260		     "monitor event in cgroup name only", parse_stat_cgroups),
   1261	OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
   1262		    "expand events for each cgroup"),
   1263	OPT_STRING('o', "output", &output_name, "file", "output file name"),
   1264	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
   1265	OPT_INTEGER(0, "log-fd", &output_fd,
   1266		    "log output to fd, instead of stderr"),
   1267	OPT_STRING(0, "pre", &pre_cmd, "command",
   1268			"command to run prior to the measured command"),
   1269	OPT_STRING(0, "post", &post_cmd, "command",
    1270			"command to run after the measured command"),
   1271	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
   1272		    "print counts at regular interval in ms "
   1273		    "(overhead is possible for values <= 100ms)"),
   1274	OPT_INTEGER(0, "interval-count", &stat_config.times,
   1275		    "print counts for fixed number of times"),
   1276	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
   1277		    "clear screen in between new interval"),
   1278	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
   1279		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
   1280	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
   1281		     "aggregate counts per processor socket", AGGR_SOCKET),
   1282	OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
   1283		     "aggregate counts per processor die", AGGR_DIE),
   1284	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
   1285		     "aggregate counts per physical processor core", AGGR_CORE),
   1286	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
   1287		     "aggregate counts per thread", AGGR_THREAD),
   1288	OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
   1289		     "aggregate counts per numa node", AGGR_NODE),
   1290	OPT_INTEGER('D', "delay", &stat_config.initial_delay,
   1291		    "ms to wait before starting measurement after program start (-1: start with events disabled)"),
   1292	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
   1293			"Only print computed metrics. No raw values", enable_metric_only),
   1294	OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
   1295		       "don't group metric events, impacts multiplexing"),
   1296	OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
   1297		       "don't try to share events between metrics in a group"),
   1298	OPT_BOOLEAN(0, "topdown", &topdown_run,
   1299			"measure top-down statistics"),
   1300	OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
   1301			"Set the metrics level for the top-down statistics (0: max level)"),
   1302	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
   1303			"measure SMI cost"),
   1304	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
   1305		     "monitor specified metrics or metric groups (separated by ,)",
   1306		     parse_metric_groups),
   1307	OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
   1308			 "Configure all used events to run in kernel space.",
   1309			 PARSE_OPT_EXCLUSIVE),
   1310	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
   1311			 "Configure all used events to run in user space.",
   1312			 PARSE_OPT_EXCLUSIVE),
   1313	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
   1314		    "Use with 'percore' event qualifier to show the event "
    1315		    "counts of one hardware thread by summing up the counts of all "
    1316		    "hardware threads of the same physical core"),
   1317	OPT_BOOLEAN(0, "summary", &stat_config.summary,
   1318		       "print summary for interval mode"),
   1319	OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
   1320		       "don't print 'summary' for CSV summary output"),
   1321	OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
   1322			"don't print output (useful with record)"),
   1323	OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
    1324		     "Only enable events on CPUs of this type "
    1325		     "for hybrid platforms (e.g. core or atom)",
   1326		     parse_hybrid_type),
   1327#ifdef HAVE_LIBPFM
   1328	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
   1329		"libpfm4 event selector. use 'perf list' to list available events",
   1330		parse_libpfm_events_option),
   1331#endif
   1332	OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
   1333		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
   1334		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
   1335		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
   1336		      parse_control_option),
   1337	OPT_CALLBACK_OPTARG(0, "iostat", &evsel_list, &stat_config, "default",
   1338			    "measure I/O performance metrics provided by arch/platform",
   1339			    iostat_parse),
   1340	OPT_END()
   1341};
   1342
   1343static const char *const aggr_mode__string[] = {
   1344	[AGGR_CORE] = "core",
   1345	[AGGR_DIE] = "die",
   1346	[AGGR_GLOBAL] = "global",
   1347	[AGGR_NODE] = "node",
   1348	[AGGR_NONE] = "none",
   1349	[AGGR_SOCKET] = "socket",
   1350	[AGGR_THREAD] = "thread",
   1351	[AGGR_UNSET] = "unset",
   1352};
   1353
   1354static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
   1355						struct perf_cpu cpu)
   1356{
   1357	return aggr_cpu_id__socket(cpu, /*data=*/NULL);
   1358}
   1359
   1360static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
   1361					     struct perf_cpu cpu)
   1362{
   1363	return aggr_cpu_id__die(cpu, /*data=*/NULL);
   1364}
   1365
   1366static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
   1367					      struct perf_cpu cpu)
   1368{
   1369	return aggr_cpu_id__core(cpu, /*data=*/NULL);
   1370}
   1371
   1372static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
   1373					      struct perf_cpu cpu)
   1374{
   1375	return aggr_cpu_id__node(cpu, /*data=*/NULL);
   1376}
   1377
   1378static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
   1379					      aggr_get_id_t get_id, struct perf_cpu cpu)
   1380{
   1381	struct aggr_cpu_id id = aggr_cpu_id__empty();
   1382
   1383	if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
   1384		config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
   1385
   1386	id = config->cpus_aggr_map->map[cpu.cpu];
   1387	return id;
   1388}
   1389
   1390static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
   1391						       struct perf_cpu cpu)
   1392{
   1393	return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
   1394}
   1395
   1396static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
   1397						    struct perf_cpu cpu)
   1398{
   1399	return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
   1400}
   1401
   1402static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
   1403						     struct perf_cpu cpu)
   1404{
   1405	return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
   1406}
   1407
   1408static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
   1409						     struct perf_cpu cpu)
   1410{
   1411	return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
   1412}
   1413
   1414static bool term_percore_set(void)
   1415{
   1416	struct evsel *counter;
   1417
   1418	evlist__for_each_entry(evsel_list, counter) {
   1419		if (counter->percore)
   1420			return true;
   1421	}
   1422
   1423	return false;
   1424}
   1425
   1426static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
   1427{
   1428	switch (aggr_mode) {
   1429	case AGGR_SOCKET:
   1430		return aggr_cpu_id__socket;
   1431	case AGGR_DIE:
   1432		return aggr_cpu_id__die;
   1433	case AGGR_CORE:
   1434		return aggr_cpu_id__core;
   1435	case AGGR_NODE:
   1436		return aggr_cpu_id__node;
   1437	case AGGR_NONE:
   1438		if (term_percore_set())
   1439			return aggr_cpu_id__core;
   1440
   1441		return NULL;
   1442	case AGGR_GLOBAL:
   1443	case AGGR_THREAD:
   1444	case AGGR_UNSET:
   1445	default:
   1446		return NULL;
   1447	}
   1448}
   1449
   1450static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
   1451{
   1452	switch (aggr_mode) {
   1453	case AGGR_SOCKET:
   1454		return perf_stat__get_socket_cached;
   1455	case AGGR_DIE:
   1456		return perf_stat__get_die_cached;
   1457	case AGGR_CORE:
   1458		return perf_stat__get_core_cached;
   1459	case AGGR_NODE:
   1460		return perf_stat__get_node_cached;
   1461	case AGGR_NONE:
   1462		if (term_percore_set()) {
   1463			return perf_stat__get_core_cached;
   1464		}
   1465		return NULL;
   1466	case AGGR_GLOBAL:
   1467	case AGGR_THREAD:
   1468	case AGGR_UNSET:
   1469	default:
   1470		return NULL;
   1471	}
   1472}
   1473
   1474static int perf_stat_init_aggr_mode(void)
   1475{
   1476	int nr;
   1477	aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
   1478
   1479	if (get_id) {
   1480		stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
   1481							 get_id, /*data=*/NULL);
   1482		if (!stat_config.aggr_map) {
   1483			pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
   1484			return -1;
   1485		}
   1486		stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
   1487	}
   1488
   1489	/*
    1490	 * The evsel_list->cpus is the base we operate on;
    1491	 * the highest cpu number determines the size of
    1492	 * the aggregation translation cpumap.
   1493	 */
   1494	if (evsel_list->core.user_requested_cpus)
   1495		nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
   1496	else
   1497		nr = 0;
   1498	stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
   1499	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
   1500}
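/*
 * Sizing example for the translation map above: if the user-requested cpu map
 * contains cpus {0, 2, 7}, perf_cpu_map__max() returns 7 and cpus_aggr_map is
 * allocated with 8 entries, so it can be indexed directly by cpu number in
 * perf_stat__get_aggr().
 */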
   1501
   1502static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
   1503{
   1504	if (map) {
   1505		WARN_ONCE(refcount_read(&map->refcnt) != 0,
   1506			  "cpu_aggr_map refcnt unbalanced\n");
   1507		free(map);
   1508	}
   1509}
   1510
   1511static void cpu_aggr_map__put(struct cpu_aggr_map *map)
   1512{
   1513	if (map && refcount_dec_and_test(&map->refcnt))
   1514		cpu_aggr_map__delete(map);
   1515}
   1516
   1517static void perf_stat__exit_aggr_mode(void)
   1518{
   1519	cpu_aggr_map__put(stat_config.aggr_map);
   1520	cpu_aggr_map__put(stat_config.cpus_aggr_map);
   1521	stat_config.aggr_map = NULL;
   1522	stat_config.cpus_aggr_map = NULL;
   1523}
   1524
   1525static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
   1526{
   1527	struct perf_env *env = data;
   1528	struct aggr_cpu_id id = aggr_cpu_id__empty();
   1529
   1530	if (cpu.cpu != -1)
   1531		id.socket = env->cpu[cpu.cpu].socket_id;
   1532
   1533	return id;
   1534}
   1535
   1536static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
   1537{
   1538	struct perf_env *env = data;
   1539	struct aggr_cpu_id id = aggr_cpu_id__empty();
   1540
   1541	if (cpu.cpu != -1) {
   1542		/*
   1543		 * die_id is relative to socket, so start
   1544		 * with the socket ID and then add die to
   1545		 * make a unique ID.
   1546		 */
   1547		id.socket = env->cpu[cpu.cpu].socket_id;
   1548		id.die = env->cpu[cpu.cpu].die_id;
   1549	}
   1550
   1551	return id;
   1552}
   1553
   1554static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
   1555{
   1556	struct perf_env *env = data;
   1557	struct aggr_cpu_id id = aggr_cpu_id__empty();
   1558
   1559	if (cpu.cpu != -1) {
   1560		/*
   1561		 * core_id is relative to socket and die,
   1562		 * we need a global id. So we set
   1563		 * socket, die id and core id
   1564		 */
   1565		id.socket = env->cpu[cpu.cpu].socket_id;
   1566		id.die = env->cpu[cpu.cpu].die_id;
   1567		id.core = env->cpu[cpu.cpu].core_id;
   1568	}
   1569
   1570	return id;
   1571}
   1572
   1573static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
   1574{
   1575	struct aggr_cpu_id id = aggr_cpu_id__empty();
   1576
   1577	id.node = perf_env__numa_node(data, cpu);
   1578	return id;
   1579}
   1580
   1581static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
   1582						     struct perf_cpu cpu)
   1583{
   1584	return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env);
   1585}
   1586static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
   1587						  struct perf_cpu cpu)
   1588{
   1589	return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
   1590}
   1591
   1592static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
   1593						   struct perf_cpu cpu)
   1594{
   1595	return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
   1596}
   1597
   1598static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
   1599						   struct perf_cpu cpu)
   1600{
   1601	return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
   1602}
   1603
   1604static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
   1605{
   1606	switch (aggr_mode) {
   1607	case AGGR_SOCKET:
   1608		return perf_env__get_socket_aggr_by_cpu;
   1609	case AGGR_DIE:
   1610		return perf_env__get_die_aggr_by_cpu;
   1611	case AGGR_CORE:
   1612		return perf_env__get_core_aggr_by_cpu;
   1613	case AGGR_NODE:
   1614		return perf_env__get_node_aggr_by_cpu;
   1615	case AGGR_NONE:
   1616	case AGGR_GLOBAL:
   1617	case AGGR_THREAD:
   1618	case AGGR_UNSET:
   1619	default:
   1620		return NULL;
   1621	}
   1622}
   1623
   1624static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
   1625{
   1626	switch (aggr_mode) {
   1627	case AGGR_SOCKET:
   1628		return perf_stat__get_socket_file;
   1629	case AGGR_DIE:
   1630		return perf_stat__get_die_file;
   1631	case AGGR_CORE:
   1632		return perf_stat__get_core_file;
   1633	case AGGR_NODE:
   1634		return perf_stat__get_node_file;
   1635	case AGGR_NONE:
   1636	case AGGR_GLOBAL:
   1637	case AGGR_THREAD:
   1638	case AGGR_UNSET:
   1639	default:
   1640		return NULL;
   1641	}
   1642}
   1643
   1644static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
   1645{
   1646	struct perf_env *env = &st->session->header.env;
   1647	aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
   1648
   1649	if (!get_id)
   1650		return 0;
   1651
   1652	stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env);
   1653	if (!stat_config.aggr_map) {
   1654		pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
   1655		return -1;
   1656	}
   1657	stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode);
   1658	return 0;
   1659}
   1660
   1661/*
   1662 * Add default attributes, if there were no attributes specified or
   1663 * if -d/--detailed, -d -d or -d -d -d is used:
   1664 */
   1665static int add_default_attributes(void)
   1666{
   1667	int err;
   1668	struct perf_event_attr default_attrs0[] = {
   1669
   1670  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   1671  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
   1672  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
   1673  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
   1674
   1675  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
   1676};
   1677	struct perf_event_attr frontend_attrs[] = {
   1678  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
   1679};
   1680	struct perf_event_attr backend_attrs[] = {
   1681  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
   1682};
   1683	struct perf_event_attr default_attrs1[] = {
   1684  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   1685  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   1686  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
   1687
   1688};
   1689	struct perf_event_attr default_sw_attrs[] = {
   1690  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   1691  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
   1692  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
   1693  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
   1694};
   1695
   1696/*
   1697 * Detailed stats (-d), covering the L1 and last level data caches:
   1698 */
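/*
 * PERF_TYPE_HW_CACHE events encode their config as
 * (cache id) | (op << 8) | (result << 16), as documented in
 * perf_event_open(2); the initializers below spell that out.
 */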
   1699	struct perf_event_attr detailed_attrs[] = {
   1700
   1701  { .type = PERF_TYPE_HW_CACHE,
   1702    .config =
   1703	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
   1704	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1705	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
   1706
   1707  { .type = PERF_TYPE_HW_CACHE,
   1708    .config =
   1709	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
   1710	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1711	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
   1712
   1713  { .type = PERF_TYPE_HW_CACHE,
   1714    .config =
   1715	 PERF_COUNT_HW_CACHE_LL			<<  0  |
   1716	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1717	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
   1718
   1719  { .type = PERF_TYPE_HW_CACHE,
   1720    .config =
   1721	 PERF_COUNT_HW_CACHE_LL			<<  0  |
   1722	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1723	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
   1724};
   1725
   1726/*
   1727 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
   1728 */
   1729	struct perf_event_attr very_detailed_attrs[] = {
   1730
   1731  { .type = PERF_TYPE_HW_CACHE,
   1732    .config =
   1733	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
   1734	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1735	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
   1736
   1737  { .type = PERF_TYPE_HW_CACHE,
   1738    .config =
   1739	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
   1740	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1741	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
   1742
   1743  { .type = PERF_TYPE_HW_CACHE,
   1744    .config =
   1745	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
   1746	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1747	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
   1748
   1749  { .type = PERF_TYPE_HW_CACHE,
   1750    .config =
   1751	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
   1752	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1753	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
   1754
   1755  { .type = PERF_TYPE_HW_CACHE,
   1756    .config =
   1757	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
   1758	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1759	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
   1760
   1761  { .type = PERF_TYPE_HW_CACHE,
   1762    .config =
   1763	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
   1764	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
   1765	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
   1766
   1767};
   1768
   1769/*
   1770 * Very, very detailed stats (-d -d -d), adding prefetch events:
   1771 */
   1772	struct perf_event_attr very_very_detailed_attrs[] = {
   1773
   1774  { .type = PERF_TYPE_HW_CACHE,
   1775    .config =
   1776	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
   1777	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
   1778	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
   1779
   1780  { .type = PERF_TYPE_HW_CACHE,
   1781    .config =
   1782	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
   1783	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
   1784	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
   1785};
   1786	/* Set attrs if no event is selected and !null_run: */
   1787	if (stat_config.null_run)
   1788		return 0;
   1789
   1790	if (transaction_run) {
   1791		struct parse_events_error errinfo;
   1792		/* Handle -T as -M transaction. Once platform-specific metrics
   1793		 * support has been added to the json files, all architectures
   1794		 * will use this approach. To determine transaction support
   1795		 * on an architecture, test for such a metric name.
   1796		 */
   1797		if (metricgroup__has_metric("transaction")) {
   1798			struct option opt = { .value = &evsel_list };
   1799
   1800			return metricgroup__parse_groups(&opt, "transaction",
   1801							 stat_config.metric_no_group,
   1802							stat_config.metric_no_merge,
   1803							 &stat_config.metric_events);
   1804		}
   1805
   1806		parse_events_error__init(&errinfo);
   1807		if (pmu_have_event("cpu", "cycles-ct") &&
   1808		    pmu_have_event("cpu", "el-start"))
   1809			err = parse_events(evsel_list, transaction_attrs,
   1810					   &errinfo);
   1811		else
   1812			err = parse_events(evsel_list,
   1813					   transaction_limited_attrs,
   1814					   &errinfo);
   1815		if (err) {
   1816			fprintf(stderr, "Cannot set up transaction events\n");
   1817			parse_events_error__print(&errinfo, transaction_attrs);
   1818		}
   1819		parse_events_error__exit(&errinfo);
   1820		return err ? -1 : 0;
   1821	}
   1822
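	/*
	 * --smi-cost: enable the msr PMU's freeze_on_smi control if it is
	 * not already set (restored on exit via smi_reset) and count
	 * msr/aperf/ and msr/smi/ so cycles lost to SMIs can be reported.
	 */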
   1823	if (smi_cost) {
   1824		struct parse_events_error errinfo;
   1825		int smi;
   1826
   1827		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
   1828			fprintf(stderr, "freeze_on_smi is not supported.\n");
   1829			return -1;
   1830		}
   1831
   1832		if (!smi) {
   1833			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
   1834				fprintf(stderr, "Failed to set freeze_on_smi.\n");
   1835				return -1;
   1836			}
   1837			smi_reset = true;
   1838		}
   1839
   1840		if (!pmu_have_event("msr", "aperf") ||
   1841		    !pmu_have_event("msr", "smi")) {
   1842			fprintf(stderr, "Measuring SMI cost requires "
   1843				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
   1844			return -1;
   1845		}
   1846		if (!force_metric_only)
   1847			stat_config.metric_only = true;
   1848
   1849		parse_events_error__init(&errinfo);
   1850		err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
   1851		if (err) {
   1852			parse_events_error__print(&errinfo, smi_cost_attrs);
   1853			fprintf(stderr, "Cannot set up SMI cost events\n");
   1854		}
   1855		parse_events_error__exit(&errinfo);
   1856		return err ? -1 : 0;
   1857	}
   1858
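	/*
	 * --topdown: use the metric-based topdown events (level 1, or
	 * level 2 when the PMU supports it) if available; otherwise fall
	 * back to the raw topdown-* group, which requires per-core
	 * aggregation and system-wide counting.
	 */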
   1859	if (topdown_run) {
   1860		const char **metric_attrs = topdown_metric_attrs;
   1861		unsigned int max_level = 1;
   1862		char *str = NULL;
   1863		bool warn = false;
   1864		const char *pmu_name = "cpu";
   1865
   1866		if (!force_metric_only)
   1867			stat_config.metric_only = true;
   1868
   1869		if (perf_pmu__has_hybrid()) {
   1870			if (!evsel_list->hybrid_pmu_name) {
   1871				pr_warning("WARNING: defaulting to cpu_core topdown events\n");
   1872				evsel_list->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
   1873			}
   1874
   1875			pmu_name = evsel_list->hybrid_pmu_name;
   1876			if (!pmu_name)
   1877				return -1;
   1878		}
   1879
   1880		if (pmu_have_event(pmu_name, topdown_metric_L2_attrs[5])) {
   1881			metric_attrs = topdown_metric_L2_attrs;
   1882			max_level = 2;
   1883		}
   1884
   1885		if (stat_config.topdown_level > max_level) {
   1886			pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
   1887			return -1;
   1888		} else if (!stat_config.topdown_level)
   1889			stat_config.topdown_level = max_level;
   1890
   1891		if (topdown_filter_events(metric_attrs, &str, 1, pmu_name) < 0) {
   1892			pr_err("Out of memory\n");
   1893			return -1;
   1894		}
   1895
   1896		if (metric_attrs[0] && str) {
   1897			if (!stat_config.interval && !stat_config.metric_only) {
   1898				fprintf(stat_config.output,
   1899					"Topdown accuracy may decrease when measuring long periods.\n"
   1900					"Please print the result regularly, e.g. -I1000\n");
   1901			}
   1902			goto setup_metrics;
   1903		}
   1904
   1905		zfree(&str);
   1906
   1907		if (stat_config.aggr_mode != AGGR_GLOBAL &&
   1908		    stat_config.aggr_mode != AGGR_CORE) {
   1909			pr_err("top down event configuration requires --per-core mode\n");
   1910			return -1;
   1911		}
   1912		stat_config.aggr_mode = AGGR_CORE;
   1913		if (nr_cgroups || !target__has_cpu(&target)) {
   1914			pr_err("top down event configuration requires system-wide mode (-a)\n");
   1915			return -1;
   1916		}
   1917
   1918		if (topdown_filter_events(topdown_attrs, &str,
   1919				arch_topdown_check_group(&warn),
   1920				pmu_name) < 0) {
   1921			pr_err("Out of memory\n");
   1922			return -1;
   1923		}
   1924
   1925		if (topdown_attrs[0] && str) {
   1926			struct parse_events_error errinfo;
   1927			if (warn)
   1928				arch_topdown_group_warn();
   1929setup_metrics:
   1930			parse_events_error__init(&errinfo);
   1931			err = parse_events(evsel_list, str, &errinfo);
   1932			if (err) {
   1933				fprintf(stderr,
   1934					"Cannot set up top down events %s: %d\n",
   1935					str, err);
   1936				parse_events_error__print(&errinfo, str);
   1937				parse_events_error__exit(&errinfo);
   1938				free(str);
   1939				return -1;
   1940			}
   1941			parse_events_error__exit(&errinfo);
   1942		} else {
   1943			fprintf(stderr, "System does not support topdown\n");
   1944			return -1;
   1945		}
   1946		free(str);
   1947	}
   1948
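	/*
	 * No events were requested on the command line: fall back to the
	 * default set (software counters plus cycles, instructions and
	 * branch events), with a hybrid-aware variant when the system has
	 * more than one core PMU type.
	 */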
   1949	if (!evsel_list->core.nr_entries) {
   1950		if (perf_pmu__has_hybrid()) {
   1951			struct parse_events_error errinfo;
   1952			const char *hybrid_str = "cycles,instructions,branches,branch-misses";
   1953
   1954			if (target__has_cpu(&target))
   1955				default_sw_attrs[0].config = PERF_COUNT_SW_CPU_CLOCK;
   1956
   1957			if (evlist__add_default_attrs(evsel_list,
   1958						      default_sw_attrs) < 0) {
   1959				return -1;
   1960			}
   1961
   1962			parse_events_error__init(&errinfo);
   1963			err = parse_events(evsel_list, hybrid_str, &errinfo);
   1964			if (err) {
   1965				fprintf(stderr,
   1966					"Cannot set up hybrid events %s: %d\n",
   1967					hybrid_str, err);
   1968				parse_events_error__print(&errinfo, hybrid_str);
   1969			}
   1970			parse_events_error__exit(&errinfo);
   1971			return err ? -1 : 0;
   1972		}
   1973
   1974		if (target__has_cpu(&target))
   1975			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
   1976
   1977		if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
   1978			return -1;
   1979		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
   1980			if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
   1981				return -1;
   1982		}
   1983		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
   1984			if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
   1985				return -1;
   1986		}
   1987		if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
   1988			return -1;
   1989
   1990		stat_config.topdown_level = TOPDOWN_MAX_LEVEL;
   1991		if (arch_evlist__add_default_attrs(evsel_list) < 0)
   1992			return -1;
   1993	}
   1994
   1995	/* Detailed events get appended to the event list: */
   1996
   1997	if (detailed_run <  1)
   1998		return 0;
   1999
   2000	/* Append detailed run extra attributes: */
   2001	if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
   2002		return -1;
   2003
   2004	if (detailed_run < 2)
   2005		return 0;
   2006
   2007	/* Append very detailed run extra attributes: */
   2008	if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
   2009		return -1;
   2010
   2011	if (detailed_run < 3)
   2012		return 0;
   2013
   2014	/* Append very, very detailed run extra attributes: */
   2015	return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
   2016}
   2017
   2018static const char * const stat_record_usage[] = {
   2019	"perf stat record [<options>]",
   2020	NULL,
   2021};
   2022
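/*
 * Advertise the usual header features, minus the ones that stat data
 * never carries (directory format, build ids, tracing data, branch
 * stacks and auxtrace).
 */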
   2023static void init_features(struct perf_session *session)
   2024{
   2025	int feat;
   2026
   2027	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
   2028		perf_header__set_feat(&session->header, feat);
   2029
   2030	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
   2031	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
   2032	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
   2033	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
   2034	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
   2035}
   2036
   2037static int __cmd_record(int argc, const char **argv)
   2038{
   2039	struct perf_session *session;
   2040	struct perf_data *data = &perf_stat.data;
   2041
   2042	argc = parse_options(argc, argv, stat_options, stat_record_usage,
   2043			     PARSE_OPT_STOP_AT_NON_OPTION);
   2044
   2045	if (output_name)
   2046		data->path = output_name;
   2047
   2048	if (stat_config.run_count != 1 || forever) {
   2049		pr_err("Cannot use -r option with perf stat record.\n");
   2050		return -1;
   2051	}
   2052
   2053	session = perf_session__new(data, NULL);
   2054	if (IS_ERR(session)) {
   2055		pr_err("Perf session creation failed\n");
   2056		return PTR_ERR(session);
   2057	}
   2058
   2059	init_features(session);
   2060
   2061	session->evlist   = evsel_list;
   2062	perf_stat.session = session;
   2063	perf_stat.record  = true;
   2064	return argc;
   2065}
   2066
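/*
 * Replay handler for PERF_RECORD_STAT_ROUND: run the per-counter
 * processing, record the wall-clock time of the final round and print
 * the counters for this round.
 */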
   2067static int process_stat_round_event(struct perf_session *session,
   2068				    union perf_event *event)
   2069{
   2070	struct perf_record_stat_round *stat_round = &event->stat_round;
   2071	struct evsel *counter;
   2072	struct timespec tsh, *ts = NULL;
   2073	const char **argv = session->header.env.cmdline_argv;
   2074	int argc = session->header.env.nr_cmdline;
   2075
   2076	evlist__for_each_entry(evsel_list, counter)
   2077		perf_stat_process_counter(&stat_config, counter);
   2078
   2079	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
   2080		update_stats(&walltime_nsecs_stats, stat_round->time);
   2081
   2082	if (stat_config.interval && stat_round->time) {
   2083		tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
   2084		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
   2085		ts = &tsh;
   2086	}
   2087
   2088	print_counters(ts, argc, argv);
   2089	return 0;
   2090}
   2091
   2092static
   2093int process_stat_config_event(struct perf_session *session,
   2094			      union perf_event *event)
   2095{
   2096	struct perf_tool *tool = session->tool;
   2097	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
   2098
   2099	perf_event__read_stat_config(&stat_config, &event->stat_config);
   2100
   2101	if (perf_cpu_map__empty(st->cpus)) {
   2102		if (st->aggr_mode != AGGR_UNSET)
   2103			pr_warning("warning: processing task data, aggregation mode not set\n");
   2104		return 0;
   2105	}
   2106
   2107	if (st->aggr_mode != AGGR_UNSET)
   2108		stat_config.aggr_mode = st->aggr_mode;
   2109
   2110	if (perf_stat.data.is_pipe)
   2111		perf_stat_init_aggr_mode();
   2112	else
   2113		perf_stat_init_aggr_mode_file(st);
   2114
   2115	return 0;
   2116}
   2117
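/*
 * The CPU map and the thread map arrive as separate events; only once
 * both are known can the evlist maps and the counter storage be set up.
 */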
   2118static int set_maps(struct perf_stat *st)
   2119{
   2120	if (!st->cpus || !st->threads)
   2121		return 0;
   2122
   2123	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
   2124		return -EINVAL;
   2125
   2126	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
   2127
   2128	if (evlist__alloc_stats(evsel_list, true))
   2129		return -ENOMEM;
   2130
   2131	st->maps_allocated = true;
   2132	return 0;
   2133}
   2134
   2135static
   2136int process_thread_map_event(struct perf_session *session,
   2137			     union perf_event *event)
   2138{
   2139	struct perf_tool *tool = session->tool;
   2140	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
   2141
   2142	if (st->threads) {
   2143		pr_warning("Extra thread map event, ignoring.\n");
   2144		return 0;
   2145	}
   2146
   2147	st->threads = thread_map__new_event(&event->thread_map);
   2148	if (!st->threads)
   2149		return -ENOMEM;
   2150
   2151	return set_maps(st);
   2152}
   2153
   2154static
   2155int process_cpu_map_event(struct perf_session *session,
   2156			  union perf_event *event)
   2157{
   2158	struct perf_tool *tool = session->tool;
   2159	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
   2160	struct perf_cpu_map *cpus;
   2161
   2162	if (st->cpus) {
   2163		pr_warning("Extra cpu map event, ignoring.\n");
   2164		return 0;
   2165	}
   2166
   2167	cpus = cpu_map__new_data(&event->cpu_map.data);
   2168	if (!cpus)
   2169		return -ENOMEM;
   2170
   2171	st->cpus = cpus;
   2172	return set_maps(st);
   2173}
   2174
   2175static const char * const stat_report_usage[] = {
   2176	"perf stat report [<options>]",
   2177	NULL,
   2178};
   2179
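/*
 * Tool callbacks used by 'perf stat report' to replay the events that
 * 'perf stat record' wrote into the data file.
 */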
   2180static struct perf_stat perf_stat = {
   2181	.tool = {
   2182		.attr		= perf_event__process_attr,
   2183		.event_update	= perf_event__process_event_update,
   2184		.thread_map	= process_thread_map_event,
   2185		.cpu_map	= process_cpu_map_event,
   2186		.stat_config	= process_stat_config_event,
   2187		.stat		= perf_event__process_stat_event,
   2188		.stat_round	= process_stat_round_event,
   2189	},
   2190	.aggr_mode = AGGR_UNSET,
   2191};
   2192
   2193static int __cmd_report(int argc, const char **argv)
   2194{
   2195	struct perf_session *session;
   2196	const struct option options[] = {
   2197	OPT_STRING('i', "input", &input_name, "file", "input file name"),
   2198	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
   2199		     "aggregate counts per processor socket", AGGR_SOCKET),
   2200	OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
   2201		     "aggregate counts per processor die", AGGR_DIE),
   2202	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
   2203		     "aggregate counts per physical processor core", AGGR_CORE),
   2204	OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
   2205		     "aggregate counts per numa node", AGGR_NODE),
   2206	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
   2207		     "disable CPU count aggregation", AGGR_NONE),
   2208	OPT_END()
   2209	};
   2210	struct stat st;
   2211	int ret;
   2212
   2213	argc = parse_options(argc, argv, options, stat_report_usage, 0);
   2214
   2215	if (!input_name || !strlen(input_name)) {
   2216		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
   2217			input_name = "-";
   2218		else
   2219			input_name = "perf.data";
   2220	}
   2221
   2222	perf_stat.data.path = input_name;
   2223	perf_stat.data.mode = PERF_DATA_MODE_READ;
   2224
   2225	session = perf_session__new(&perf_stat.data, &perf_stat.tool);
   2226	if (IS_ERR(session))
   2227		return PTR_ERR(session);
   2228
   2229	perf_stat.session  = session;
   2230	stat_config.output = stderr;
   2231	evsel_list         = session->evlist;
   2232
   2233	ret = perf_session__process_events(session);
   2234	if (ret)
   2235		return ret;
   2236
   2237	perf_session__delete(session);
   2238	return 0;
   2239}
   2240
   2241static void setup_system_wide(int forks)
   2242{
   2243	/*
   2244	 * Make system wide (-a) the default target if
   2245	 * no target was specified and one of the following
   2246	 * conditions is met:
   2247	 *
   2248	 *   - there's no workload specified
   2249	 *   - there is workload specified but all requested
   2250	 *     events are system wide events
   2251	 */
   2252	if (!target__none(&target))
   2253		return;
   2254
   2255	if (!forks)
   2256		target.system_wide = true;
   2257	else {
   2258		struct evsel *counter;
   2259
   2260		evlist__for_each_entry(evsel_list, counter) {
   2261			if (!counter->core.requires_cpu &&
   2262			    strcmp(counter->name, "duration_time")) {
   2263				return;
   2264			}
   2265		}
   2266
   2267		if (evsel_list->core.nr_entries)
   2268			target.system_wide = true;
   2269	}
   2270}
   2271
   2272int cmd_stat(int argc, const char **argv)
   2273{
   2274	const char * const stat_usage[] = {
   2275		"perf stat [<options>] [<command>]",
   2276		NULL
   2277	};
   2278	int status = -EINVAL, run_idx, err;
   2279	const char *mode;
   2280	FILE *output = stderr;
   2281	unsigned int interval, timeout;
   2282	const char * const stat_subcommands[] = { "record", "report" };
   2283	char errbuf[BUFSIZ];
   2284
   2285	setlocale(LC_ALL, "");
   2286
   2287	evsel_list = evlist__new();
   2288	if (evsel_list == NULL)
   2289		return -ENOMEM;
   2290
   2291	parse_events__shrink_config_terms();
   2292
   2293	/* String-parsing callback-based options would segfault when negated */
   2294	set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
   2295	set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
   2296	set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
   2297
   2298	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
   2299					(const char **) stat_usage,
   2300					PARSE_OPT_STOP_AT_NON_OPTION);
   2301	perf_stat__collect_metric_expr(evsel_list);
   2302	perf_stat__init_shadow_stats();
   2303
   2304	if (stat_config.csv_sep) {
   2305		stat_config.csv_output = true;
   2306		if (!strcmp(stat_config.csv_sep, "\\t"))
   2307			stat_config.csv_sep = "\t";
   2308	} else
   2309		stat_config.csv_sep = DEFAULT_SEPARATOR;
   2310
   2311	if (argc && strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
   2312		argc = __cmd_record(argc, argv);
   2313		if (argc < 0)
   2314			return -1;
   2315	} else if (argc && strlen(argv[0]) > 2 && strstarts("report", argv[0]))
   2316		return __cmd_report(argc, argv);
   2317
   2318	interval = stat_config.interval;
   2319	timeout = stat_config.timeout;
   2320
   2321	/*
   2322	 * For the record command the -o option is already taken care of.
   2323	 */
   2324	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
   2325		output = NULL;
   2326
   2327	if (output_name && output_fd) {
   2328		fprintf(stderr, "cannot use both --output and --log-fd\n");
   2329		parse_options_usage(stat_usage, stat_options, "o", 1);
   2330		parse_options_usage(NULL, stat_options, "log-fd", 0);
   2331		goto out;
   2332	}
   2333
   2334	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
   2335		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
   2336		goto out;
   2337	}
   2338
   2339	if (stat_config.metric_only && stat_config.run_count > 1) {
   2340		fprintf(stderr, "--metric-only is not supported with -r\n");
   2341		goto out;
   2342	}
   2343
   2344	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
   2345		fprintf(stderr, "--table is only supported with -r\n");
   2346		parse_options_usage(stat_usage, stat_options, "r", 1);
   2347		parse_options_usage(NULL, stat_options, "table", 0);
   2348		goto out;
   2349	}
   2350
   2351	if (output_fd < 0) {
   2352		fprintf(stderr, "argument to --log-fd must be > 0\n");
   2353		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
   2354		goto out;
   2355	}
   2356
   2357	if (!output && !stat_config.quiet) {
   2358		struct timespec tm;
   2359		mode = append_file ? "a" : "w";
   2360
   2361		output = fopen(output_name, mode);
   2362		if (!output) {
   2363			perror("failed to create output file");
   2364			return -1;
   2365		}
   2366		clock_gettime(CLOCK_REALTIME, &tm);
   2367		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
   2368	} else if (output_fd > 0) {
   2369		mode = append_file ? "a" : "w";
   2370		output = fdopen(output_fd, mode);
   2371		if (!output) {
   2372			perror("Failed opening logfd");
   2373			return -errno;
   2374		}
   2375	}
   2376
   2377	stat_config.output = output;
   2378
   2379	/*
   2380	 * let the spreadsheet do the pretty-printing
   2381	 */
   2382	if (stat_config.csv_output) {
   2383		/* User explicitly passed -B? */
   2384		if (big_num_opt == 1) {
   2385			fprintf(stderr, "-B option not supported with -x\n");
   2386			parse_options_usage(stat_usage, stat_options, "B", 1);
   2387			parse_options_usage(NULL, stat_options, "x", 1);
   2388			goto out;
   2389		} else /* Nope, so disable big number formatting */
   2390			stat_config.big_num = false;
   2391	} else if (big_num_opt == 0) /* User passed --no-big-num */
   2392		stat_config.big_num = false;
   2393
   2394	err = target__validate(&target);
   2395	if (err) {
   2396		target__strerror(&target, err, errbuf, BUFSIZ);
   2397		pr_warning("%s\n", errbuf);
   2398	}
   2399
   2400	setup_system_wide(argc);
   2401
   2402	/*
   2403	 * Display user/system times only for single
   2404	 * run and when a tracee is specified.
   2405	 */
   2406	if ((stat_config.run_count == 1) && target__none(&target))
   2407		stat_config.ru_display = true;
   2408
   2409	if (stat_config.run_count < 0) {
   2410		pr_err("Run count must be a positive number\n");
   2411		parse_options_usage(stat_usage, stat_options, "r", 1);
   2412		goto out;
   2413	} else if (stat_config.run_count == 0) {
   2414		forever = true;
   2415		stat_config.run_count = 1;
   2416	}
   2417
   2418	if (stat_config.walltime_run_table) {
   2419		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
   2420		if (!stat_config.walltime_run) {
   2421			pr_err("failed to set up -r option\n");
   2422			goto out;
   2423		}
   2424	}
   2425
   2426	if ((stat_config.aggr_mode == AGGR_THREAD) &&
   2427		!target__has_task(&target)) {
   2428		if (!target.system_wide || target.cpu_list) {
   2429			fprintf(stderr, "The --per-thread option is only "
   2430				"available when monitoring via the -p, -t or -a "
   2431				"options, or when used on its own.\n");
   2432			parse_options_usage(NULL, stat_options, "p", 1);
   2433			parse_options_usage(NULL, stat_options, "t", 1);
   2434			goto out;
   2435		}
   2436	}
   2437
   2438	/*
   2439	 * no_aggr, cgroup are for system-wide only
   2440	 * --per-thread is aggregated per thread, we don't mix it with cpu mode
   2441	 */
   2442	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
   2443	      stat_config.aggr_mode != AGGR_THREAD) ||
   2444	     (nr_cgroups || stat_config.cgroup_list)) &&
   2445	    !target__has_cpu(&target)) {
   2446		fprintf(stderr, "both cgroup and no-aggregation "
   2447			"modes are only available in system-wide mode\n");
   2448
   2449		parse_options_usage(stat_usage, stat_options, "G", 1);
   2450		parse_options_usage(NULL, stat_options, "A", 1);
   2451		parse_options_usage(NULL, stat_options, "a", 1);
   2452		parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
   2453		goto out;
   2454	}
   2455
   2456	if (stat_config.iostat_run) {
   2457		status = iostat_prepare(evsel_list, &stat_config);
   2458		if (status)
   2459			goto out;
   2460		if (iostat_mode == IOSTAT_LIST) {
   2461			iostat_list(evsel_list, &stat_config);
   2462			goto out;
   2463		} else if (verbose)
   2464			iostat_list(evsel_list, &stat_config);
   2465		if (iostat_mode == IOSTAT_RUN && !target__has_cpu(&target))
   2466			target.system_wide = true;
   2467	}
   2468
   2469	if (add_default_attributes())
   2470		goto out;
   2471
   2472	if (stat_config.cgroup_list) {
   2473		if (nr_cgroups > 0) {
   2474			pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
   2475			parse_options_usage(stat_usage, stat_options, "G", 1);
   2476			parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
   2477			goto out;
   2478		}
   2479
   2480		if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
   2481					  &stat_config.metric_events, true) < 0) {
   2482			parse_options_usage(stat_usage, stat_options,
   2483					    "for-each-cgroup", 0);
   2484			goto out;
   2485		}
   2486	}
   2487
   2488	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
   2489		target.per_thread = true;
   2490
   2491	if (evlist__fix_hybrid_cpus(evsel_list, target.cpu_list)) {
   2492		pr_err("failed to use cpu list %s\n", target.cpu_list);
   2493		goto out;
   2494	}
   2495
   2496	target.hybrid = perf_pmu__has_hybrid();
   2497	if (evlist__create_maps(evsel_list, &target) < 0) {
   2498		if (target__has_task(&target)) {
   2499			pr_err("Problems finding threads to monitor\n");
   2500			parse_options_usage(stat_usage, stat_options, "p", 1);
   2501			parse_options_usage(NULL, stat_options, "t", 1);
   2502		} else if (target__has_cpu(&target)) {
   2503			perror("failed to parse CPUs map");
   2504			parse_options_usage(stat_usage, stat_options, "C", 1);
   2505			parse_options_usage(NULL, stat_options, "a", 1);
   2506		}
   2507		goto out;
   2508	}
   2509
   2510	evlist__check_cpu_maps(evsel_list);
   2511
   2512	/*
   2513	 * Initialize thread_map with comm names,
   2514	 * so we can print them in the output.
   2515	 */
   2516	if (stat_config.aggr_mode == AGGR_THREAD) {
   2517		thread_map__read_comms(evsel_list->core.threads);
   2518		if (target.system_wide) {
   2519			if (runtime_stat_new(&stat_config,
   2520				perf_thread_map__nr(evsel_list->core.threads))) {
   2521				goto out;
   2522			}
   2523		}
   2524	}
   2525
   2526	if (stat_config.aggr_mode == AGGR_NODE)
   2527		cpu__setup_cpunode_map();
   2528
   2529	if (stat_config.times && interval)
   2530		interval_count = true;
   2531	else if (stat_config.times && !interval) {
   2532		pr_err("interval-count option should be used together with "
   2533				"interval-print.\n");
   2534		parse_options_usage(stat_usage, stat_options, "interval-count", 0);
   2535		parse_options_usage(stat_usage, stat_options, "I", 1);
   2536		goto out;
   2537	}
   2538
   2539	if (timeout && timeout < 100) {
   2540		if (timeout < 10) {
   2541			pr_err("timeout must be >= 10ms.\n");
   2542			parse_options_usage(stat_usage, stat_options, "timeout", 0);
   2543			goto out;
   2544		} else
   2545			pr_warning("timeout < 100ms. "
   2546				   "The overhead percentage could be high in some cases. "
   2547				   "Please proceed with caution.\n");
   2548	}
   2549	if (timeout && interval) {
   2550		pr_err("timeout option is not supported with interval-print.\n");
   2551		parse_options_usage(stat_usage, stat_options, "timeout", 0);
   2552		parse_options_usage(stat_usage, stat_options, "I", 1);
   2553		goto out;
   2554	}
   2555
   2556	if (evlist__alloc_stats(evsel_list, interval))
   2557		goto out;
   2558
   2559	if (perf_stat_init_aggr_mode())
   2560		goto out;
   2561
   2562	/*
   2563	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
   2564	 * and avoids confusing messages from older tools.
   2565	 *
   2566	 * However for pipe sessions we need to keep it zero,
   2567	 * because script's perf_evsel__check_attr is triggered
   2568	 * by attr->sample_type != 0, and we can't run it on
   2569	 * stat sessions.
   2570	 */
   2571	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
   2572
   2573	/*
   2574	 * We don't want to block the signals - that would cause
   2575	 * child tasks to inherit that and Ctrl-C would not work.
   2576	 * What we want is for Ctrl-C to work in the exec()-ed
   2577	 * task, but to be ignored by perf stat itself:
   2578	 */
   2579	atexit(sig_atexit);
   2580	if (!forever)
   2581		signal(SIGINT,  skip_signal);
   2582	signal(SIGCHLD, skip_signal);
   2583	signal(SIGALRM, skip_signal);
   2584	signal(SIGABRT, skip_signal);
   2585
   2586	if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
   2587		goto out;
   2588
   2589	/* Enable ignoring missing threads when the -p option is specified. */
   2590	evlist__first(evsel_list)->ignore_missing_thread = target.pid;
   2591	status = 0;
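	/*
	 * Main measurement loop: -r N repeats the run N times; -r 0
	 * ("forever") keeps re-running and printing until interrupted.
	 */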
   2592	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
   2593		if (stat_config.run_count != 1 && verbose > 0)
   2594			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
   2595				run_idx + 1);
   2596
   2597		if (run_idx != 0)
   2598			evlist__reset_prev_raw_counts(evsel_list);
   2599
   2600		status = run_perf_stat(argc, argv, run_idx);
   2601		if (forever && status != -1 && !interval) {
   2602			print_counters(NULL, argc, argv);
   2603			perf_stat__reset_stats();
   2604		}
   2605	}
   2606
   2607	if (!forever && status != -1 && (!interval || stat_config.summary))
   2608		print_counters(NULL, argc, argv);
   2609
   2610	evlist__finalize_ctlfd(evsel_list);
   2611
   2612	if (STAT_RECORD) {
   2613		/*
   2614		 * We synthesize the kernel mmap record just so that older tools
   2615		 * don't emit warnings about not being able to resolve symbols
   2616		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
   2617		 * a saner message about no samples being in the perf.data file.
   2618		 *
   2619		 * This also serves to suppress a warning about f_header.data.size == 0
   2620		 * in header.c at the moment 'perf stat record' gets introduced, which
   2621		 * is not really needed once we start adding the stat specific PERF_RECORD_
   2622		 * records, but the need to suppress the kptr_restrict messages in older
   2623	 * tools remains. -acme
   2624		 */
   2625		int fd = perf_data__fd(&perf_stat.data);
   2626
   2627		err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
   2628							 process_synthesized_event,
   2629							 &perf_stat.session->machines.host);
   2630		if (err) {
   2631			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
   2632				   "older tools may produce warnings about this file.\n");
   2633		}
   2634
   2635		if (!interval) {
   2636			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
   2637				pr_err("failed to write stat round event\n");
   2638		}
   2639
   2640		if (!perf_stat.data.is_pipe) {
   2641			perf_stat.session->header.data_size += perf_stat.bytes_written;
   2642			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
   2643		}
   2644
   2645		evlist__close(evsel_list);
   2646		perf_session__delete(perf_stat.session);
   2647	}
   2648
   2649	perf_stat__exit_aggr_mode();
   2650	evlist__free_stats(evsel_list);
   2651out:
   2652	if (stat_config.iostat_run)
   2653		iostat_release(evsel_list);
   2654
   2655	zfree(&stat_config.walltime_run);
   2656
   2657	if (smi_cost && smi_reset)
   2658		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
   2659
   2660	evlist__delete(evsel_list);
   2661
   2662	metricgroup__rblist_exit(&stat_config.metric_events);
   2663	runtime_stat_delete(&stat_config);
   2664	evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
   2665
   2666	return status;
   2667}