cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

turbostat.c (174689B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * turbostat -- show CPU frequency and C-state residency
      4 * on modern Intel and AMD processors.
      5 *
      6 * Copyright (c) 2022 Intel Corporation.
      7 * Len Brown <len.brown@intel.com>
      8 */
      9
     10#define _GNU_SOURCE
     11#include MSRHEADER
     12#include INTEL_FAMILY_HEADER
     13#include <stdarg.h>
     14#include <stdio.h>
     15#include <err.h>
     16#include <unistd.h>
     17#include <sys/types.h>
     18#include <sys/wait.h>
     19#include <sys/stat.h>
     20#include <sys/select.h>
     21#include <sys/resource.h>
     22#include <fcntl.h>
     23#include <signal.h>
     24#include <sys/time.h>
     25#include <stdlib.h>
     26#include <getopt.h>
     27#include <dirent.h>
     28#include <string.h>
     29#include <ctype.h>
     30#include <sched.h>
     31#include <time.h>
     32#include <cpuid.h>
     33#include <sys/capability.h>
     34#include <errno.h>
     35#include <math.h>
     36#include <linux/perf_event.h>
     37#include <asm/unistd.h>
     38#include <stdbool.h>
     39
     40#define UNUSED(x) (void)(x)
     41
     42/*
     43 * This list matches the column headers, except
     44 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
     45 * 2. Core and CPU are moved to the end, we can't have strings that contain them
     46 *    matching on them for --show and --hide.
     47 */
     48
     49/*
     50 * buffer size used by sscanf() for added column names
     51 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
     52 */
     53#define	NAME_BYTES 20
     54#define PATH_BYTES 128
     55
     56enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
     57enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
     58enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
     59
     60struct msr_counter {
     61	unsigned int msr_num;
     62	char name[NAME_BYTES];
     63	char path[PATH_BYTES];
     64	unsigned int width;
     65	enum counter_type type;
     66	enum counter_format format;
     67	struct msr_counter *next;
     68	unsigned int flags;
     69#define	FLAGS_HIDE	(1 << 0)
     70#define	FLAGS_SHOW	(1 << 1)
     71#define	SYSFS_PERCPU	(1 << 1)
     72};
     73
     74struct msr_counter bic[] = {
     75	{ 0x0, "usec", "", 0, 0, 0, NULL, 0 },
     76	{ 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
     77	{ 0x0, "Package", "", 0, 0, 0, NULL, 0 },
     78	{ 0x0, "Node", "", 0, 0, 0, NULL, 0 },
     79	{ 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
     80	{ 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
     81	{ 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
     82	{ 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
     83	{ 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
     84	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
     85	{ 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
     86	{ 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
     87	{ 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
     88	{ 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
     89	{ 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
     90	{ 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
     91	{ 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
     92	{ 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
     93	{ 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
     94	{ 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
     95	{ 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
     96	{ 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
     97	{ 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
     98	{ 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
     99	{ 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
    100	{ 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
    101	{ 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
    102	{ 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
    103	{ 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
    104	{ 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
    105	{ 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
    106	{ 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
    107	{ 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
    108	{ 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
    109	{ 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
    110	{ 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
    111	{ 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
    112	{ 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
    113	{ 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
    114	{ 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
    115	{ 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
    116	{ 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
    117	{ 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
    118	{ 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
    119	{ 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
    120	{ 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
    121	{ 0x0, "Core", "", 0, 0, 0, NULL, 0 },
    122	{ 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
    123	{ 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
    124	{ 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
    125	{ 0x0, "Die", "", 0, 0, 0, NULL, 0 },
    126	{ 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
    127	{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
    128	{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
    129};
    130
    131#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
    132#define	BIC_USEC	(1ULL << 0)
    133#define	BIC_TOD		(1ULL << 1)
    134#define	BIC_Package	(1ULL << 2)
    135#define	BIC_Node	(1ULL << 3)
    136#define	BIC_Avg_MHz	(1ULL << 4)
    137#define	BIC_Busy	(1ULL << 5)
    138#define	BIC_Bzy_MHz	(1ULL << 6)
    139#define	BIC_TSC_MHz	(1ULL << 7)
    140#define	BIC_IRQ		(1ULL << 8)
    141#define	BIC_SMI		(1ULL << 9)
    142#define	BIC_sysfs	(1ULL << 10)
    143#define	BIC_CPU_c1	(1ULL << 11)
    144#define	BIC_CPU_c3	(1ULL << 12)
    145#define	BIC_CPU_c6	(1ULL << 13)
    146#define	BIC_CPU_c7	(1ULL << 14)
    147#define	BIC_ThreadC	(1ULL << 15)
    148#define	BIC_CoreTmp	(1ULL << 16)
    149#define	BIC_CoreCnt	(1ULL << 17)
    150#define	BIC_PkgTmp	(1ULL << 18)
    151#define	BIC_GFX_rc6	(1ULL << 19)
    152#define	BIC_GFXMHz	(1ULL << 20)
    153#define	BIC_Pkgpc2	(1ULL << 21)
    154#define	BIC_Pkgpc3	(1ULL << 22)
    155#define	BIC_Pkgpc6	(1ULL << 23)
    156#define	BIC_Pkgpc7	(1ULL << 24)
    157#define	BIC_Pkgpc8	(1ULL << 25)
    158#define	BIC_Pkgpc9	(1ULL << 26)
    159#define	BIC_Pkgpc10	(1ULL << 27)
    160#define BIC_CPU_LPI	(1ULL << 28)
    161#define BIC_SYS_LPI	(1ULL << 29)
    162#define	BIC_PkgWatt	(1ULL << 30)
    163#define	BIC_CorWatt	(1ULL << 31)
    164#define	BIC_GFXWatt	(1ULL << 32)
    165#define	BIC_PkgCnt	(1ULL << 33)
    166#define	BIC_RAMWatt	(1ULL << 34)
    167#define	BIC_PKG__	(1ULL << 35)
    168#define	BIC_RAM__	(1ULL << 36)
    169#define	BIC_Pkg_J	(1ULL << 37)
    170#define	BIC_Cor_J	(1ULL << 38)
    171#define	BIC_GFX_J	(1ULL << 39)
    172#define	BIC_RAM_J	(1ULL << 40)
    173#define	BIC_Mod_c6	(1ULL << 41)
    174#define	BIC_Totl_c0	(1ULL << 42)
    175#define	BIC_Any_c0	(1ULL << 43)
    176#define	BIC_GFX_c0	(1ULL << 44)
    177#define	BIC_CPUGFX	(1ULL << 45)
    178#define	BIC_Core	(1ULL << 46)
    179#define	BIC_CPU		(1ULL << 47)
    180#define	BIC_APIC	(1ULL << 48)
    181#define	BIC_X2APIC	(1ULL << 49)
    182#define	BIC_Die		(1ULL << 50)
    183#define	BIC_GFXACTMHz	(1ULL << 51)
    184#define	BIC_IPC		(1ULL << 52)
    185#define	BIC_CORE_THROT_CNT	(1ULL << 53)
    186
    187#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
    188#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
    189#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
    190#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
    191#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
    192
    193#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
    194
    195unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
    196unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
    197
    198#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
    199#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
    200#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
    201#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
    202#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
    203#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
    204
    205char *proc_stat = "/proc/stat";
    206FILE *outf;
    207int *fd_percpu;
    208int *fd_instr_count_percpu;
    209struct timeval interval_tv = { 5, 0 };
    210struct timespec interval_ts = { 5, 0 };
    211
    212/* Save original CPU model */
    213unsigned int model_orig;
    214
    215unsigned int num_iterations;
    216unsigned int header_iterations;
    217unsigned int debug;
    218unsigned int quiet;
    219unsigned int shown;
    220unsigned int sums_need_wide_columns;
    221unsigned int rapl_joules;
    222unsigned int summary_only;
    223unsigned int list_header_only;
    224unsigned int dump_only;
    225unsigned int do_snb_cstates;
    226unsigned int do_knl_cstates;
    227unsigned int do_slm_cstates;
    228unsigned int use_c1_residency_msr;
    229unsigned int has_aperf;
    230unsigned int has_epb;
    231unsigned int do_irtl_snb;
    232unsigned int do_irtl_hsw;
    233unsigned int units = 1000000;	/* MHz etc */
    234unsigned int genuine_intel;
    235unsigned int authentic_amd;
    236unsigned int hygon_genuine;
    237unsigned int max_level, max_extended_level;
    238unsigned int has_invariant_tsc;
    239unsigned int do_nhm_platform_info;
    240unsigned int no_MSR_MISC_PWR_MGMT;
    241unsigned int aperf_mperf_multiplier = 1;
    242double bclk;
    243double base_hz;
    244unsigned int has_base_hz;
    245double tsc_tweak = 1.0;
    246unsigned int show_pkg_only;
    247unsigned int show_core_only;
    248char *output_buffer, *outp;
    249unsigned int do_rapl;
    250unsigned int do_dts;
    251unsigned int do_ptm;
    252unsigned int do_ipc;
    253unsigned long long gfx_cur_rc6_ms;
    254unsigned long long cpuidle_cur_cpu_lpi_us;
    255unsigned long long cpuidle_cur_sys_lpi_us;
    256unsigned int gfx_cur_mhz;
    257unsigned int gfx_act_mhz;
    258unsigned int tj_max;
    259unsigned int tj_max_override;
    260int tcc_offset_bits;
    261double rapl_power_units, rapl_time_units;
    262double rapl_dram_energy_units, rapl_energy_units;
    263double rapl_joule_counter_range;
    264unsigned int do_core_perf_limit_reasons;
    265unsigned int has_automatic_cstate_conversion;
    266unsigned int dis_cstate_prewake;
    267unsigned int do_gfx_perf_limit_reasons;
    268unsigned int do_ring_perf_limit_reasons;
    269unsigned int crystal_hz;
    270unsigned long long tsc_hz;
    271int base_cpu;
    272double discover_bclk(unsigned int family, unsigned int model);
    273unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
    274			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
    275unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
    276unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
    277unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
    278unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
    279unsigned int has_misc_feature_control;
    280unsigned int first_counter_read = 1;
    281int ignore_stdin;
    282
    283#define RAPL_PKG		(1 << 0)
    284					/* 0x610 MSR_PKG_POWER_LIMIT */
    285					/* 0x611 MSR_PKG_ENERGY_STATUS */
    286#define RAPL_PKG_PERF_STATUS	(1 << 1)
    287					/* 0x613 MSR_PKG_PERF_STATUS */
    288#define RAPL_PKG_POWER_INFO	(1 << 2)
    289					/* 0x614 MSR_PKG_POWER_INFO */
    290
    291#define RAPL_DRAM		(1 << 3)
    292					/* 0x618 MSR_DRAM_POWER_LIMIT */
    293					/* 0x619 MSR_DRAM_ENERGY_STATUS */
    294#define RAPL_DRAM_PERF_STATUS	(1 << 4)
    295					/* 0x61b MSR_DRAM_PERF_STATUS */
    296#define RAPL_DRAM_POWER_INFO	(1 << 5)
    297					/* 0x61c MSR_DRAM_POWER_INFO */
    298
    299#define RAPL_CORES_POWER_LIMIT	(1 << 6)
    300					/* 0x638 MSR_PP0_POWER_LIMIT */
    301#define RAPL_CORE_POLICY	(1 << 7)
    302					/* 0x63a MSR_PP0_POLICY */
    303
    304#define RAPL_GFX		(1 << 8)
    305					/* 0x640 MSR_PP1_POWER_LIMIT */
    306					/* 0x641 MSR_PP1_ENERGY_STATUS */
    307					/* 0x642 MSR_PP1_POLICY */
    308
    309#define RAPL_CORES_ENERGY_STATUS	(1 << 9)
    310					/* 0x639 MSR_PP0_ENERGY_STATUS */
    311#define RAPL_PER_CORE_ENERGY	(1 << 10)
    312					/* Indicates cores energy collection is per-core,
    313					 * not per-package. */
    314#define RAPL_AMD_F17H		(1 << 11)
    315					/* 0xc0010299 MSR_RAPL_PWR_UNIT */
    316					/* 0xc001029a MSR_CORE_ENERGY_STAT */
    317					/* 0xc001029b MSR_PKG_ENERGY_STAT */
    318#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
    319#define	TJMAX_DEFAULT	100
    320
    321/* MSRs that are not yet in the kernel-provided header. */
    322#define MSR_RAPL_PWR_UNIT	0xc0010299
    323#define MSR_CORE_ENERGY_STAT	0xc001029a
    324#define MSR_PKG_ENERGY_STAT	0xc001029b
    325
    326#define MAX(a, b) ((a) > (b) ? (a) : (b))
    327
    328int backwards_count;
    329char *progname;
    330
    331#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
    332cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
    333size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
    334#define MAX_ADDED_COUNTERS 8
    335#define MAX_ADDED_THREAD_COUNTERS 24
    336#define BITMASK_SIZE 32
    337
    338struct thread_data {
    339	struct timeval tv_begin;
    340	struct timeval tv_end;
    341	struct timeval tv_delta;
    342	unsigned long long tsc;
    343	unsigned long long aperf;
    344	unsigned long long mperf;
    345	unsigned long long c1;
    346	unsigned long long instr_count;
    347	unsigned long long irq_count;
    348	unsigned int smi_count;
    349	unsigned int cpu_id;
    350	unsigned int apic_id;
    351	unsigned int x2apic_id;
    352	unsigned int flags;
    353	bool is_atom;
    354#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
    355#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
    356	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
    357} *thread_even, *thread_odd;
    358
    359struct core_data {
    360	unsigned long long c3;
    361	unsigned long long c6;
    362	unsigned long long c7;
    363	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
    364	unsigned int core_temp_c;
    365	unsigned int core_energy;	/* MSR_CORE_ENERGY_STAT */
    366	unsigned int core_id;
    367	unsigned long long core_throt_cnt;
    368	unsigned long long counter[MAX_ADDED_COUNTERS];
    369} *core_even, *core_odd;
    370
    371struct pkg_data {
    372	unsigned long long pc2;
    373	unsigned long long pc3;
    374	unsigned long long pc6;
    375	unsigned long long pc7;
    376	unsigned long long pc8;
    377	unsigned long long pc9;
    378	unsigned long long pc10;
    379	unsigned long long cpu_lpi;
    380	unsigned long long sys_lpi;
    381	unsigned long long pkg_wtd_core_c0;
    382	unsigned long long pkg_any_core_c0;
    383	unsigned long long pkg_any_gfxe_c0;
    384	unsigned long long pkg_both_core_gfxe_c0;
    385	long long gfx_rc6_ms;
    386	unsigned int gfx_mhz;
    387	unsigned int gfx_act_mhz;
    388	unsigned int package_id;
    389	unsigned long long energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
    390	unsigned long long energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
    391	unsigned long long energy_cores;	/* MSR_PP0_ENERGY_STATUS */
    392	unsigned long long energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
    393	unsigned long long rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
    394	unsigned long long rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
    395	unsigned int pkg_temp_c;
    396	unsigned long long counter[MAX_ADDED_COUNTERS];
    397} *package_even, *package_odd;
    398
    399#define ODD_COUNTERS thread_odd, core_odd, package_odd
    400#define EVEN_COUNTERS thread_even, core_even, package_even
    401
    402#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
    403	((thread_base) +						      \
    404	 ((pkg_no) *							      \
    405	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
    406	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
    407	 ((core_no) * topo.threads_per_core) +				      \
    408	 (thread_no))
    409
    410#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
    411	((core_base) +							\
    412	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
    413	 ((node_no) * topo.cores_per_node) +				\
    414	 (core_no))
    415
    416#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
    417
    418/*
    419 * The accumulated sum of MSR is defined as a monotonic
    420 * increasing MSR, it will be accumulated periodically,
    421 * despite its register's bit width.
    422 */
    423enum {
    424	IDX_PKG_ENERGY,
    425	IDX_DRAM_ENERGY,
    426	IDX_PP0_ENERGY,
    427	IDX_PP1_ENERGY,
    428	IDX_PKG_PERF,
    429	IDX_DRAM_PERF,
    430	IDX_COUNT,
    431};
    432
    433int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
    434
    435struct msr_sum_array {
    436	/* get_msr_sum() = sum + (get_msr() - last) */
    437	struct {
    438		/*The accumulated MSR value is updated by the timer */
    439		unsigned long long sum;
    440		/*The MSR footprint recorded in last timer */
    441		unsigned long long last;
    442	} entries[IDX_COUNT];
    443};
    444
    445/* The percpu MSR sum array.*/
    446struct msr_sum_array *per_cpu_msr_sum;
    447
    448off_t idx_to_offset(int idx)
    449{
    450	off_t offset;
    451
    452	switch (idx) {
    453	case IDX_PKG_ENERGY:
    454		if (do_rapl & RAPL_AMD_F17H)
    455			offset = MSR_PKG_ENERGY_STAT;
    456		else
    457			offset = MSR_PKG_ENERGY_STATUS;
    458		break;
    459	case IDX_DRAM_ENERGY:
    460		offset = MSR_DRAM_ENERGY_STATUS;
    461		break;
    462	case IDX_PP0_ENERGY:
    463		offset = MSR_PP0_ENERGY_STATUS;
    464		break;
    465	case IDX_PP1_ENERGY:
    466		offset = MSR_PP1_ENERGY_STATUS;
    467		break;
    468	case IDX_PKG_PERF:
    469		offset = MSR_PKG_PERF_STATUS;
    470		break;
    471	case IDX_DRAM_PERF:
    472		offset = MSR_DRAM_PERF_STATUS;
    473		break;
    474	default:
    475		offset = -1;
    476	}
    477	return offset;
    478}
    479
    480int offset_to_idx(off_t offset)
    481{
    482	int idx;
    483
    484	switch (offset) {
    485	case MSR_PKG_ENERGY_STATUS:
    486	case MSR_PKG_ENERGY_STAT:
    487		idx = IDX_PKG_ENERGY;
    488		break;
    489	case MSR_DRAM_ENERGY_STATUS:
    490		idx = IDX_DRAM_ENERGY;
    491		break;
    492	case MSR_PP0_ENERGY_STATUS:
    493		idx = IDX_PP0_ENERGY;
    494		break;
    495	case MSR_PP1_ENERGY_STATUS:
    496		idx = IDX_PP1_ENERGY;
    497		break;
    498	case MSR_PKG_PERF_STATUS:
    499		idx = IDX_PKG_PERF;
    500		break;
    501	case MSR_DRAM_PERF_STATUS:
    502		idx = IDX_DRAM_PERF;
    503		break;
    504	default:
    505		idx = -1;
    506	}
    507	return idx;
    508}
    509
    510int idx_valid(int idx)
    511{
    512	switch (idx) {
    513	case IDX_PKG_ENERGY:
    514		return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
    515	case IDX_DRAM_ENERGY:
    516		return do_rapl & RAPL_DRAM;
    517	case IDX_PP0_ENERGY:
    518		return do_rapl & RAPL_CORES_ENERGY_STATUS;
    519	case IDX_PP1_ENERGY:
    520		return do_rapl & RAPL_GFX;
    521	case IDX_PKG_PERF:
    522		return do_rapl & RAPL_PKG_PERF_STATUS;
    523	case IDX_DRAM_PERF:
    524		return do_rapl & RAPL_DRAM_PERF_STATUS;
    525	default:
    526		return 0;
    527	}
    528}
    529
    530struct sys_counters {
    531	unsigned int added_thread_counters;
    532	unsigned int added_core_counters;
    533	unsigned int added_package_counters;
    534	struct msr_counter *tp;
    535	struct msr_counter *cp;
    536	struct msr_counter *pp;
    537} sys;
    538
    539struct system_summary {
    540	struct thread_data threads;
    541	struct core_data cores;
    542	struct pkg_data packages;
    543} average;
    544
    545struct cpu_topology {
    546	int physical_package_id;
    547	int die_id;
    548	int logical_cpu_id;
    549	int physical_node_id;
    550	int logical_node_id;	/* 0-based count within the package */
    551	int physical_core_id;
    552	int thread_id;
    553	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
    554} *cpus;
    555
    556struct topo_params {
    557	int num_packages;
    558	int num_die;
    559	int num_cpus;
    560	int num_cores;
    561	int max_cpu_num;
    562	int max_node_num;
    563	int nodes_per_pkg;
    564	int cores_per_node;
    565	int threads_per_core;
    566} topo;
    567
    568struct timeval tv_even, tv_odd, tv_delta;
    569
    570int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
    571int *irqs_per_cpu;		/* indexed by cpu_num */
    572
    573void setup_all_buffers(void);
    574
    575char *sys_lpi_file;
    576char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
    577char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
    578
    579int cpu_is_not_present(int cpu)
    580{
    581	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
    582}
    583
    584/*
    585 * run func(thread, core, package) in topology order
    586 * skip non-present cpus
    587 */
    588
    589int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
    590		 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
    591{
    592	int retval, pkg_no, core_no, thread_no, node_no;
    593
    594	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
    595		for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
    596			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
    597				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
    598					struct thread_data *t;
    599					struct core_data *c;
    600					struct pkg_data *p;
    601
    602					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
    603
    604					if (cpu_is_not_present(t->cpu_id))
    605						continue;
    606
    607					c = GET_CORE(core_base, core_no, node_no, pkg_no);
    608					p = GET_PKG(pkg_base, pkg_no);
    609
    610					retval = func(t, c, p);
    611					if (retval)
    612						return retval;
    613				}
    614			}
    615		}
    616	}
    617	return 0;
    618}
    619
    620int cpu_migrate(int cpu)
    621{
    622	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
    623	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
    624	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
    625		return -1;
    626	else
    627		return 0;
    628}
    629
    630int get_msr_fd(int cpu)
    631{
    632	char pathname[32];
    633	int fd;
    634
    635	fd = fd_percpu[cpu];
    636
    637	if (fd)
    638		return fd;
    639
    640	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
    641	fd = open(pathname, O_RDONLY);
    642	if (fd < 0)
    643		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
    644
    645	fd_percpu[cpu] = fd;
    646
    647	return fd;
    648}
    649
    650static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
    651{
    652	return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
    653}
    654
    655static int perf_instr_count_open(int cpu_num)
    656{
    657	struct perf_event_attr pea;
    658	int fd;
    659
    660	memset(&pea, 0, sizeof(struct perf_event_attr));
    661	pea.type = PERF_TYPE_HARDWARE;
    662	pea.size = sizeof(struct perf_event_attr);
    663	pea.config = PERF_COUNT_HW_INSTRUCTIONS;
    664
    665	/* counter for cpu_num, including user + kernel and all processes */
    666	fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
    667	if (fd == -1) {
    668		warn("cpu%d: perf instruction counter", cpu_num);
    669		BIC_NOT_PRESENT(BIC_IPC);
    670	}
    671
    672	return fd;
    673}
    674
    675int get_instr_count_fd(int cpu)
    676{
    677	if (fd_instr_count_percpu[cpu])
    678		return fd_instr_count_percpu[cpu];
    679
    680	fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
    681
    682	return fd_instr_count_percpu[cpu];
    683}
    684
    685int get_msr(int cpu, off_t offset, unsigned long long *msr)
    686{
    687	ssize_t retval;
    688
    689	retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
    690
    691	if (retval != sizeof *msr)
    692		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
    693
    694	return 0;
    695}
    696
    697#define MAX_DEFERRED 16
    698char *deferred_add_names[MAX_DEFERRED];
    699char *deferred_skip_names[MAX_DEFERRED];
    700int deferred_add_index;
    701int deferred_skip_index;
    702
    703/*
    704 * HIDE_LIST - hide this list of counters, show the rest [default]
    705 * SHOW_LIST - show this list of counters, hide the rest
    706 */
    707enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
    708
    709void help(void)
    710{
    711	fprintf(outf,
    712		"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
    713		"\n"
    714		"Turbostat forks the specified COMMAND and prints statistics\n"
    715		"when COMMAND completes.\n"
    716		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
    717		"to print statistics, until interrupted.\n"
    718		"  -a, --add	add a counter\n"
    719		"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
    720		"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
    721		"		  {core | package | j,k,l..m,n-p }\n"
    722		"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
    723		"  -D, --Dump	displays the raw counter values\n"
    724		"  -e, --enable	[all | column]\n"
    725		"		shows all or the specified disabled column\n"
    726		"  -H, --hide [column|column,column,...]\n"
    727		"		hide the specified column(s)\n"
    728		"  -i, --interval sec.subsec\n"
    729		"		Override default 5-second measurement interval\n"
    730		"  -J, --Joules	displays energy in Joules instead of Watts\n"
    731		"  -l, --list	list column headers only\n"
    732		"  -n, --num_iterations num\n"
    733		"		number of the measurement iterations\n"
    734		"  -N, --header_iterations num\n"
    735		"		print header every num iterations\n"
    736		"  -o, --out file\n"
    737		"		create or truncate \"file\" for all output\n"
    738		"  -q, --quiet	skip decoding system configuration header\n"
    739		"  -s, --show [column|column,column,...]\n"
    740		"		show only the specified column(s)\n"
    741		"  -S, --Summary\n"
    742		"		limits output to 1-line system summary per interval\n"
    743		"  -T, --TCC temperature\n"
    744		"		sets the Thermal Control Circuit temperature in\n"
    745		"		  degrees Celsius\n"
    746		"  -h, --help	print this help message\n"
    747		"  -v, --version	print version information\n" "\n" "For more help, run \"man turbostat\"\n");
    748}
    749
    750/*
    751 * bic_lookup
    752 * for all the strings in comma separate name_list,
    753 * set the approprate bit in return value.
    754 */
    755unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
    756{
    757	unsigned int i;
    758	unsigned long long retval = 0;
    759
    760	while (name_list) {
    761		char *comma;
    762
    763		comma = strchr(name_list, ',');
    764
    765		if (comma)
    766			*comma = '\0';
    767
    768		for (i = 0; i < MAX_BIC; ++i) {
    769			if (!strcmp(name_list, bic[i].name)) {
    770				retval |= (1ULL << i);
    771				break;
    772			}
    773			if (!strcmp(name_list, "all")) {
    774				retval |= ~0;
    775				break;
    776			} else if (!strcmp(name_list, "topology")) {
    777				retval |= BIC_TOPOLOGY;
    778				break;
    779			} else if (!strcmp(name_list, "power")) {
    780				retval |= BIC_THERMAL_PWR;
    781				break;
    782			} else if (!strcmp(name_list, "idle")) {
    783				retval |= BIC_IDLE;
    784				break;
    785			} else if (!strcmp(name_list, "frequency")) {
    786				retval |= BIC_FREQUENCY;
    787				break;
    788			} else if (!strcmp(name_list, "other")) {
    789				retval |= BIC_OTHER;
    790				break;
    791			}
    792
    793		}
    794		if (i == MAX_BIC) {
    795			if (mode == SHOW_LIST) {
    796				deferred_add_names[deferred_add_index++] = name_list;
    797				if (deferred_add_index >= MAX_DEFERRED) {
    798					fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
    799						MAX_DEFERRED, name_list);
    800					help();
    801					exit(1);
    802				}
    803			} else {
    804				deferred_skip_names[deferred_skip_index++] = name_list;
    805				if (debug)
    806					fprintf(stderr, "deferred \"%s\"\n", name_list);
    807				if (deferred_skip_index >= MAX_DEFERRED) {
    808					fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
    809						MAX_DEFERRED, name_list);
    810					help();
    811					exit(1);
    812				}
    813			}
    814		}
    815
    816		name_list = comma;
    817		if (name_list)
    818			name_list++;
    819
    820	}
    821	return retval;
    822}
    823
    824void print_header(char *delim)
    825{
    826	struct msr_counter *mp;
    827	int printed = 0;
    828
    829	if (DO_BIC(BIC_USEC))
    830		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
    831	if (DO_BIC(BIC_TOD))
    832		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
    833	if (DO_BIC(BIC_Package))
    834		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
    835	if (DO_BIC(BIC_Die))
    836		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
    837	if (DO_BIC(BIC_Node))
    838		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
    839	if (DO_BIC(BIC_Core))
    840		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
    841	if (DO_BIC(BIC_CPU))
    842		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
    843	if (DO_BIC(BIC_APIC))
    844		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
    845	if (DO_BIC(BIC_X2APIC))
    846		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
    847	if (DO_BIC(BIC_Avg_MHz))
    848		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
    849	if (DO_BIC(BIC_Busy))
    850		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
    851	if (DO_BIC(BIC_Bzy_MHz))
    852		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
    853	if (DO_BIC(BIC_TSC_MHz))
    854		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
    855
    856	if (DO_BIC(BIC_IPC))
    857		outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
    858
    859	if (DO_BIC(BIC_IRQ)) {
    860		if (sums_need_wide_columns)
    861			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
    862		else
    863			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
    864	}
    865
    866	if (DO_BIC(BIC_SMI))
    867		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
    868
    869	for (mp = sys.tp; mp; mp = mp->next) {
    870
    871		if (mp->format == FORMAT_RAW) {
    872			if (mp->width == 64)
    873				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
    874			else
    875				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
    876		} else {
    877			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
    878				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
    879			else
    880				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
    881		}
    882	}
    883
    884	if (DO_BIC(BIC_CPU_c1))
    885		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
    886	if (DO_BIC(BIC_CPU_c3))
    887		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
    888	if (DO_BIC(BIC_CPU_c6))
    889		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
    890	if (DO_BIC(BIC_CPU_c7))
    891		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
    892
    893	if (DO_BIC(BIC_Mod_c6))
    894		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
    895
    896	if (DO_BIC(BIC_CoreTmp))
    897		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
    898
    899	if (DO_BIC(BIC_CORE_THROT_CNT))
    900		outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
    901
    902	if (do_rapl && !rapl_joules) {
    903		if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
    904			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
    905	} else if (do_rapl && rapl_joules) {
    906		if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
    907			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
    908	}
    909
    910	for (mp = sys.cp; mp; mp = mp->next) {
    911		if (mp->format == FORMAT_RAW) {
    912			if (mp->width == 64)
    913				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
    914			else
    915				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
    916		} else {
    917			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
    918				outp += sprintf(outp, "%s%8s", delim, mp->name);
    919			else
    920				outp += sprintf(outp, "%s%s", delim, mp->name);
    921		}
    922	}
    923
    924	if (DO_BIC(BIC_PkgTmp))
    925		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
    926
    927	if (DO_BIC(BIC_GFX_rc6))
    928		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
    929
    930	if (DO_BIC(BIC_GFXMHz))
    931		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
    932
    933	if (DO_BIC(BIC_GFXACTMHz))
    934		outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
    935
    936	if (DO_BIC(BIC_Totl_c0))
    937		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
    938	if (DO_BIC(BIC_Any_c0))
    939		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
    940	if (DO_BIC(BIC_GFX_c0))
    941		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
    942	if (DO_BIC(BIC_CPUGFX))
    943		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
    944
    945	if (DO_BIC(BIC_Pkgpc2))
    946		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
    947	if (DO_BIC(BIC_Pkgpc3))
    948		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
    949	if (DO_BIC(BIC_Pkgpc6))
    950		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
    951	if (DO_BIC(BIC_Pkgpc7))
    952		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
    953	if (DO_BIC(BIC_Pkgpc8))
    954		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
    955	if (DO_BIC(BIC_Pkgpc9))
    956		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
    957	if (DO_BIC(BIC_Pkgpc10))
    958		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
    959	if (DO_BIC(BIC_CPU_LPI))
    960		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
    961	if (DO_BIC(BIC_SYS_LPI))
    962		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
    963
    964	if (do_rapl && !rapl_joules) {
    965		if (DO_BIC(BIC_PkgWatt))
    966			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
    967		if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
    968			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
    969		if (DO_BIC(BIC_GFXWatt))
    970			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
    971		if (DO_BIC(BIC_RAMWatt))
    972			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
    973		if (DO_BIC(BIC_PKG__))
    974			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
    975		if (DO_BIC(BIC_RAM__))
    976			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
    977	} else if (do_rapl && rapl_joules) {
    978		if (DO_BIC(BIC_Pkg_J))
    979			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
    980		if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
    981			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
    982		if (DO_BIC(BIC_GFX_J))
    983			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
    984		if (DO_BIC(BIC_RAM_J))
    985			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
    986		if (DO_BIC(BIC_PKG__))
    987			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
    988		if (DO_BIC(BIC_RAM__))
    989			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
    990	}
    991	for (mp = sys.pp; mp; mp = mp->next) {
    992		if (mp->format == FORMAT_RAW) {
    993			if (mp->width == 64)
    994				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
    995			else
    996				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
    997		} else {
    998			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
    999				outp += sprintf(outp, "%s%8s", delim, mp->name);
   1000			else
   1001				outp += sprintf(outp, "%s%s", delim, mp->name);
   1002		}
   1003	}
   1004
   1005	outp += sprintf(outp, "\n");
   1006}
   1007
   1008int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   1009{
   1010	int i;
   1011	struct msr_counter *mp;
   1012
   1013	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
   1014
   1015	if (t) {
   1016		outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
   1017		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
   1018		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
   1019		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
   1020		outp += sprintf(outp, "c1: %016llX\n", t->c1);
   1021
   1022		if (DO_BIC(BIC_IPC))
   1023			outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
   1024
   1025		if (DO_BIC(BIC_IRQ))
   1026			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
   1027		if (DO_BIC(BIC_SMI))
   1028			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
   1029
   1030		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
   1031			outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
   1032		}
   1033	}
   1034
   1035	if (c) {
   1036		outp += sprintf(outp, "core: %d\n", c->core_id);
   1037		outp += sprintf(outp, "c3: %016llX\n", c->c3);
   1038		outp += sprintf(outp, "c6: %016llX\n", c->c6);
   1039		outp += sprintf(outp, "c7: %016llX\n", c->c7);
   1040		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
   1041		outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
   1042		outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
   1043
   1044		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
   1045			outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
   1046		}
   1047		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
   1048	}
   1049
   1050	if (p) {
   1051		outp += sprintf(outp, "package: %d\n", p->package_id);
   1052
   1053		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
   1054		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
   1055		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
   1056		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
   1057
   1058		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
   1059		if (DO_BIC(BIC_Pkgpc3))
   1060			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
   1061		if (DO_BIC(BIC_Pkgpc6))
   1062			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
   1063		if (DO_BIC(BIC_Pkgpc7))
   1064			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
   1065		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
   1066		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
   1067		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
   1068		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
   1069		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
   1070		outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
   1071		outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
   1072		outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
   1073		outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
   1074		outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
   1075		outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
   1076		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
   1077
   1078		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
   1079			outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
   1080		}
   1081	}
   1082
   1083	outp += sprintf(outp, "\n");
   1084
   1085	return 0;
   1086}
   1087
   1088/*
   1089 * column formatting convention & formats
   1090 */
   1091int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   1092{
   1093	double interval_float, tsc;
   1094	char *fmt8;
   1095	int i;
   1096	struct msr_counter *mp;
   1097	char *delim = "\t";
   1098	int printed = 0;
   1099
   1100	/* if showing only 1st thread in core and this isn't one, bail out */
   1101	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
   1102		return 0;
   1103
   1104	/* if showing only 1st thread in pkg and this isn't one, bail out */
   1105	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   1106		return 0;
   1107
   1108	/*if not summary line and --cpu is used */
   1109	if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
   1110		return 0;
   1111
   1112	if (DO_BIC(BIC_USEC)) {
   1113		/* on each row, print how many usec each timestamp took to gather */
   1114		struct timeval tv;
   1115
   1116		timersub(&t->tv_end, &t->tv_begin, &tv);
   1117		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
   1118	}
   1119
   1120	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
   1121	if (DO_BIC(BIC_TOD))
   1122		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
   1123
   1124	interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
   1125
   1126	tsc = t->tsc * tsc_tweak;
   1127
   1128	/* topo columns, print blanks on 1st (average) line */
   1129	if (t == &average.threads) {
   1130		if (DO_BIC(BIC_Package))
   1131			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1132		if (DO_BIC(BIC_Die))
   1133			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1134		if (DO_BIC(BIC_Node))
   1135			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1136		if (DO_BIC(BIC_Core))
   1137			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1138		if (DO_BIC(BIC_CPU))
   1139			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1140		if (DO_BIC(BIC_APIC))
   1141			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1142		if (DO_BIC(BIC_X2APIC))
   1143			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1144	} else {
   1145		if (DO_BIC(BIC_Package)) {
   1146			if (p)
   1147				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
   1148			else
   1149				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1150		}
   1151		if (DO_BIC(BIC_Die)) {
   1152			if (c)
   1153				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
   1154			else
   1155				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1156		}
   1157		if (DO_BIC(BIC_Node)) {
   1158			if (t)
   1159				outp += sprintf(outp, "%s%d",
   1160						(printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
   1161			else
   1162				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1163		}
   1164		if (DO_BIC(BIC_Core)) {
   1165			if (c)
   1166				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
   1167			else
   1168				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
   1169		}
   1170		if (DO_BIC(BIC_CPU))
   1171			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
   1172		if (DO_BIC(BIC_APIC))
   1173			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
   1174		if (DO_BIC(BIC_X2APIC))
   1175			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
   1176	}
   1177
   1178	if (DO_BIC(BIC_Avg_MHz))
   1179		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
   1180
   1181	if (DO_BIC(BIC_Busy))
   1182		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
   1183
   1184	if (DO_BIC(BIC_Bzy_MHz)) {
   1185		if (has_base_hz)
   1186			outp +=
   1187			    sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
   1188		else
   1189			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
   1190					tsc / units * t->aperf / t->mperf / interval_float);
   1191	}
   1192
   1193	if (DO_BIC(BIC_TSC_MHz))
   1194		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
   1195
   1196	if (DO_BIC(BIC_IPC))
   1197		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
   1198
   1199	/* IRQ */
   1200	if (DO_BIC(BIC_IRQ)) {
   1201		if (sums_need_wide_columns)
   1202			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
   1203		else
   1204			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
   1205	}
   1206
   1207	/* SMI */
   1208	if (DO_BIC(BIC_SMI))
   1209		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
   1210
   1211	/* Added counters */
   1212	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
   1213		if (mp->format == FORMAT_RAW) {
   1214			if (mp->width == 32)
   1215				outp +=
   1216				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
   1217			else
   1218				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
   1219		} else if (mp->format == FORMAT_DELTA) {
   1220			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
   1221				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
   1222			else
   1223				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
   1224		} else if (mp->format == FORMAT_PERCENT) {
   1225			if (mp->type == COUNTER_USEC)
   1226				outp +=
   1227				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
   1228					    t->counter[i] / interval_float / 10000);
   1229			else
   1230				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
   1231		}
   1232	}
   1233
   1234	/* C1 */
   1235	if (DO_BIC(BIC_CPU_c1))
   1236		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
   1237
   1238	/* print per-core data only for 1st thread in core */
   1239	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
   1240		goto done;
   1241
   1242	if (DO_BIC(BIC_CPU_c3))
   1243		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
   1244	if (DO_BIC(BIC_CPU_c6))
   1245		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
   1246	if (DO_BIC(BIC_CPU_c7))
   1247		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
   1248
   1249	/* Mod%c6 */
   1250	if (DO_BIC(BIC_Mod_c6))
   1251		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
   1252
   1253	if (DO_BIC(BIC_CoreTmp))
   1254		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
   1255
   1256	/* Core throttle count */
   1257	if (DO_BIC(BIC_CORE_THROT_CNT))
   1258		outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
   1259
   1260	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
   1261		if (mp->format == FORMAT_RAW) {
   1262			if (mp->width == 32)
   1263				outp +=
   1264				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
   1265			else
   1266				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
   1267		} else if (mp->format == FORMAT_DELTA) {
   1268			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
   1269				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
   1270			else
   1271				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
   1272		} else if (mp->format == FORMAT_PERCENT) {
   1273			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
   1274		}
   1275	}
   1276
   1277	fmt8 = "%s%.2f";
   1278
   1279	if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
   1280		outp +=
   1281		    sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
   1282	if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
   1283		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
   1284
   1285	/* print per-package data only for 1st core in package */
   1286	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   1287		goto done;
   1288
   1289	/* PkgTmp */
   1290	if (DO_BIC(BIC_PkgTmp))
   1291		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
   1292
   1293	/* GFXrc6 */
   1294	if (DO_BIC(BIC_GFX_rc6)) {
   1295		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
   1296			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
   1297		} else {
   1298			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
   1299					p->gfx_rc6_ms / 10.0 / interval_float);
   1300		}
   1301	}
   1302
   1303	/* GFXMHz */
   1304	if (DO_BIC(BIC_GFXMHz))
   1305		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
   1306
   1307	/* GFXACTMHz */
   1308	if (DO_BIC(BIC_GFXACTMHz))
   1309		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
   1310
   1311	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
   1312	if (DO_BIC(BIC_Totl_c0))
   1313		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
   1314	if (DO_BIC(BIC_Any_c0))
   1315		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
   1316	if (DO_BIC(BIC_GFX_c0))
   1317		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
   1318	if (DO_BIC(BIC_CPUGFX))
   1319		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
   1320
   1321	if (DO_BIC(BIC_Pkgpc2))
   1322		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
   1323	if (DO_BIC(BIC_Pkgpc3))
   1324		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
   1325	if (DO_BIC(BIC_Pkgpc6))
   1326		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
   1327	if (DO_BIC(BIC_Pkgpc7))
   1328		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
   1329	if (DO_BIC(BIC_Pkgpc8))
   1330		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
   1331	if (DO_BIC(BIC_Pkgpc9))
   1332		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
   1333	if (DO_BIC(BIC_Pkgpc10))
   1334		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
   1335
   1336	if (DO_BIC(BIC_CPU_LPI))
   1337		outp +=
   1338		    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
   1339	if (DO_BIC(BIC_SYS_LPI))
   1340		outp +=
   1341		    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
   1342
   1343	if (DO_BIC(BIC_PkgWatt))
   1344		outp +=
   1345		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
   1346
   1347	if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
   1348		outp +=
   1349		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
   1350	if (DO_BIC(BIC_GFXWatt))
   1351		outp +=
   1352		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
   1353	if (DO_BIC(BIC_RAMWatt))
   1354		outp +=
   1355		    sprintf(outp, fmt8, (printed++ ? delim : ""),
   1356			    p->energy_dram * rapl_dram_energy_units / interval_float);
   1357	if (DO_BIC(BIC_Pkg_J))
   1358		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
   1359	if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
   1360		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
   1361	if (DO_BIC(BIC_GFX_J))
   1362		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
   1363	if (DO_BIC(BIC_RAM_J))
   1364		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
   1365	if (DO_BIC(BIC_PKG__))
   1366		outp +=
   1367		    sprintf(outp, fmt8, (printed++ ? delim : ""),
   1368			    100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
   1369	if (DO_BIC(BIC_RAM__))
   1370		outp +=
   1371		    sprintf(outp, fmt8, (printed++ ? delim : ""),
   1372			    100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
   1373
   1374	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
   1375		if (mp->format == FORMAT_RAW) {
   1376			if (mp->width == 32)
   1377				outp +=
   1378				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
   1379			else
   1380				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
   1381		} else if (mp->format == FORMAT_DELTA) {
   1382			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
   1383				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
   1384			else
   1385				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
   1386		} else if (mp->format == FORMAT_PERCENT) {
   1387			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
   1388		}
   1389	}
   1390
   1391done:
   1392	if (*(outp - 1) != '\n')
   1393		outp += sprintf(outp, "\n");
   1394
   1395	return 0;
   1396}
   1397
   1398void flush_output_stdout(void)
   1399{
   1400	FILE *filep;
   1401
   1402	if (outf == stderr)
   1403		filep = stdout;
   1404	else
   1405		filep = outf;
   1406
   1407	fputs(output_buffer, filep);
   1408	fflush(filep);
   1409
   1410	outp = output_buffer;
   1411}
   1412
   1413void flush_output_stderr(void)
   1414{
   1415	fputs(output_buffer, outf);
   1416	fflush(outf);
   1417	outp = output_buffer;
   1418}
   1419
   1420void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   1421{
   1422	static int count;
   1423
   1424	if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
   1425		print_header("\t");
   1426
   1427	format_counters(&average.threads, &average.cores, &average.packages);
   1428
   1429	count++;
   1430
   1431	if (summary_only)
   1432		return;
   1433
   1434	for_all_cpus(format_counters, t, c, p);
   1435}
   1436
   1437#define DELTA_WRAP32(new, old)			\
   1438	old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
   1439
   1440int delta_package(struct pkg_data *new, struct pkg_data *old)
   1441{
   1442	int i;
   1443	struct msr_counter *mp;
   1444
   1445	if (DO_BIC(BIC_Totl_c0))
   1446		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
   1447	if (DO_BIC(BIC_Any_c0))
   1448		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
   1449	if (DO_BIC(BIC_GFX_c0))
   1450		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
   1451	if (DO_BIC(BIC_CPUGFX))
   1452		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
   1453
   1454	old->pc2 = new->pc2 - old->pc2;
   1455	if (DO_BIC(BIC_Pkgpc3))
   1456		old->pc3 = new->pc3 - old->pc3;
   1457	if (DO_BIC(BIC_Pkgpc6))
   1458		old->pc6 = new->pc6 - old->pc6;
   1459	if (DO_BIC(BIC_Pkgpc7))
   1460		old->pc7 = new->pc7 - old->pc7;
   1461	old->pc8 = new->pc8 - old->pc8;
   1462	old->pc9 = new->pc9 - old->pc9;
   1463	old->pc10 = new->pc10 - old->pc10;
   1464	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
   1465	old->sys_lpi = new->sys_lpi - old->sys_lpi;
   1466	old->pkg_temp_c = new->pkg_temp_c;
   1467
   1468	/* flag an error when rc6 counter resets/wraps */
   1469	if (old->gfx_rc6_ms > new->gfx_rc6_ms)
   1470		old->gfx_rc6_ms = -1;
   1471	else
   1472		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
   1473
   1474	old->gfx_mhz = new->gfx_mhz;
   1475	old->gfx_act_mhz = new->gfx_act_mhz;
   1476
   1477	old->energy_pkg = new->energy_pkg - old->energy_pkg;
   1478	old->energy_cores = new->energy_cores - old->energy_cores;
   1479	old->energy_gfx = new->energy_gfx - old->energy_gfx;
   1480	old->energy_dram = new->energy_dram - old->energy_dram;
   1481	old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
   1482	old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
   1483
   1484	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
   1485		if (mp->format == FORMAT_RAW)
   1486			old->counter[i] = new->counter[i];
   1487		else
   1488			old->counter[i] = new->counter[i] - old->counter[i];
   1489	}
   1490
   1491	return 0;
   1492}
   1493
   1494void delta_core(struct core_data *new, struct core_data *old)
   1495{
   1496	int i;
   1497	struct msr_counter *mp;
   1498
   1499	old->c3 = new->c3 - old->c3;
   1500	old->c6 = new->c6 - old->c6;
   1501	old->c7 = new->c7 - old->c7;
   1502	old->core_temp_c = new->core_temp_c;
   1503	old->core_throt_cnt = new->core_throt_cnt;
   1504	old->mc6_us = new->mc6_us - old->mc6_us;
   1505
   1506	DELTA_WRAP32(new->core_energy, old->core_energy);
   1507
   1508	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
   1509		if (mp->format == FORMAT_RAW)
   1510			old->counter[i] = new->counter[i];
   1511		else
   1512			old->counter[i] = new->counter[i] - old->counter[i];
   1513	}
   1514}
   1515
   1516int soft_c1_residency_display(int bic)
   1517{
   1518	if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
   1519		return 0;
   1520
   1521	return DO_BIC_READ(bic);
   1522}
   1523
   1524/*
   1525 * old = new - old
   1526 */
   1527int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
   1528{
   1529	int i;
   1530	struct msr_counter *mp;
   1531
   1532	/* we run cpuid just the 1st time, copy the results */
   1533	if (DO_BIC(BIC_APIC))
   1534		new->apic_id = old->apic_id;
   1535	if (DO_BIC(BIC_X2APIC))
   1536		new->x2apic_id = old->x2apic_id;
   1537
   1538	/*
   1539	 * the timestamps from start of measurement interval are in "old"
   1540	 * the timestamp from end of measurement interval are in "new"
   1541	 * over-write old w/ new so we can print end of interval values
   1542	 */
   1543
   1544	timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
   1545	old->tv_begin = new->tv_begin;
   1546	old->tv_end = new->tv_end;
   1547
   1548	old->tsc = new->tsc - old->tsc;
   1549
   1550	/* check for TSC < 1 Mcycles over interval */
   1551	if (old->tsc < (1000 * 1000))
   1552		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
   1553		     "You can disable all c-states by booting with \"idle=poll\"\n"
   1554		     "or just the deep ones with \"processor.max_cstate=1\"");
   1555
   1556	old->c1 = new->c1 - old->c1;
   1557
   1558	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
   1559		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
   1560			old->aperf = new->aperf - old->aperf;
   1561			old->mperf = new->mperf - old->mperf;
   1562		} else {
   1563			return -1;
   1564		}
   1565	}
   1566
   1567	if (use_c1_residency_msr) {
   1568		/*
   1569		 * Some models have a dedicated C1 residency MSR,
   1570		 * which should be more accurate than the derivation below.
   1571		 */
   1572	} else {
   1573		/*
   1574		 * As counter collection is not atomic,
   1575		 * it is possible for mperf's non-halted cycles + idle states
   1576		 * to exceed TSC's all cycles: show c1 = 0% in that case.
   1577		 */
   1578		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
   1579			old->c1 = 0;
   1580		else {
   1581			/* normal case, derive c1 */
   1582			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
   1583			    - core_delta->c6 - core_delta->c7;
   1584		}
   1585	}
   1586
   1587	if (old->mperf == 0) {
   1588		if (debug > 1)
   1589			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
   1590		old->mperf = 1;	/* divide by 0 protection */
   1591	}
   1592
   1593	if (DO_BIC(BIC_IPC))
   1594		old->instr_count = new->instr_count - old->instr_count;
   1595
   1596	if (DO_BIC(BIC_IRQ))
   1597		old->irq_count = new->irq_count - old->irq_count;
   1598
   1599	if (DO_BIC(BIC_SMI))
   1600		old->smi_count = new->smi_count - old->smi_count;
   1601
   1602	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
   1603		if (mp->format == FORMAT_RAW)
   1604			old->counter[i] = new->counter[i];
   1605		else
   1606			old->counter[i] = new->counter[i] - old->counter[i];
   1607	}
   1608	return 0;
   1609}
   1610
   1611int delta_cpu(struct thread_data *t, struct core_data *c,
   1612	      struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
   1613{
   1614	int retval = 0;
   1615
   1616	/* calculate core delta only for 1st thread in core */
   1617	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
   1618		delta_core(c, c2);
   1619
   1620	/* always calculate thread delta */
   1621	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
   1622	if (retval)
   1623		return retval;
   1624
   1625	/* calculate package delta only for 1st core in package */
   1626	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
   1627		retval = delta_package(p, p2);
   1628
   1629	return retval;
   1630}
   1631
   1632void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   1633{
   1634	int i;
   1635	struct msr_counter *mp;
   1636
   1637	t->tv_begin.tv_sec = 0;
   1638	t->tv_begin.tv_usec = 0;
   1639	t->tv_end.tv_sec = 0;
   1640	t->tv_end.tv_usec = 0;
   1641	t->tv_delta.tv_sec = 0;
   1642	t->tv_delta.tv_usec = 0;
   1643
   1644	t->tsc = 0;
   1645	t->aperf = 0;
   1646	t->mperf = 0;
   1647	t->c1 = 0;
   1648
   1649	t->instr_count = 0;
   1650
   1651	t->irq_count = 0;
   1652	t->smi_count = 0;
   1653
   1654	/* tells format_counters to dump all fields from this set */
   1655	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
   1656
   1657	c->c3 = 0;
   1658	c->c6 = 0;
   1659	c->c7 = 0;
   1660	c->mc6_us = 0;
   1661	c->core_temp_c = 0;
   1662	c->core_energy = 0;
   1663	c->core_throt_cnt = 0;
   1664
   1665	p->pkg_wtd_core_c0 = 0;
   1666	p->pkg_any_core_c0 = 0;
   1667	p->pkg_any_gfxe_c0 = 0;
   1668	p->pkg_both_core_gfxe_c0 = 0;
   1669
   1670	p->pc2 = 0;
   1671	if (DO_BIC(BIC_Pkgpc3))
   1672		p->pc3 = 0;
   1673	if (DO_BIC(BIC_Pkgpc6))
   1674		p->pc6 = 0;
   1675	if (DO_BIC(BIC_Pkgpc7))
   1676		p->pc7 = 0;
   1677	p->pc8 = 0;
   1678	p->pc9 = 0;
   1679	p->pc10 = 0;
   1680	p->cpu_lpi = 0;
   1681	p->sys_lpi = 0;
   1682
   1683	p->energy_pkg = 0;
   1684	p->energy_dram = 0;
   1685	p->energy_cores = 0;
   1686	p->energy_gfx = 0;
   1687	p->rapl_pkg_perf_status = 0;
   1688	p->rapl_dram_perf_status = 0;
   1689	p->pkg_temp_c = 0;
   1690
   1691	p->gfx_rc6_ms = 0;
   1692	p->gfx_mhz = 0;
   1693	p->gfx_act_mhz = 0;
   1694	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
   1695		t->counter[i] = 0;
   1696
   1697	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
   1698		c->counter[i] = 0;
   1699
   1700	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
   1701		p->counter[i] = 0;
   1702}
   1703
   1704int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   1705{
   1706	int i;
   1707	struct msr_counter *mp;
   1708
   1709	/* copy un-changing apic_id's */
   1710	if (DO_BIC(BIC_APIC))
   1711		average.threads.apic_id = t->apic_id;
   1712	if (DO_BIC(BIC_X2APIC))
   1713		average.threads.x2apic_id = t->x2apic_id;
   1714
   1715	/* remember first tv_begin */
   1716	if (average.threads.tv_begin.tv_sec == 0)
   1717		average.threads.tv_begin = t->tv_begin;
   1718
   1719	/* remember last tv_end */
   1720	average.threads.tv_end = t->tv_end;
   1721
   1722	average.threads.tsc += t->tsc;
   1723	average.threads.aperf += t->aperf;
   1724	average.threads.mperf += t->mperf;
   1725	average.threads.c1 += t->c1;
   1726
   1727	average.threads.instr_count += t->instr_count;
   1728
   1729	average.threads.irq_count += t->irq_count;
   1730	average.threads.smi_count += t->smi_count;
   1731
   1732	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
   1733		if (mp->format == FORMAT_RAW)
   1734			continue;
   1735		average.threads.counter[i] += t->counter[i];
   1736	}
   1737
   1738	/* sum per-core values only for 1st thread in core */
   1739	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
   1740		return 0;
   1741
   1742	average.cores.c3 += c->c3;
   1743	average.cores.c6 += c->c6;
   1744	average.cores.c7 += c->c7;
   1745	average.cores.mc6_us += c->mc6_us;
   1746
   1747	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
   1748	average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
   1749
   1750	average.cores.core_energy += c->core_energy;
   1751
   1752	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
   1753		if (mp->format == FORMAT_RAW)
   1754			continue;
   1755		average.cores.counter[i] += c->counter[i];
   1756	}
   1757
   1758	/* sum per-pkg values only for 1st core in pkg */
   1759	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   1760		return 0;
   1761
   1762	if (DO_BIC(BIC_Totl_c0))
   1763		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
   1764	if (DO_BIC(BIC_Any_c0))
   1765		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
   1766	if (DO_BIC(BIC_GFX_c0))
   1767		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
   1768	if (DO_BIC(BIC_CPUGFX))
   1769		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
   1770
   1771	average.packages.pc2 += p->pc2;
   1772	if (DO_BIC(BIC_Pkgpc3))
   1773		average.packages.pc3 += p->pc3;
   1774	if (DO_BIC(BIC_Pkgpc6))
   1775		average.packages.pc6 += p->pc6;
   1776	if (DO_BIC(BIC_Pkgpc7))
   1777		average.packages.pc7 += p->pc7;
   1778	average.packages.pc8 += p->pc8;
   1779	average.packages.pc9 += p->pc9;
   1780	average.packages.pc10 += p->pc10;
   1781
   1782	average.packages.cpu_lpi = p->cpu_lpi;
   1783	average.packages.sys_lpi = p->sys_lpi;
   1784
   1785	average.packages.energy_pkg += p->energy_pkg;
   1786	average.packages.energy_dram += p->energy_dram;
   1787	average.packages.energy_cores += p->energy_cores;
   1788	average.packages.energy_gfx += p->energy_gfx;
   1789
   1790	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
   1791	average.packages.gfx_mhz = p->gfx_mhz;
   1792	average.packages.gfx_act_mhz = p->gfx_act_mhz;
   1793
   1794	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
   1795
   1796	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
   1797	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
   1798
   1799	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
   1800		if (mp->format == FORMAT_RAW)
   1801			continue;
   1802		average.packages.counter[i] += p->counter[i];
   1803	}
   1804	return 0;
   1805}
   1806
   1807/*
   1808 * sum the counters for all cpus in the system
   1809 * compute the weighted average
   1810 */
   1811void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   1812{
   1813	int i;
   1814	struct msr_counter *mp;
   1815
   1816	clear_counters(&average.threads, &average.cores, &average.packages);
   1817
   1818	for_all_cpus(sum_counters, t, c, p);
   1819
   1820	/* Use the global time delta for the average. */
   1821	average.threads.tv_delta = tv_delta;
   1822
   1823	average.threads.tsc /= topo.num_cpus;
   1824	average.threads.aperf /= topo.num_cpus;
   1825	average.threads.mperf /= topo.num_cpus;
   1826	average.threads.instr_count /= topo.num_cpus;
   1827	average.threads.c1 /= topo.num_cpus;
   1828
   1829	if (average.threads.irq_count > 9999999)
   1830		sums_need_wide_columns = 1;
   1831
   1832	average.cores.c3 /= topo.num_cores;
   1833	average.cores.c6 /= topo.num_cores;
   1834	average.cores.c7 /= topo.num_cores;
   1835	average.cores.mc6_us /= topo.num_cores;
   1836
   1837	if (DO_BIC(BIC_Totl_c0))
   1838		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
   1839	if (DO_BIC(BIC_Any_c0))
   1840		average.packages.pkg_any_core_c0 /= topo.num_packages;
   1841	if (DO_BIC(BIC_GFX_c0))
   1842		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
   1843	if (DO_BIC(BIC_CPUGFX))
   1844		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
   1845
   1846	average.packages.pc2 /= topo.num_packages;
   1847	if (DO_BIC(BIC_Pkgpc3))
   1848		average.packages.pc3 /= topo.num_packages;
   1849	if (DO_BIC(BIC_Pkgpc6))
   1850		average.packages.pc6 /= topo.num_packages;
   1851	if (DO_BIC(BIC_Pkgpc7))
   1852		average.packages.pc7 /= topo.num_packages;
   1853
   1854	average.packages.pc8 /= topo.num_packages;
   1855	average.packages.pc9 /= topo.num_packages;
   1856	average.packages.pc10 /= topo.num_packages;
   1857
   1858	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
   1859		if (mp->format == FORMAT_RAW)
   1860			continue;
   1861		if (mp->type == COUNTER_ITEMS) {
   1862			if (average.threads.counter[i] > 9999999)
   1863				sums_need_wide_columns = 1;
   1864			continue;
   1865		}
   1866		average.threads.counter[i] /= topo.num_cpus;
   1867	}
   1868	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
   1869		if (mp->format == FORMAT_RAW)
   1870			continue;
   1871		if (mp->type == COUNTER_ITEMS) {
   1872			if (average.cores.counter[i] > 9999999)
   1873				sums_need_wide_columns = 1;
   1874		}
   1875		average.cores.counter[i] /= topo.num_cores;
   1876	}
   1877	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
   1878		if (mp->format == FORMAT_RAW)
   1879			continue;
   1880		if (mp->type == COUNTER_ITEMS) {
   1881			if (average.packages.counter[i] > 9999999)
   1882				sums_need_wide_columns = 1;
   1883		}
   1884		average.packages.counter[i] /= topo.num_packages;
   1885	}
   1886}
   1887
   1888static unsigned long long rdtsc(void)
   1889{
   1890	unsigned int low, high;
   1891
   1892	asm volatile ("rdtsc":"=a" (low), "=d"(high));
   1893
   1894	return low | ((unsigned long long)high) << 32;
   1895}
   1896
   1897/*
   1898 * Open a file, and exit on failure
   1899 */
   1900FILE *fopen_or_die(const char *path, const char *mode)
   1901{
   1902	FILE *filep = fopen(path, mode);
   1903
   1904	if (!filep)
   1905		err(1, "%s: open failed", path);
   1906	return filep;
   1907}
   1908
   1909/*
   1910 * snapshot_sysfs_counter()
   1911 *
   1912 * return snapshot of given counter
   1913 */
   1914unsigned long long snapshot_sysfs_counter(char *path)
   1915{
   1916	FILE *fp;
   1917	int retval;
   1918	unsigned long long counter;
   1919
   1920	fp = fopen_or_die(path, "r");
   1921
   1922	retval = fscanf(fp, "%lld", &counter);
   1923	if (retval != 1)
   1924		err(1, "snapshot_sysfs_counter(%s)", path);
   1925
   1926	fclose(fp);
   1927
   1928	return counter;
   1929}
   1930
   1931int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
   1932{
   1933	if (mp->msr_num != 0) {
   1934		if (get_msr(cpu, mp->msr_num, counterp))
   1935			return -1;
   1936	} else {
   1937		char path[128 + PATH_BYTES];
   1938
   1939		if (mp->flags & SYSFS_PERCPU) {
   1940			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path);
   1941
   1942			*counterp = snapshot_sysfs_counter(path);
   1943		} else {
   1944			*counterp = snapshot_sysfs_counter(mp->path);
   1945		}
   1946	}
   1947
   1948	return 0;
   1949}
   1950
   1951int get_epb(int cpu)
   1952{
   1953	char path[128 + PATH_BYTES];
   1954	unsigned long long msr;
   1955	int ret, epb = -1;
   1956	FILE *fp;
   1957
   1958	sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
   1959
   1960	fp = fopen(path, "r");
   1961	if (!fp)
   1962		goto msr_fallback;
   1963
   1964	ret = fscanf(fp, "%d", &epb);
   1965	if (ret != 1)
   1966		err(1, "%s(%s)", __func__, path);
   1967
   1968	fclose(fp);
   1969
   1970	return epb;
   1971
   1972msr_fallback:
   1973	get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
   1974
   1975	return msr & 0xf;
   1976}
   1977
   1978void get_apic_id(struct thread_data *t)
   1979{
   1980	unsigned int eax, ebx, ecx, edx;
   1981
   1982	if (DO_BIC(BIC_APIC)) {
   1983		eax = ebx = ecx = edx = 0;
   1984		__cpuid(1, eax, ebx, ecx, edx);
   1985
   1986		t->apic_id = (ebx >> 24) & 0xff;
   1987	}
   1988
   1989	if (!DO_BIC(BIC_X2APIC))
   1990		return;
   1991
   1992	if (authentic_amd || hygon_genuine) {
   1993		unsigned int topology_extensions;
   1994
   1995		if (max_extended_level < 0x8000001e)
   1996			return;
   1997
   1998		eax = ebx = ecx = edx = 0;
   1999		__cpuid(0x80000001, eax, ebx, ecx, edx);
   2000		topology_extensions = ecx & (1 << 22);
   2001
   2002		if (topology_extensions == 0)
   2003			return;
   2004
   2005		eax = ebx = ecx = edx = 0;
   2006		__cpuid(0x8000001e, eax, ebx, ecx, edx);
   2007
   2008		t->x2apic_id = eax;
   2009		return;
   2010	}
   2011
   2012	if (!genuine_intel)
   2013		return;
   2014
   2015	if (max_level < 0xb)
   2016		return;
   2017
   2018	ecx = 0;
   2019	__cpuid(0xb, eax, ebx, ecx, edx);
   2020	t->x2apic_id = edx;
   2021
   2022	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
   2023		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
   2024}
   2025
   2026int get_core_throt_cnt(int cpu, unsigned long long *cnt)
   2027{
   2028	char path[128 + PATH_BYTES];
   2029	unsigned long long tmp;
   2030	FILE *fp;
   2031	int ret;
   2032
   2033	sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
   2034	fp = fopen(path, "r");
   2035	if (!fp)
   2036		return -1;
   2037	ret = fscanf(fp, "%lld", &tmp);
   2038	if (ret != 1)
   2039		return -1;
   2040	fclose(fp);
   2041	*cnt = tmp;
   2042
   2043	return 0;
   2044}
   2045
   2046/*
   2047 * get_counters(...)
   2048 * migrate to cpu
   2049 * acquire and record local counters for that cpu
   2050 */
   2051int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   2052{
   2053	int cpu = t->cpu_id;
   2054	unsigned long long msr;
   2055	int aperf_mperf_retry_count = 0;
   2056	struct msr_counter *mp;
   2057	int i;
   2058
   2059	if (cpu_migrate(cpu)) {
   2060		fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
   2061		return -1;
   2062	}
   2063
   2064	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
   2065
   2066	if (first_counter_read)
   2067		get_apic_id(t);
   2068retry:
   2069	t->tsc = rdtsc();	/* we are running on local CPU of interest */
   2070
   2071	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
   2072		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
   2073
   2074		/*
   2075		 * The TSC, APERF and MPERF must be read together for
   2076		 * APERF/MPERF and MPERF/TSC to give accurate results.
   2077		 *
   2078		 * Unfortunately, APERF and MPERF are read by
   2079		 * individual system call, so delays may occur
   2080		 * between them.  If the time to read them
   2081		 * varies by a large amount, we re-read them.
   2082		 */
   2083
   2084		/*
   2085		 * This initial dummy APERF read has been seen to
   2086		 * reduce jitter in the subsequent reads.
   2087		 */
   2088
   2089		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
   2090			return -3;
   2091
   2092		t->tsc = rdtsc();	/* re-read close to APERF */
   2093
   2094		tsc_before = t->tsc;
   2095
   2096		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
   2097			return -3;
   2098
   2099		tsc_between = rdtsc();
   2100
   2101		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
   2102			return -4;
   2103
   2104		tsc_after = rdtsc();
   2105
   2106		aperf_time = tsc_between - tsc_before;
   2107		mperf_time = tsc_after - tsc_between;
   2108
   2109		/*
   2110		 * If the system call latency to read APERF and MPERF
   2111		 * differ by more than 2x, then try again.
   2112		 */
   2113		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
   2114			aperf_mperf_retry_count++;
   2115			if (aperf_mperf_retry_count < 5)
   2116				goto retry;
   2117			else
   2118				warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
   2119		}
   2120		aperf_mperf_retry_count = 0;
   2121
   2122		t->aperf = t->aperf * aperf_mperf_multiplier;
   2123		t->mperf = t->mperf * aperf_mperf_multiplier;
   2124	}
   2125
   2126	if (DO_BIC(BIC_IPC))
   2127		if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
   2128			return -4;
   2129
   2130	if (DO_BIC(BIC_IRQ))
   2131		t->irq_count = irqs_per_cpu[cpu];
   2132	if (DO_BIC(BIC_SMI)) {
   2133		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
   2134			return -5;
   2135		t->smi_count = msr & 0xFFFFFFFF;
   2136	}
   2137	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
   2138		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
   2139			return -6;
   2140	}
   2141
   2142	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
   2143		if (get_mp(cpu, mp, &t->counter[i]))
   2144			return -10;
   2145	}
   2146
   2147	/* collect core counters only for 1st thread in core */
   2148	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
   2149		goto done;
   2150
   2151	if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
   2152		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
   2153			return -6;
   2154	}
   2155
   2156	if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
   2157		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
   2158			return -7;
   2159	} else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
   2160		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
   2161			return -7;
   2162	}
   2163
   2164	if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
   2165		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
   2166			return -8;
   2167		else if (t->is_atom) {
   2168			/*
   2169			 * For Atom CPUs that has core cstate deeper than c6,
   2170			 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
   2171			 * Minus CC7 (and deeper cstates) residency to get
   2172			 * accturate cc6 residency.
   2173			 */
   2174			c->c6 -= c->c7;
   2175		}
   2176	}
   2177
   2178	if (DO_BIC(BIC_Mod_c6))
   2179		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
   2180			return -8;
   2181
   2182	if (DO_BIC(BIC_CoreTmp)) {
   2183		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
   2184			return -9;
   2185		c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
   2186	}
   2187
   2188	if (DO_BIC(BIC_CORE_THROT_CNT))
   2189		get_core_throt_cnt(cpu, &c->core_throt_cnt);
   2190
   2191	if (do_rapl & RAPL_AMD_F17H) {
   2192		if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
   2193			return -14;
   2194		c->core_energy = msr & 0xFFFFFFFF;
   2195	}
   2196
   2197	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
   2198		if (get_mp(cpu, mp, &c->counter[i]))
   2199			return -10;
   2200	}
   2201
   2202	/* collect package counters only for 1st core in package */
   2203	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   2204		goto done;
   2205
   2206	if (DO_BIC(BIC_Totl_c0)) {
   2207		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
   2208			return -10;
   2209	}
   2210	if (DO_BIC(BIC_Any_c0)) {
   2211		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
   2212			return -11;
   2213	}
   2214	if (DO_BIC(BIC_GFX_c0)) {
   2215		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
   2216			return -12;
   2217	}
   2218	if (DO_BIC(BIC_CPUGFX)) {
   2219		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
   2220			return -13;
   2221	}
   2222	if (DO_BIC(BIC_Pkgpc3))
   2223		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
   2224			return -9;
   2225	if (DO_BIC(BIC_Pkgpc6)) {
   2226		if (do_slm_cstates) {
   2227			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
   2228				return -10;
   2229		} else {
   2230			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
   2231				return -10;
   2232		}
   2233	}
   2234
   2235	if (DO_BIC(BIC_Pkgpc2))
   2236		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
   2237			return -11;
   2238	if (DO_BIC(BIC_Pkgpc7))
   2239		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
   2240			return -12;
   2241	if (DO_BIC(BIC_Pkgpc8))
   2242		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
   2243			return -13;
   2244	if (DO_BIC(BIC_Pkgpc9))
   2245		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
   2246			return -13;
   2247	if (DO_BIC(BIC_Pkgpc10))
   2248		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
   2249			return -13;
   2250
   2251	if (DO_BIC(BIC_CPU_LPI))
   2252		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
   2253	if (DO_BIC(BIC_SYS_LPI))
   2254		p->sys_lpi = cpuidle_cur_sys_lpi_us;
   2255
   2256	if (do_rapl & RAPL_PKG) {
   2257		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
   2258			return -13;
   2259		p->energy_pkg = msr;
   2260	}
   2261	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
   2262		if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
   2263			return -14;
   2264		p->energy_cores = msr;
   2265	}
   2266	if (do_rapl & RAPL_DRAM) {
   2267		if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
   2268			return -15;
   2269		p->energy_dram = msr;
   2270	}
   2271	if (do_rapl & RAPL_GFX) {
   2272		if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
   2273			return -16;
   2274		p->energy_gfx = msr;
   2275	}
   2276	if (do_rapl & RAPL_PKG_PERF_STATUS) {
   2277		if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
   2278			return -16;
   2279		p->rapl_pkg_perf_status = msr;
   2280	}
   2281	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
   2282		if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
   2283			return -16;
   2284		p->rapl_dram_perf_status = msr;
   2285	}
   2286	if (do_rapl & RAPL_AMD_F17H) {
   2287		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
   2288			return -13;
   2289		p->energy_pkg = msr;
   2290	}
   2291	if (DO_BIC(BIC_PkgTmp)) {
   2292		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
   2293			return -17;
   2294		p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
   2295	}
   2296
   2297	if (DO_BIC(BIC_GFX_rc6))
   2298		p->gfx_rc6_ms = gfx_cur_rc6_ms;
   2299
   2300	if (DO_BIC(BIC_GFXMHz))
   2301		p->gfx_mhz = gfx_cur_mhz;
   2302
   2303	if (DO_BIC(BIC_GFXACTMHz))
   2304		p->gfx_act_mhz = gfx_act_mhz;
   2305
   2306	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
   2307		if (get_mp(cpu, mp, &p->counter[i]))
   2308			return -10;
   2309	}
   2310done:
   2311	gettimeofday(&t->tv_end, (struct timezone *)NULL);
   2312
   2313	return 0;
   2314}
   2315
   2316/*
   2317 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
   2318 * If you change the values, note they are used both in comparisons
   2319 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
   2320 */
   2321
   2322#define PCLUKN 0		/* Unknown */
   2323#define PCLRSV 1		/* Reserved */
   2324#define PCL__0 2		/* PC0 */
   2325#define PCL__1 3		/* PC1 */
   2326#define PCL__2 4		/* PC2 */
   2327#define PCL__3 5		/* PC3 */
   2328#define PCL__4 6		/* PC4 */
   2329#define PCL__6 7		/* PC6 */
   2330#define PCL_6N 8		/* PC6 No Retention */
   2331#define PCL_6R 9		/* PC6 Retention */
   2332#define PCL__7 10		/* PC7 */
   2333#define PCL_7S 11		/* PC7 Shrink */
   2334#define PCL__8 12		/* PC8 */
   2335#define PCL__9 13		/* PC9 */
   2336#define PCL_10 14		/* PC10 */
   2337#define PCLUNL 15		/* Unlimited */
   2338
   2339int pkg_cstate_limit = PCLUKN;
   2340char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
   2341	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
   2342};
   2343
   2344int nhm_pkg_cstate_limits[16] =
   2345    { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2346	PCLRSV, PCLRSV
   2347};
   2348
   2349int snb_pkg_cstate_limits[16] =
   2350    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2351	PCLRSV, PCLRSV
   2352};
   2353
   2354int hsw_pkg_cstate_limits[16] =
   2355    { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2356	PCLRSV, PCLRSV
   2357};
   2358
   2359int slv_pkg_cstate_limits[16] =
   2360    { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2361	PCL__6, PCL__7
   2362};
   2363
   2364int amt_pkg_cstate_limits[16] =
   2365    { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2366	PCLRSV, PCLRSV
   2367};
   2368
   2369int phi_pkg_cstate_limits[16] =
   2370    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2371	PCLRSV, PCLRSV
   2372};
   2373
   2374int glm_pkg_cstate_limits[16] =
   2375    { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2376	PCLRSV, PCLRSV
   2377};
   2378
   2379int skx_pkg_cstate_limits[16] =
   2380    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2381	PCLRSV, PCLRSV
   2382};
   2383
   2384int icx_pkg_cstate_limits[16] =
   2385    { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
   2386	PCLRSV, PCLRSV
   2387};
   2388
   2389static void calculate_tsc_tweak()
   2390{
   2391	tsc_tweak = base_hz / tsc_hz;
   2392}
   2393
   2394void prewake_cstate_probe(unsigned int family, unsigned int model);
   2395
   2396static void dump_nhm_platform_info(void)
   2397{
   2398	unsigned long long msr;
   2399	unsigned int ratio;
   2400
   2401	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
   2402
   2403	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
   2404
   2405	ratio = (msr >> 40) & 0xFF;
   2406	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
   2407
   2408	ratio = (msr >> 8) & 0xFF;
   2409	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
   2410
   2411	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
   2412	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
   2413		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
   2414
   2415	/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
   2416	if (dis_cstate_prewake)
   2417		fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
   2418
   2419	return;
   2420}
   2421
   2422static void dump_hsw_turbo_ratio_limits(void)
   2423{
   2424	unsigned long long msr;
   2425	unsigned int ratio;
   2426
   2427	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
   2428
   2429	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
   2430
   2431	ratio = (msr >> 8) & 0xFF;
   2432	if (ratio)
   2433		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
   2434
   2435	ratio = (msr >> 0) & 0xFF;
   2436	if (ratio)
   2437		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
   2438	return;
   2439}
   2440
   2441static void dump_ivt_turbo_ratio_limits(void)
   2442{
   2443	unsigned long long msr;
   2444	unsigned int ratio;
   2445
   2446	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
   2447
   2448	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
   2449
   2450	ratio = (msr >> 56) & 0xFF;
   2451	if (ratio)
   2452		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
   2453
   2454	ratio = (msr >> 48) & 0xFF;
   2455	if (ratio)
   2456		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
   2457
   2458	ratio = (msr >> 40) & 0xFF;
   2459	if (ratio)
   2460		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
   2461
   2462	ratio = (msr >> 32) & 0xFF;
   2463	if (ratio)
   2464		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
   2465
   2466	ratio = (msr >> 24) & 0xFF;
   2467	if (ratio)
   2468		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
   2469
   2470	ratio = (msr >> 16) & 0xFF;
   2471	if (ratio)
   2472		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
   2473
   2474	ratio = (msr >> 8) & 0xFF;
   2475	if (ratio)
   2476		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
   2477
   2478	ratio = (msr >> 0) & 0xFF;
   2479	if (ratio)
   2480		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
   2481	return;
   2482}
   2483
   2484int has_turbo_ratio_group_limits(int family, int model)
   2485{
   2486
   2487	if (!genuine_intel)
   2488		return 0;
   2489
   2490	if (family != 6)
   2491		return 0;
   2492
   2493	switch (model) {
   2494	case INTEL_FAM6_ATOM_GOLDMONT:
   2495	case INTEL_FAM6_SKYLAKE_X:
   2496	case INTEL_FAM6_ICELAKE_X:
   2497	case INTEL_FAM6_ATOM_GOLDMONT_D:
   2498	case INTEL_FAM6_ATOM_TREMONT_D:
   2499		return 1;
   2500	default:
   2501		return 0;
   2502	}
   2503}
   2504
   2505static void dump_turbo_ratio_limits(int family, int model)
   2506{
   2507	unsigned long long msr, core_counts;
   2508	unsigned int ratio, group_size;
   2509
   2510	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
   2511	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
   2512
   2513	if (has_turbo_ratio_group_limits(family, model)) {
   2514		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
   2515		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
   2516	} else {
   2517		core_counts = 0x0807060504030201;
   2518	}
   2519
   2520	ratio = (msr >> 56) & 0xFF;
   2521	group_size = (core_counts >> 56) & 0xFF;
   2522	if (ratio)
   2523		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2524			ratio, bclk, ratio * bclk, group_size);
   2525
   2526	ratio = (msr >> 48) & 0xFF;
   2527	group_size = (core_counts >> 48) & 0xFF;
   2528	if (ratio)
   2529		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2530			ratio, bclk, ratio * bclk, group_size);
   2531
   2532	ratio = (msr >> 40) & 0xFF;
   2533	group_size = (core_counts >> 40) & 0xFF;
   2534	if (ratio)
   2535		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2536			ratio, bclk, ratio * bclk, group_size);
   2537
   2538	ratio = (msr >> 32) & 0xFF;
   2539	group_size = (core_counts >> 32) & 0xFF;
   2540	if (ratio)
   2541		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2542			ratio, bclk, ratio * bclk, group_size);
   2543
   2544	ratio = (msr >> 24) & 0xFF;
   2545	group_size = (core_counts >> 24) & 0xFF;
   2546	if (ratio)
   2547		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2548			ratio, bclk, ratio * bclk, group_size);
   2549
   2550	ratio = (msr >> 16) & 0xFF;
   2551	group_size = (core_counts >> 16) & 0xFF;
   2552	if (ratio)
   2553		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2554			ratio, bclk, ratio * bclk, group_size);
   2555
   2556	ratio = (msr >> 8) & 0xFF;
   2557	group_size = (core_counts >> 8) & 0xFF;
   2558	if (ratio)
   2559		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2560			ratio, bclk, ratio * bclk, group_size);
   2561
   2562	ratio = (msr >> 0) & 0xFF;
   2563	group_size = (core_counts >> 0) & 0xFF;
   2564	if (ratio)
   2565		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2566			ratio, bclk, ratio * bclk, group_size);
   2567	return;
   2568}
   2569
   2570static void dump_atom_turbo_ratio_limits(void)
   2571{
   2572	unsigned long long msr;
   2573	unsigned int ratio;
   2574
   2575	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
   2576	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
   2577
   2578	ratio = (msr >> 0) & 0x3F;
   2579	if (ratio)
   2580		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
   2581
   2582	ratio = (msr >> 8) & 0x3F;
   2583	if (ratio)
   2584		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
   2585
   2586	ratio = (msr >> 16) & 0x3F;
   2587	if (ratio)
   2588		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
   2589
   2590	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
   2591	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
   2592
   2593	ratio = (msr >> 24) & 0x3F;
   2594	if (ratio)
   2595		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
   2596
   2597	ratio = (msr >> 16) & 0x3F;
   2598	if (ratio)
   2599		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
   2600
   2601	ratio = (msr >> 8) & 0x3F;
   2602	if (ratio)
   2603		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
   2604
   2605	ratio = (msr >> 0) & 0x3F;
   2606	if (ratio)
   2607		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
   2608}
   2609
   2610static void dump_knl_turbo_ratio_limits(void)
   2611{
   2612	const unsigned int buckets_no = 7;
   2613
   2614	unsigned long long msr;
   2615	int delta_cores, delta_ratio;
   2616	int i, b_nr;
   2617	unsigned int cores[buckets_no];
   2618	unsigned int ratio[buckets_no];
   2619
   2620	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
   2621
   2622	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
   2623
   2624	/*
   2625	 * Turbo encoding in KNL is as follows:
   2626	 * [0] -- Reserved
   2627	 * [7:1] -- Base value of number of active cores of bucket 1.
   2628	 * [15:8] -- Base value of freq ratio of bucket 1.
   2629	 * [20:16] -- +ve delta of number of active cores of bucket 2.
   2630	 * i.e. active cores of bucket 2 =
   2631	 * active cores of bucket 1 + delta
   2632	 * [23:21] -- Negative delta of freq ratio of bucket 2.
   2633	 * i.e. freq ratio of bucket 2 =
   2634	 * freq ratio of bucket 1 - delta
   2635	 * [28:24]-- +ve delta of number of active cores of bucket 3.
   2636	 * [31:29]-- -ve delta of freq ratio of bucket 3.
   2637	 * [36:32]-- +ve delta of number of active cores of bucket 4.
   2638	 * [39:37]-- -ve delta of freq ratio of bucket 4.
   2639	 * [44:40]-- +ve delta of number of active cores of bucket 5.
   2640	 * [47:45]-- -ve delta of freq ratio of bucket 5.
   2641	 * [52:48]-- +ve delta of number of active cores of bucket 6.
   2642	 * [55:53]-- -ve delta of freq ratio of bucket 6.
   2643	 * [60:56]-- +ve delta of number of active cores of bucket 7.
   2644	 * [63:61]-- -ve delta of freq ratio of bucket 7.
   2645	 */
   2646
   2647	b_nr = 0;
   2648	cores[b_nr] = (msr & 0xFF) >> 1;
   2649	ratio[b_nr] = (msr >> 8) & 0xFF;
   2650
   2651	for (i = 16; i < 64; i += 8) {
   2652		delta_cores = (msr >> i) & 0x1F;
   2653		delta_ratio = (msr >> (i + 5)) & 0x7;
   2654
   2655		cores[b_nr + 1] = cores[b_nr] + delta_cores;
   2656		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
   2657		b_nr++;
   2658	}
   2659
   2660	for (i = buckets_no - 1; i >= 0; i--)
   2661		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
   2662			fprintf(outf,
   2663				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
   2664				ratio[i], bclk, ratio[i] * bclk, cores[i]);
   2665}
   2666
   2667static void dump_nhm_cst_cfg(void)
   2668{
   2669	unsigned long long msr;
   2670
   2671	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
   2672
   2673	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
   2674
   2675	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
   2676		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
   2677		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
   2678		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
   2679		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
   2680		(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
   2681
   2682#define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
   2683	if (has_automatic_cstate_conversion) {
   2684		fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
   2685	}
   2686
   2687	fprintf(outf, ")\n");
   2688
   2689	return;
   2690}
   2691
   2692static void dump_config_tdp(void)
   2693{
   2694	unsigned long long msr;
   2695
   2696	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
   2697	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
   2698	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
   2699
   2700	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
   2701	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
   2702	if (msr) {
   2703		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
   2704		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
   2705		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
   2706		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
   2707	}
   2708	fprintf(outf, ")\n");
   2709
   2710	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
   2711	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
   2712	if (msr) {
   2713		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
   2714		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
   2715		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
   2716		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
   2717	}
   2718	fprintf(outf, ")\n");
   2719
   2720	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
   2721	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
   2722	if ((msr) & 0x3)
   2723		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
   2724	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
   2725	fprintf(outf, ")\n");
   2726
   2727	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
   2728	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
   2729	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
   2730	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
   2731	fprintf(outf, ")\n");
   2732}
   2733
   2734unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
   2735
   2736void print_irtl(void)
   2737{
   2738	unsigned long long msr;
   2739
   2740	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
   2741	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
   2742	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
   2743		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
   2744
   2745	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
   2746	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
   2747	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
   2748		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
   2749
   2750	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
   2751	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
   2752	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
   2753		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
   2754
   2755	if (!do_irtl_hsw)
   2756		return;
   2757
   2758	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
   2759	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
   2760	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
   2761		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
   2762
   2763	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
   2764	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
   2765	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
   2766		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
   2767
   2768	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
   2769	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
   2770	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
   2771		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
   2772
   2773}
   2774
   2775void free_fd_percpu(void)
   2776{
   2777	int i;
   2778
   2779	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
   2780		if (fd_percpu[i] != 0)
   2781			close(fd_percpu[i]);
   2782	}
   2783
   2784	free(fd_percpu);
   2785}
   2786
   2787void free_all_buffers(void)
   2788{
   2789	int i;
   2790
   2791	CPU_FREE(cpu_present_set);
   2792	cpu_present_set = NULL;
   2793	cpu_present_setsize = 0;
   2794
   2795	CPU_FREE(cpu_affinity_set);
   2796	cpu_affinity_set = NULL;
   2797	cpu_affinity_setsize = 0;
   2798
   2799	free(thread_even);
   2800	free(core_even);
   2801	free(package_even);
   2802
   2803	thread_even = NULL;
   2804	core_even = NULL;
   2805	package_even = NULL;
   2806
   2807	free(thread_odd);
   2808	free(core_odd);
   2809	free(package_odd);
   2810
   2811	thread_odd = NULL;
   2812	core_odd = NULL;
   2813	package_odd = NULL;
   2814
   2815	free(output_buffer);
   2816	output_buffer = NULL;
   2817	outp = NULL;
   2818
   2819	free_fd_percpu();
   2820
   2821	free(irq_column_2_cpu);
   2822	free(irqs_per_cpu);
   2823
   2824	for (i = 0; i <= topo.max_cpu_num; ++i) {
   2825		if (cpus[i].put_ids)
   2826			CPU_FREE(cpus[i].put_ids);
   2827	}
   2828	free(cpus);
   2829}
   2830
   2831/*
   2832 * Parse a file containing a single int.
   2833 * Return 0 if file can not be opened
   2834 * Exit if file can be opened, but can not be parsed
   2835 */
   2836int parse_int_file(const char *fmt, ...)
   2837{
   2838	va_list args;
   2839	char path[PATH_MAX];
   2840	FILE *filep;
   2841	int value;
   2842
   2843	va_start(args, fmt);
   2844	vsnprintf(path, sizeof(path), fmt, args);
   2845	va_end(args);
   2846	filep = fopen(path, "r");
   2847	if (!filep)
   2848		return 0;
   2849	if (fscanf(filep, "%d", &value) != 1)
   2850		err(1, "%s: failed to parse number from file", path);
   2851	fclose(filep);
   2852	return value;
   2853}
   2854
   2855/*
   2856 * cpu_is_first_core_in_package(cpu)
   2857 * return 1 if given CPU is 1st core in package
   2858 */
   2859int cpu_is_first_core_in_package(int cpu)
   2860{
   2861	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
   2862}
   2863
   2864int get_physical_package_id(int cpu)
   2865{
   2866	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
   2867}
   2868
   2869int get_die_id(int cpu)
   2870{
   2871	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);
   2872}
   2873
   2874int get_core_id(int cpu)
   2875{
   2876	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
   2877}
   2878
   2879void set_node_data(void)
   2880{
   2881	int pkg, node, lnode, cpu, cpux;
   2882	int cpu_count;
   2883
   2884	/* initialize logical_node_id */
   2885	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
   2886		cpus[cpu].logical_node_id = -1;
   2887
   2888	cpu_count = 0;
   2889	for (pkg = 0; pkg < topo.num_packages; pkg++) {
   2890		lnode = 0;
   2891		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
   2892			if (cpus[cpu].physical_package_id != pkg)
   2893				continue;
   2894			/* find a cpu with an unset logical_node_id */
   2895			if (cpus[cpu].logical_node_id != -1)
   2896				continue;
   2897			cpus[cpu].logical_node_id = lnode;
   2898			node = cpus[cpu].physical_node_id;
   2899			cpu_count++;
   2900			/*
   2901			 * find all matching cpus on this pkg and set
   2902			 * the logical_node_id
   2903			 */
   2904			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
   2905				if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
   2906					cpus[cpux].logical_node_id = lnode;
   2907					cpu_count++;
   2908				}
   2909			}
   2910			lnode++;
   2911			if (lnode > topo.nodes_per_pkg)
   2912				topo.nodes_per_pkg = lnode;
   2913		}
   2914		if (cpu_count >= topo.max_cpu_num)
   2915			break;
   2916	}
   2917}
   2918
   2919int get_physical_node_id(struct cpu_topology *thiscpu)
   2920{
   2921	char path[80];
   2922	FILE *filep;
   2923	int i;
   2924	int cpu = thiscpu->logical_cpu_id;
   2925
   2926	for (i = 0; i <= topo.max_cpu_num; i++) {
   2927		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
   2928		filep = fopen(path, "r");
   2929		if (!filep)
   2930			continue;
   2931		fclose(filep);
   2932		return i;
   2933	}
   2934	return -1;
   2935}
   2936
   2937int get_thread_siblings(struct cpu_topology *thiscpu)
   2938{
   2939	char path[80], character;
   2940	FILE *filep;
   2941	unsigned long map;
   2942	int so, shift, sib_core;
   2943	int cpu = thiscpu->logical_cpu_id;
   2944	int offset = topo.max_cpu_num + 1;
   2945	size_t size;
   2946	int thread_id = 0;
   2947
   2948	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
   2949	if (thiscpu->thread_id < 0)
   2950		thiscpu->thread_id = thread_id++;
   2951	if (!thiscpu->put_ids)
   2952		return -1;
   2953
   2954	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
   2955	CPU_ZERO_S(size, thiscpu->put_ids);
   2956
   2957	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
   2958	filep = fopen(path, "r");
   2959
   2960	if (!filep) {
   2961		warnx("%s: open failed", path);
   2962		return -1;
   2963	}
   2964	do {
   2965		offset -= BITMASK_SIZE;
   2966		if (fscanf(filep, "%lx%c", &map, &character) != 2)
   2967			err(1, "%s: failed to parse file", path);
   2968		for (shift = 0; shift < BITMASK_SIZE; shift++) {
   2969			if ((map >> shift) & 0x1) {
   2970				so = shift + offset;
   2971				sib_core = get_core_id(so);
   2972				if (sib_core == thiscpu->physical_core_id) {
   2973					CPU_SET_S(so, size, thiscpu->put_ids);
   2974					if ((so != cpu) && (cpus[so].thread_id < 0))
   2975						cpus[so].thread_id = thread_id++;
   2976				}
   2977			}
   2978		}
   2979	} while (!strncmp(&character, ",", 1));
   2980	fclose(filep);
   2981
   2982	return CPU_COUNT_S(size, thiscpu->put_ids);
   2983}
   2984
   2985/*
   2986 * run func(thread, core, package) in topology order
   2987 * skip non-present cpus
   2988 */
   2989
   2990int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
   2991			       struct pkg_data *, struct thread_data *, struct core_data *,
   2992			       struct pkg_data *), struct thread_data *thread_base,
   2993		   struct core_data *core_base, struct pkg_data *pkg_base,
   2994		   struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
   2995{
   2996	int retval, pkg_no, node_no, core_no, thread_no;
   2997
   2998	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
   2999		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
   3000			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
   3001				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
   3002					struct thread_data *t, *t2;
   3003					struct core_data *c, *c2;
   3004					struct pkg_data *p, *p2;
   3005
   3006					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
   3007
   3008					if (cpu_is_not_present(t->cpu_id))
   3009						continue;
   3010
   3011					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
   3012
   3013					c = GET_CORE(core_base, core_no, node_no, pkg_no);
   3014					c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
   3015
   3016					p = GET_PKG(pkg_base, pkg_no);
   3017					p2 = GET_PKG(pkg_base2, pkg_no);
   3018
   3019					retval = func(t, c, p, t2, c2, p2);
   3020					if (retval)
   3021						return retval;
   3022				}
   3023			}
   3024		}
   3025	}
   3026	return 0;
   3027}
   3028
   3029/*
   3030 * run func(cpu) on every cpu in /proc/stat
   3031 * return max_cpu number
   3032 */
   3033int for_all_proc_cpus(int (func) (int))
   3034{
   3035	FILE *fp;
   3036	int cpu_num;
   3037	int retval;
   3038
   3039	fp = fopen_or_die(proc_stat, "r");
   3040
   3041	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
   3042	if (retval != 0)
   3043		err(1, "%s: failed to parse format", proc_stat);
   3044
   3045	while (1) {
   3046		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
   3047		if (retval != 1)
   3048			break;
   3049
   3050		retval = func(cpu_num);
   3051		if (retval) {
   3052			fclose(fp);
   3053			return (retval);
   3054		}
   3055	}
   3056	fclose(fp);
   3057	return 0;
   3058}
   3059
   3060void re_initialize(void)
   3061{
   3062	free_all_buffers();
   3063	setup_all_buffers();
   3064	fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
   3065}
   3066
   3067void set_max_cpu_num(void)
   3068{
   3069	FILE *filep;
   3070	int base_cpu;
   3071	unsigned long dummy;
   3072	char pathname[64];
   3073
   3074	base_cpu = sched_getcpu();
   3075	if (base_cpu < 0)
   3076		err(1, "cannot find calling cpu ID");
   3077	sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
   3078
   3079	filep = fopen_or_die(pathname, "r");
   3080	topo.max_cpu_num = 0;
   3081	while (fscanf(filep, "%lx,", &dummy) == 1)
   3082		topo.max_cpu_num += BITMASK_SIZE;
   3083	fclose(filep);
   3084	topo.max_cpu_num--;	/* 0 based */
   3085}
   3086
   3087/*
   3088 * count_cpus()
   3089 * remember the last one seen, it will be the max
   3090 */
   3091int count_cpus(int cpu)
   3092{
   3093	UNUSED(cpu);
   3094
   3095	topo.num_cpus++;
   3096	return 0;
   3097}
   3098
   3099int mark_cpu_present(int cpu)
   3100{
   3101	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
   3102	return 0;
   3103}
   3104
   3105int init_thread_id(int cpu)
   3106{
   3107	cpus[cpu].thread_id = -1;
   3108	return 0;
   3109}
   3110
   3111/*
   3112 * snapshot_proc_interrupts()
   3113 *
   3114 * read and record summary of /proc/interrupts
   3115 *
   3116 * return 1 if config change requires a restart, else return 0
   3117 */
   3118int snapshot_proc_interrupts(void)
   3119{
   3120	static FILE *fp;
   3121	int column, retval;
   3122
   3123	if (fp == NULL)
   3124		fp = fopen_or_die("/proc/interrupts", "r");
   3125	else
   3126		rewind(fp);
   3127
   3128	/* read 1st line of /proc/interrupts to get cpu* name for each column */
   3129	for (column = 0; column < topo.num_cpus; ++column) {
   3130		int cpu_number;
   3131
   3132		retval = fscanf(fp, " CPU%d", &cpu_number);
   3133		if (retval != 1)
   3134			break;
   3135
   3136		if (cpu_number > topo.max_cpu_num) {
   3137			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
   3138			return 1;
   3139		}
   3140
   3141		irq_column_2_cpu[column] = cpu_number;
   3142		irqs_per_cpu[cpu_number] = 0;
   3143	}
   3144
   3145	/* read /proc/interrupt count lines and sum up irqs per cpu */
   3146	while (1) {
   3147		int column;
   3148		char buf[64];
   3149
   3150		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
   3151		if (retval != 1)
   3152			break;
   3153
   3154		/* read the count per cpu */
   3155		for (column = 0; column < topo.num_cpus; ++column) {
   3156
   3157			int cpu_number, irq_count;
   3158
   3159			retval = fscanf(fp, " %d", &irq_count);
   3160			if (retval != 1)
   3161				break;
   3162
   3163			cpu_number = irq_column_2_cpu[column];
   3164			irqs_per_cpu[cpu_number] += irq_count;
   3165
   3166		}
   3167
   3168		while (getc(fp) != '\n') ;	/* flush interrupt description */
   3169
   3170	}
   3171	return 0;
   3172}
   3173
   3174/*
   3175 * snapshot_gfx_rc6_ms()
   3176 *
   3177 * record snapshot of
   3178 * /sys/class/drm/card0/power/rc6_residency_ms
   3179 *
   3180 * return 1 if config change requires a restart, else return 0
   3181 */
   3182int snapshot_gfx_rc6_ms(void)
   3183{
   3184	FILE *fp;
   3185	int retval;
   3186
   3187	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
   3188
   3189	retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
   3190	if (retval != 1)
   3191		err(1, "GFX rc6");
   3192
   3193	fclose(fp);
   3194
   3195	return 0;
   3196}
   3197
   3198/*
   3199 * snapshot_gfx_mhz()
   3200 *
   3201 * record snapshot of
   3202 * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
   3203 *
   3204 * return 1 if config change requires a restart, else return 0
   3205 */
   3206int snapshot_gfx_mhz(void)
   3207{
   3208	static FILE *fp;
   3209	int retval;
   3210
   3211	if (fp == NULL)
   3212		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
   3213	else {
   3214		rewind(fp);
   3215		fflush(fp);
   3216	}
   3217
   3218	retval = fscanf(fp, "%d", &gfx_cur_mhz);
   3219	if (retval != 1)
   3220		err(1, "GFX MHz");
   3221
   3222	return 0;
   3223}
   3224
   3225/*
   3226 * snapshot_gfx_cur_mhz()
   3227 *
   3228 * record snapshot of
   3229 * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
   3230 *
   3231 * return 1 if config change requires a restart, else return 0
   3232 */
   3233int snapshot_gfx_act_mhz(void)
   3234{
   3235	static FILE *fp;
   3236	int retval;
   3237
   3238	if (fp == NULL)
   3239		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
   3240	else {
   3241		rewind(fp);
   3242		fflush(fp);
   3243	}
   3244
   3245	retval = fscanf(fp, "%d", &gfx_act_mhz);
   3246	if (retval != 1)
   3247		err(1, "GFX ACT MHz");
   3248
   3249	return 0;
   3250}
   3251
   3252/*
   3253 * snapshot_cpu_lpi()
   3254 *
   3255 * record snapshot of
   3256 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
   3257 */
   3258int snapshot_cpu_lpi_us(void)
   3259{
   3260	FILE *fp;
   3261	int retval;
   3262
   3263	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
   3264
   3265	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
   3266	if (retval != 1) {
   3267		fprintf(stderr, "Disabling Low Power Idle CPU output\n");
   3268		BIC_NOT_PRESENT(BIC_CPU_LPI);
   3269		fclose(fp);
   3270		return -1;
   3271	}
   3272
   3273	fclose(fp);
   3274
   3275	return 0;
   3276}
   3277
   3278/*
   3279 * snapshot_sys_lpi()
   3280 *
   3281 * record snapshot of sys_lpi_file
   3282 */
   3283int snapshot_sys_lpi_us(void)
   3284{
   3285	FILE *fp;
   3286	int retval;
   3287
   3288	fp = fopen_or_die(sys_lpi_file, "r");
   3289
   3290	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
   3291	if (retval != 1) {
   3292		fprintf(stderr, "Disabling Low Power Idle System output\n");
   3293		BIC_NOT_PRESENT(BIC_SYS_LPI);
   3294		fclose(fp);
   3295		return -1;
   3296	}
   3297	fclose(fp);
   3298
   3299	return 0;
   3300}
   3301
   3302/*
   3303 * snapshot /proc and /sys files
   3304 *
   3305 * return 1 if configuration restart needed, else return 0
   3306 */
   3307int snapshot_proc_sysfs_files(void)
   3308{
   3309	if (DO_BIC(BIC_IRQ))
   3310		if (snapshot_proc_interrupts())
   3311			return 1;
   3312
   3313	if (DO_BIC(BIC_GFX_rc6))
   3314		snapshot_gfx_rc6_ms();
   3315
   3316	if (DO_BIC(BIC_GFXMHz))
   3317		snapshot_gfx_mhz();
   3318
   3319	if (DO_BIC(BIC_GFXACTMHz))
   3320		snapshot_gfx_act_mhz();
   3321
   3322	if (DO_BIC(BIC_CPU_LPI))
   3323		snapshot_cpu_lpi_us();
   3324
   3325	if (DO_BIC(BIC_SYS_LPI))
   3326		snapshot_sys_lpi_us();
   3327
   3328	return 0;
   3329}
   3330
   3331int exit_requested;
   3332
   3333static void signal_handler(int signal)
   3334{
   3335	switch (signal) {
   3336	case SIGINT:
   3337		exit_requested = 1;
   3338		if (debug)
   3339			fprintf(stderr, " SIGINT\n");
   3340		break;
   3341	case SIGUSR1:
   3342		if (debug > 1)
   3343			fprintf(stderr, "SIGUSR1\n");
   3344		break;
   3345	}
   3346}
   3347
   3348void setup_signal_handler(void)
   3349{
   3350	struct sigaction sa;
   3351
   3352	memset(&sa, 0, sizeof(sa));
   3353
   3354	sa.sa_handler = &signal_handler;
   3355
   3356	if (sigaction(SIGINT, &sa, NULL) < 0)
   3357		err(1, "sigaction SIGINT");
   3358	if (sigaction(SIGUSR1, &sa, NULL) < 0)
   3359		err(1, "sigaction SIGUSR1");
   3360}
   3361
   3362void do_sleep(void)
   3363{
   3364	struct timeval tout;
   3365	struct timespec rest;
   3366	fd_set readfds;
   3367	int retval;
   3368
   3369	FD_ZERO(&readfds);
   3370	FD_SET(0, &readfds);
   3371
   3372	if (ignore_stdin) {
   3373		nanosleep(&interval_ts, NULL);
   3374		return;
   3375	}
   3376
   3377	tout = interval_tv;
   3378	retval = select(1, &readfds, NULL, NULL, &tout);
   3379
   3380	if (retval == 1) {
   3381		switch (getc(stdin)) {
   3382		case 'q':
   3383			exit_requested = 1;
   3384			break;
   3385		case EOF:
   3386			/*
   3387			 * 'stdin' is a pipe closed on the other end. There
   3388			 * won't be any further input.
   3389			 */
   3390			ignore_stdin = 1;
   3391			/* Sleep the rest of the time */
   3392			rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
   3393			rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
   3394			nanosleep(&rest, NULL);
   3395		}
   3396	}
   3397}
   3398
   3399int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
   3400{
   3401	int ret, idx;
   3402	unsigned long long msr_cur, msr_last;
   3403
   3404	if (!per_cpu_msr_sum)
   3405		return 1;
   3406
   3407	idx = offset_to_idx(offset);
   3408	if (idx < 0)
   3409		return idx;
   3410	/* get_msr_sum() = sum + (get_msr() - last) */
   3411	ret = get_msr(cpu, offset, &msr_cur);
   3412	if (ret)
   3413		return ret;
   3414	msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
   3415	DELTA_WRAP32(msr_cur, msr_last);
   3416	*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
   3417
   3418	return 0;
   3419}
   3420
   3421timer_t timerid;
   3422
   3423/* Timer callback, update the sum of MSRs periodically. */
   3424static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   3425{
   3426	int i, ret;
   3427	int cpu = t->cpu_id;
   3428
   3429	UNUSED(c);
   3430	UNUSED(p);
   3431
   3432	for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
   3433		unsigned long long msr_cur, msr_last;
   3434		off_t offset;
   3435
   3436		if (!idx_valid(i))
   3437			continue;
   3438		offset = idx_to_offset(i);
   3439		if (offset < 0)
   3440			continue;
   3441		ret = get_msr(cpu, offset, &msr_cur);
   3442		if (ret) {
   3443			fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
   3444			continue;
   3445		}
   3446
   3447		msr_last = per_cpu_msr_sum[cpu].entries[i].last;
   3448		per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
   3449
   3450		DELTA_WRAP32(msr_cur, msr_last);
   3451		per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
   3452	}
   3453	return 0;
   3454}
   3455
   3456static void msr_record_handler(union sigval v)
   3457{
   3458	UNUSED(v);
   3459
   3460	for_all_cpus(update_msr_sum, EVEN_COUNTERS);
   3461}
   3462
   3463void msr_sum_record(void)
   3464{
   3465	struct itimerspec its;
   3466	struct sigevent sev;
   3467
   3468	per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
   3469	if (!per_cpu_msr_sum) {
   3470		fprintf(outf, "Can not allocate memory for long time MSR.\n");
   3471		return;
   3472	}
   3473	/*
   3474	 * Signal handler might be restricted, so use thread notifier instead.
   3475	 */
   3476	memset(&sev, 0, sizeof(struct sigevent));
   3477	sev.sigev_notify = SIGEV_THREAD;
   3478	sev.sigev_notify_function = msr_record_handler;
   3479
   3480	sev.sigev_value.sival_ptr = &timerid;
   3481	if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
   3482		fprintf(outf, "Can not create timer.\n");
   3483		goto release_msr;
   3484	}
   3485
   3486	its.it_value.tv_sec = 0;
   3487	its.it_value.tv_nsec = 1;
   3488	/*
   3489	 * A wraparound time has been calculated early.
   3490	 * Some sources state that the peak power for a
   3491	 * microprocessor is usually 1.5 times the TDP rating,
   3492	 * use 2 * TDP for safety.
   3493	 */
   3494	its.it_interval.tv_sec = rapl_joule_counter_range / 2;
   3495	its.it_interval.tv_nsec = 0;
   3496
   3497	if (timer_settime(timerid, 0, &its, NULL) == -1) {
   3498		fprintf(outf, "Can not set timer.\n");
   3499		goto release_timer;
   3500	}
   3501	return;
   3502
   3503release_timer:
   3504	timer_delete(timerid);
   3505release_msr:
   3506	free(per_cpu_msr_sum);
   3507}
   3508
   3509/*
   3510 * set_my_sched_priority(pri)
   3511 * return previous
   3512 *
   3513 * if non-root, do this:
   3514 * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat
   3515 */
   3516int set_my_sched_priority(int priority)
   3517{
   3518	int retval;
   3519	int original_priority;
   3520
   3521	errno = 0;
   3522	original_priority = getpriority(PRIO_PROCESS, 0);
   3523	if (errno && (original_priority == -1))
   3524		err(errno, "getpriority");
   3525
   3526	retval = setpriority(PRIO_PROCESS, 0, priority);
   3527	if (retval)
   3528		err(retval, "setpriority(%d)", priority);
   3529
   3530	errno = 0;
   3531	retval = getpriority(PRIO_PROCESS, 0);
   3532	if (retval != priority)
   3533		err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
   3534
   3535	return original_priority;
   3536}
   3537
   3538void turbostat_loop()
   3539{
   3540	int retval;
   3541	int restarted = 0;
   3542	unsigned int done_iters = 0;
   3543
   3544	setup_signal_handler();
   3545
   3546	/*
   3547	 * elevate own priority for interval mode
   3548	 */
   3549	set_my_sched_priority(-20);
   3550
   3551restart:
   3552	restarted++;
   3553
   3554	snapshot_proc_sysfs_files();
   3555	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
   3556	first_counter_read = 0;
   3557	if (retval < -1) {
   3558		exit(retval);
   3559	} else if (retval == -1) {
   3560		if (restarted > 10) {
   3561			exit(retval);
   3562		}
   3563		re_initialize();
   3564		goto restart;
   3565	}
   3566	restarted = 0;
   3567	done_iters = 0;
   3568	gettimeofday(&tv_even, (struct timezone *)NULL);
   3569
   3570	while (1) {
   3571		if (for_all_proc_cpus(cpu_is_not_present)) {
   3572			re_initialize();
   3573			goto restart;
   3574		}
   3575		do_sleep();
   3576		if (snapshot_proc_sysfs_files())
   3577			goto restart;
   3578		retval = for_all_cpus(get_counters, ODD_COUNTERS);
   3579		if (retval < -1) {
   3580			exit(retval);
   3581		} else if (retval == -1) {
   3582			re_initialize();
   3583			goto restart;
   3584		}
   3585		gettimeofday(&tv_odd, (struct timezone *)NULL);
   3586		timersub(&tv_odd, &tv_even, &tv_delta);
   3587		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
   3588			re_initialize();
   3589			goto restart;
   3590		}
   3591		compute_average(EVEN_COUNTERS);
   3592		format_all_counters(EVEN_COUNTERS);
   3593		flush_output_stdout();
   3594		if (exit_requested)
   3595			break;
   3596		if (num_iterations && ++done_iters >= num_iterations)
   3597			break;
   3598		do_sleep();
   3599		if (snapshot_proc_sysfs_files())
   3600			goto restart;
   3601		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
   3602		if (retval < -1) {
   3603			exit(retval);
   3604		} else if (retval == -1) {
   3605			re_initialize();
   3606			goto restart;
   3607		}
   3608		gettimeofday(&tv_even, (struct timezone *)NULL);
   3609		timersub(&tv_even, &tv_odd, &tv_delta);
   3610		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
   3611			re_initialize();
   3612			goto restart;
   3613		}
   3614		compute_average(ODD_COUNTERS);
   3615		format_all_counters(ODD_COUNTERS);
   3616		flush_output_stdout();
   3617		if (exit_requested)
   3618			break;
   3619		if (num_iterations && ++done_iters >= num_iterations)
   3620			break;
   3621	}
   3622}
   3623
   3624void check_dev_msr()
   3625{
   3626	struct stat sb;
   3627	char pathname[32];
   3628
   3629	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
   3630	if (stat(pathname, &sb))
   3631		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
   3632			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
   3633}
   3634
   3635/*
   3636 * check for CAP_SYS_RAWIO
   3637 * return 0 on success
   3638 * return 1 on fail
   3639 */
   3640int check_for_cap_sys_rawio(void)
   3641{
   3642	cap_t caps;
   3643	cap_flag_value_t cap_flag_value;
   3644
   3645	caps = cap_get_proc();
   3646	if (caps == NULL)
   3647		err(-6, "cap_get_proc\n");
   3648
   3649	if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
   3650		err(-6, "cap_get\n");
   3651
   3652	if (cap_flag_value != CAP_SET) {
   3653		warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
   3654		return 1;
   3655	}
   3656
   3657	if (cap_free(caps) == -1)
   3658		err(-6, "cap_free\n");
   3659
   3660	return 0;
   3661}
   3662
   3663void check_permissions(void)
   3664{
   3665	int do_exit = 0;
   3666	char pathname[32];
   3667
   3668	/* check for CAP_SYS_RAWIO */
   3669	do_exit += check_for_cap_sys_rawio();
   3670
   3671	/* test file permissions */
   3672	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
   3673	if (euidaccess(pathname, R_OK)) {
   3674		do_exit++;
   3675		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
   3676	}
   3677
   3678	/* if all else fails, thell them to be root */
   3679	if (do_exit)
   3680		if (getuid() != 0)
   3681			warnx("... or simply run as root");
   3682
   3683	if (do_exit)
   3684		exit(-6);
   3685}
   3686
   3687/*
   3688 * NHM adds support for additional MSRs:
   3689 *
   3690 * MSR_SMI_COUNT                   0x00000034
   3691 *
   3692 * MSR_PLATFORM_INFO               0x000000ce
   3693 * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
   3694 *
   3695 * MSR_MISC_PWR_MGMT               0x000001aa
   3696 *
   3697 * MSR_PKG_C3_RESIDENCY            0x000003f8
   3698 * MSR_PKG_C6_RESIDENCY            0x000003f9
   3699 * MSR_CORE_C3_RESIDENCY           0x000003fc
   3700 * MSR_CORE_C6_RESIDENCY           0x000003fd
   3701 *
   3702 * Side effect:
   3703 * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
   3704 * sets has_misc_feature_control
   3705 */
   3706int probe_nhm_msrs(unsigned int family, unsigned int model)
   3707{
   3708	unsigned long long msr;
   3709	unsigned int base_ratio;
   3710	int *pkg_cstate_limits;
   3711
   3712	if (!genuine_intel)
   3713		return 0;
   3714
   3715	if (family != 6)
   3716		return 0;
   3717
   3718	bclk = discover_bclk(family, model);
   3719
   3720	switch (model) {
   3721	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
   3722	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
   3723		pkg_cstate_limits = nhm_pkg_cstate_limits;
   3724		break;
   3725	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
   3726	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
   3727	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
   3728	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
   3729		pkg_cstate_limits = snb_pkg_cstate_limits;
   3730		has_misc_feature_control = 1;
   3731		break;
   3732	case INTEL_FAM6_HASWELL:	/* HSW */
   3733	case INTEL_FAM6_HASWELL_G:	/* HSW */
   3734	case INTEL_FAM6_HASWELL_X:	/* HSX */
   3735	case INTEL_FAM6_HASWELL_L:	/* HSW */
   3736	case INTEL_FAM6_BROADWELL:	/* BDW */
   3737	case INTEL_FAM6_BROADWELL_G:	/* BDW */
   3738	case INTEL_FAM6_BROADWELL_X:	/* BDX */
   3739	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
   3740	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
   3741		pkg_cstate_limits = hsw_pkg_cstate_limits;
   3742		has_misc_feature_control = 1;
   3743		break;
   3744	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
   3745		pkg_cstate_limits = skx_pkg_cstate_limits;
   3746		has_misc_feature_control = 1;
   3747		break;
   3748	case INTEL_FAM6_ICELAKE_X:	/* ICX */
   3749		pkg_cstate_limits = icx_pkg_cstate_limits;
   3750		has_misc_feature_control = 1;
   3751		break;
   3752	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
   3753		no_MSR_MISC_PWR_MGMT = 1;
   3754		/* FALLTHRU */
   3755	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
   3756		pkg_cstate_limits = slv_pkg_cstate_limits;
   3757		break;
   3758	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
   3759		pkg_cstate_limits = amt_pkg_cstate_limits;
   3760		no_MSR_MISC_PWR_MGMT = 1;
   3761		break;
   3762	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
   3763		pkg_cstate_limits = phi_pkg_cstate_limits;
   3764		break;
   3765	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
   3766	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
   3767	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
   3768	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
   3769	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
   3770		pkg_cstate_limits = glm_pkg_cstate_limits;
   3771		break;
   3772	default:
   3773		return 0;
   3774	}
   3775	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
   3776	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
   3777
   3778	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
   3779	base_ratio = (msr >> 8) & 0xFF;
   3780
   3781	base_hz = base_ratio * bclk * 1000000;
   3782	has_base_hz = 1;
   3783	return 1;
   3784}
   3785
   3786/*
   3787 * SLV client has support for unique MSRs:
   3788 *
   3789 * MSR_CC6_DEMOTION_POLICY_CONFIG
   3790 * MSR_MC6_DEMOTION_POLICY_CONFIG
   3791 */
   3792
   3793int has_slv_msrs(unsigned int family, unsigned int model)
   3794{
   3795	if (!genuine_intel)
   3796		return 0;
   3797
   3798	if (family != 6)
   3799		return 0;
   3800
   3801	switch (model) {
   3802	case INTEL_FAM6_ATOM_SILVERMONT:
   3803	case INTEL_FAM6_ATOM_SILVERMONT_MID:
   3804	case INTEL_FAM6_ATOM_AIRMONT_MID:
   3805		return 1;
   3806	}
   3807	return 0;
   3808}
   3809
   3810int is_dnv(unsigned int family, unsigned int model)
   3811{
   3812
   3813	if (!genuine_intel)
   3814		return 0;
   3815
   3816	if (family != 6)
   3817		return 0;
   3818
   3819	switch (model) {
   3820	case INTEL_FAM6_ATOM_GOLDMONT_D:
   3821		return 1;
   3822	}
   3823	return 0;
   3824}
   3825
   3826int is_bdx(unsigned int family, unsigned int model)
   3827{
   3828
   3829	if (!genuine_intel)
   3830		return 0;
   3831
   3832	if (family != 6)
   3833		return 0;
   3834
   3835	switch (model) {
   3836	case INTEL_FAM6_BROADWELL_X:
   3837		return 1;
   3838	}
   3839	return 0;
   3840}
   3841
   3842int is_skx(unsigned int family, unsigned int model)
   3843{
   3844
   3845	if (!genuine_intel)
   3846		return 0;
   3847
   3848	if (family != 6)
   3849		return 0;
   3850
   3851	switch (model) {
   3852	case INTEL_FAM6_SKYLAKE_X:
   3853		return 1;
   3854	}
   3855	return 0;
   3856}
   3857
   3858int is_icx(unsigned int family, unsigned int model)
   3859{
   3860
   3861	if (!genuine_intel)
   3862		return 0;
   3863
   3864	if (family != 6)
   3865		return 0;
   3866
   3867	switch (model) {
   3868	case INTEL_FAM6_ICELAKE_X:
   3869		return 1;
   3870	}
   3871	return 0;
   3872}
   3873
   3874int is_ehl(unsigned int family, unsigned int model)
   3875{
   3876	if (!genuine_intel)
   3877		return 0;
   3878
   3879	if (family != 6)
   3880		return 0;
   3881
   3882	switch (model) {
   3883	case INTEL_FAM6_ATOM_TREMONT:
   3884		return 1;
   3885	}
   3886	return 0;
   3887}
   3888
   3889int is_jvl(unsigned int family, unsigned int model)
   3890{
   3891	if (!genuine_intel)
   3892		return 0;
   3893
   3894	if (family != 6)
   3895		return 0;
   3896
   3897	switch (model) {
   3898	case INTEL_FAM6_ATOM_TREMONT_D:
   3899		return 1;
   3900	}
   3901	return 0;
   3902}
   3903
   3904int has_turbo_ratio_limit(unsigned int family, unsigned int model)
   3905{
   3906	if (has_slv_msrs(family, model))
   3907		return 0;
   3908
   3909	if (family != 6)
   3910		return 0;
   3911
   3912	switch (model) {
   3913		/* Nehalem compatible, but do not include turbo-ratio limit support */
   3914	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
   3915	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
   3916		return 0;
   3917	default:
   3918		return 1;
   3919	}
   3920}
   3921
   3922int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
   3923{
   3924	if (has_slv_msrs(family, model))
   3925		return 1;
   3926
   3927	return 0;
   3928}
   3929
   3930int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
   3931{
   3932	if (!genuine_intel)
   3933		return 0;
   3934
   3935	if (family != 6)
   3936		return 0;
   3937
   3938	switch (model) {
   3939	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
   3940	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
   3941		return 1;
   3942	default:
   3943		return 0;
   3944	}
   3945}
   3946
   3947int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
   3948{
   3949	if (!genuine_intel)
   3950		return 0;
   3951
   3952	if (family != 6)
   3953		return 0;
   3954
   3955	switch (model) {
   3956	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
   3957		return 1;
   3958	default:
   3959		return 0;
   3960	}
   3961}
   3962
   3963int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
   3964{
   3965	if (!genuine_intel)
   3966		return 0;
   3967
   3968	if (family != 6)
   3969		return 0;
   3970
   3971	switch (model) {
   3972	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
   3973		return 1;
   3974	default:
   3975		return 0;
   3976	}
   3977}
   3978
   3979int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
   3980{
   3981	if (!genuine_intel)
   3982		return 0;
   3983
   3984	if (family != 6)
   3985		return 0;
   3986
   3987	switch (model) {
   3988	case INTEL_FAM6_ATOM_GOLDMONT:
   3989	case INTEL_FAM6_SKYLAKE_X:
   3990	case INTEL_FAM6_ICELAKE_X:
   3991		return 1;
   3992	default:
   3993		return 0;
   3994	}
   3995}
   3996
   3997int has_config_tdp(unsigned int family, unsigned int model)
   3998{
   3999	if (!genuine_intel)
   4000		return 0;
   4001
   4002	if (family != 6)
   4003		return 0;
   4004
   4005	switch (model) {
   4006	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
   4007	case INTEL_FAM6_HASWELL:	/* HSW */
   4008	case INTEL_FAM6_HASWELL_X:	/* HSX */
   4009	case INTEL_FAM6_HASWELL_L:	/* HSW */
   4010	case INTEL_FAM6_HASWELL_G:	/* HSW */
   4011	case INTEL_FAM6_BROADWELL:	/* BDW */
   4012	case INTEL_FAM6_BROADWELL_G:	/* BDW */
   4013	case INTEL_FAM6_BROADWELL_X:	/* BDX */
   4014	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
   4015	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
   4016	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
   4017	case INTEL_FAM6_ICELAKE_X:	/* ICX */
   4018
   4019	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
   4020		return 1;
   4021	default:
   4022		return 0;
   4023	}
   4024}
   4025
   4026/*
   4027 * tcc_offset_bits:
   4028 * 0: Tcc Offset not supported (Default)
   4029 * 6: Bit 29:24 of MSR_PLATFORM_INFO
   4030 * 4: Bit 27:24 of MSR_PLATFORM_INFO
   4031 */
   4032void check_tcc_offset(int model)
   4033{
   4034	unsigned long long msr;
   4035
   4036	if (!genuine_intel)
   4037		return;
   4038
   4039	switch (model) {
   4040	case INTEL_FAM6_SKYLAKE_L:
   4041	case INTEL_FAM6_SKYLAKE:
   4042	case INTEL_FAM6_KABYLAKE_L:
   4043	case INTEL_FAM6_KABYLAKE:
   4044	case INTEL_FAM6_ICELAKE_L:
   4045	case INTEL_FAM6_ICELAKE:
   4046	case INTEL_FAM6_TIGERLAKE_L:
   4047	case INTEL_FAM6_TIGERLAKE:
   4048	case INTEL_FAM6_COMETLAKE:
   4049		if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
   4050			msr = (msr >> 30) & 1;
   4051			if (msr)
   4052				tcc_offset_bits = 6;
   4053		}
   4054		return;
   4055	default:
   4056		return;
   4057	}
   4058}
   4059
   4060static void remove_underbar(char *s)
   4061{
   4062	char *to = s;
   4063
   4064	while (*s) {
   4065		if (*s != '_')
   4066			*to++ = *s;
   4067		s++;
   4068	}
   4069
   4070	*to = 0;
   4071}
   4072
   4073static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
   4074{
   4075	if (!do_nhm_platform_info)
   4076		return;
   4077
   4078	dump_nhm_platform_info();
   4079
   4080	if (has_hsw_turbo_ratio_limit(family, model))
   4081		dump_hsw_turbo_ratio_limits();
   4082
   4083	if (has_ivt_turbo_ratio_limit(family, model))
   4084		dump_ivt_turbo_ratio_limits();
   4085
   4086	if (has_turbo_ratio_limit(family, model))
   4087		dump_turbo_ratio_limits(family, model);
   4088
   4089	if (has_atom_turbo_ratio_limit(family, model))
   4090		dump_atom_turbo_ratio_limits();
   4091
   4092	if (has_knl_turbo_ratio_limit(family, model))
   4093		dump_knl_turbo_ratio_limits();
   4094
   4095	if (has_config_tdp(family, model))
   4096		dump_config_tdp();
   4097
   4098	dump_nhm_cst_cfg();
   4099}
   4100
   4101static void dump_sysfs_file(char *path)
   4102{
   4103	FILE *input;
   4104	char cpuidle_buf[64];
   4105
   4106	input = fopen(path, "r");
   4107	if (input == NULL) {
   4108		if (debug)
   4109			fprintf(outf, "NSFOD %s\n", path);
   4110		return;
   4111	}
   4112	if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
   4113		err(1, "%s: failed to read file", path);
   4114	fclose(input);
   4115
   4116	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
   4117}
   4118
   4119static void dump_sysfs_cstate_config(void)
   4120{
   4121	char path[64];
   4122	char name_buf[16];
   4123	char desc[64];
   4124	FILE *input;
   4125	int state;
   4126	char *sp;
   4127
   4128	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
   4129		fprintf(outf, "cpuidle not loaded\n");
   4130		return;
   4131	}
   4132
   4133	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
   4134	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
   4135	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
   4136
   4137	for (state = 0; state < 10; ++state) {
   4138
   4139		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
   4140		input = fopen(path, "r");
   4141		if (input == NULL)
   4142			continue;
   4143		if (!fgets(name_buf, sizeof(name_buf), input))
   4144			err(1, "%s: failed to read file", path);
   4145
   4146		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
   4147		sp = strchr(name_buf, '-');
   4148		if (!sp)
   4149			sp = strchrnul(name_buf, '\n');
   4150		*sp = '\0';
   4151		fclose(input);
   4152
   4153		remove_underbar(name_buf);
   4154
   4155		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
   4156		input = fopen(path, "r");
   4157		if (input == NULL)
   4158			continue;
   4159		if (!fgets(desc, sizeof(desc), input))
   4160			err(1, "%s: failed to read file", path);
   4161
   4162		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
   4163		fclose(input);
   4164	}
   4165}
   4166
   4167static void dump_sysfs_pstate_config(void)
   4168{
   4169	char path[64];
   4170	char driver_buf[64];
   4171	char governor_buf[64];
   4172	FILE *input;
   4173	int turbo;
   4174
   4175	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
   4176	input = fopen(path, "r");
   4177	if (input == NULL) {
   4178		fprintf(outf, "NSFOD %s\n", path);
   4179		return;
   4180	}
   4181	if (!fgets(driver_buf, sizeof(driver_buf), input))
   4182		err(1, "%s: failed to read file", path);
   4183	fclose(input);
   4184
   4185	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
   4186	input = fopen(path, "r");
   4187	if (input == NULL) {
   4188		fprintf(outf, "NSFOD %s\n", path);
   4189		return;
   4190	}
   4191	if (!fgets(governor_buf, sizeof(governor_buf), input))
   4192		err(1, "%s: failed to read file", path);
   4193	fclose(input);
   4194
   4195	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
   4196	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
   4197
   4198	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
   4199	input = fopen(path, "r");
   4200	if (input != NULL) {
   4201		if (fscanf(input, "%d", &turbo) != 1)
   4202			err(1, "%s: failed to parse number from file", path);
   4203		fprintf(outf, "cpufreq boost: %d\n", turbo);
   4204		fclose(input);
   4205	}
   4206
   4207	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
   4208	input = fopen(path, "r");
   4209	if (input != NULL) {
   4210		if (fscanf(input, "%d", &turbo) != 1)
   4211			err(1, "%s: failed to parse number from file", path);
   4212		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
   4213		fclose(input);
   4214	}
   4215}
   4216
   4217/*
   4218 * print_epb()
   4219 * Decode the ENERGY_PERF_BIAS MSR
   4220 */
   4221int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   4222{
   4223	char *epb_string;
   4224	int cpu, epb;
   4225
   4226	UNUSED(c);
   4227	UNUSED(p);
   4228
   4229	if (!has_epb)
   4230		return 0;
   4231
   4232	cpu = t->cpu_id;
   4233
   4234	/* EPB is per-package */
   4235	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   4236		return 0;
   4237
   4238	if (cpu_migrate(cpu)) {
   4239		fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
   4240		return -1;
   4241	}
   4242
   4243	epb = get_epb(cpu);
   4244	if (epb < 0)
   4245		return 0;
   4246
   4247	switch (epb) {
   4248	case ENERGY_PERF_BIAS_PERFORMANCE:
   4249		epb_string = "performance";
   4250		break;
   4251	case ENERGY_PERF_BIAS_NORMAL:
   4252		epb_string = "balanced";
   4253		break;
   4254	case ENERGY_PERF_BIAS_POWERSAVE:
   4255		epb_string = "powersave";
   4256		break;
   4257	default:
   4258		epb_string = "custom";
   4259		break;
   4260	}
   4261	fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
   4262
   4263	return 0;
   4264}
   4265
   4266/*
   4267 * print_hwp()
   4268 * Decode the MSR_HWP_CAPABILITIES
   4269 */
   4270int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   4271{
   4272	unsigned long long msr;
   4273	int cpu;
   4274
   4275	UNUSED(c);
   4276	UNUSED(p);
   4277
   4278	if (!has_hwp)
   4279		return 0;
   4280
   4281	cpu = t->cpu_id;
   4282
   4283	/* MSR_HWP_CAPABILITIES is per-package */
   4284	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   4285		return 0;
   4286
   4287	if (cpu_migrate(cpu)) {
   4288		fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
   4289		return -1;
   4290	}
   4291
   4292	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
   4293		return 0;
   4294
   4295	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
   4296
   4297	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
   4298	if ((msr & (1 << 0)) == 0)
   4299		return 0;
   4300
   4301	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
   4302		return 0;
   4303
   4304	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
   4305		"(high %d guar %d eff %d low %d)\n",
   4306		cpu, msr,
   4307		(unsigned int)HWP_HIGHEST_PERF(msr),
   4308		(unsigned int)HWP_GUARANTEED_PERF(msr),
   4309		(unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
   4310
   4311	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
   4312		return 0;
   4313
   4314	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
   4315		"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
   4316		cpu, msr,
   4317		(unsigned int)(((msr) >> 0) & 0xff),
   4318		(unsigned int)(((msr) >> 8) & 0xff),
   4319		(unsigned int)(((msr) >> 16) & 0xff),
   4320		(unsigned int)(((msr) >> 24) & 0xff),
   4321		(unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
   4322
   4323	if (has_hwp_pkg) {
   4324		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
   4325			return 0;
   4326
   4327		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
   4328			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
   4329			cpu, msr,
   4330			(unsigned int)(((msr) >> 0) & 0xff),
   4331			(unsigned int)(((msr) >> 8) & 0xff),
   4332			(unsigned int)(((msr) >> 16) & 0xff),
   4333			(unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
   4334	}
   4335	if (has_hwp_notify) {
   4336		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
   4337			return 0;
   4338
   4339		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
   4340			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
   4341			cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
   4342	}
   4343	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
   4344		return 0;
   4345
   4346	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
   4347		"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
   4348		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
   4349
   4350	return 0;
   4351}
   4352
   4353/*
   4354 * print_perf_limit()
   4355 */
   4356int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   4357{
   4358	unsigned long long msr;
   4359	int cpu;
   4360
   4361	UNUSED(c);
   4362	UNUSED(p);
   4363
   4364	cpu = t->cpu_id;
   4365
   4366	/* per-package */
   4367	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   4368		return 0;
   4369
   4370	if (cpu_migrate(cpu)) {
   4371		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
   4372		return -1;
   4373	}
   4374
   4375	if (do_core_perf_limit_reasons) {
   4376		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
   4377		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
   4378		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
   4379			(msr & 1 << 15) ? "bit15, " : "",
   4380			(msr & 1 << 14) ? "bit14, " : "",
   4381			(msr & 1 << 13) ? "Transitions, " : "",
   4382			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
   4383			(msr & 1 << 11) ? "PkgPwrL2, " : "",
   4384			(msr & 1 << 10) ? "PkgPwrL1, " : "",
   4385			(msr & 1 << 9) ? "CorePwr, " : "",
   4386			(msr & 1 << 8) ? "Amps, " : "",
   4387			(msr & 1 << 6) ? "VR-Therm, " : "",
   4388			(msr & 1 << 5) ? "Auto-HWP, " : "",
   4389			(msr & 1 << 4) ? "Graphics, " : "",
   4390			(msr & 1 << 2) ? "bit2, " : "",
   4391			(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
   4392		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
   4393			(msr & 1 << 31) ? "bit31, " : "",
   4394			(msr & 1 << 30) ? "bit30, " : "",
   4395			(msr & 1 << 29) ? "Transitions, " : "",
   4396			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
   4397			(msr & 1 << 27) ? "PkgPwrL2, " : "",
   4398			(msr & 1 << 26) ? "PkgPwrL1, " : "",
   4399			(msr & 1 << 25) ? "CorePwr, " : "",
   4400			(msr & 1 << 24) ? "Amps, " : "",
   4401			(msr & 1 << 22) ? "VR-Therm, " : "",
   4402			(msr & 1 << 21) ? "Auto-HWP, " : "",
   4403			(msr & 1 << 20) ? "Graphics, " : "",
   4404			(msr & 1 << 18) ? "bit18, " : "",
   4405			(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
   4406
   4407	}
   4408	if (do_gfx_perf_limit_reasons) {
   4409		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
   4410		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
   4411		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
   4412			(msr & 1 << 0) ? "PROCHOT, " : "",
   4413			(msr & 1 << 1) ? "ThermStatus, " : "",
   4414			(msr & 1 << 4) ? "Graphics, " : "",
   4415			(msr & 1 << 6) ? "VR-Therm, " : "",
   4416			(msr & 1 << 8) ? "Amps, " : "",
   4417			(msr & 1 << 9) ? "GFXPwr, " : "",
   4418			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
   4419		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
   4420			(msr & 1 << 16) ? "PROCHOT, " : "",
   4421			(msr & 1 << 17) ? "ThermStatus, " : "",
   4422			(msr & 1 << 20) ? "Graphics, " : "",
   4423			(msr & 1 << 22) ? "VR-Therm, " : "",
   4424			(msr & 1 << 24) ? "Amps, " : "",
   4425			(msr & 1 << 25) ? "GFXPwr, " : "",
   4426			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
   4427	}
   4428	if (do_ring_perf_limit_reasons) {
   4429		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
   4430		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
   4431		fprintf(outf, " (Active: %s%s%s%s%s%s)",
   4432			(msr & 1 << 0) ? "PROCHOT, " : "",
   4433			(msr & 1 << 1) ? "ThermStatus, " : "",
   4434			(msr & 1 << 6) ? "VR-Therm, " : "",
   4435			(msr & 1 << 8) ? "Amps, " : "",
   4436			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
   4437		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
   4438			(msr & 1 << 16) ? "PROCHOT, " : "",
   4439			(msr & 1 << 17) ? "ThermStatus, " : "",
   4440			(msr & 1 << 22) ? "VR-Therm, " : "",
   4441			(msr & 1 << 24) ? "Amps, " : "",
   4442			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
   4443	}
   4444	return 0;
   4445}
   4446
   4447#define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
   4448#define	RAPL_TIME_GRANULARITY	0x3F	/* 6 bit time granularity */
   4449
   4450double get_tdp_intel(unsigned int model)
   4451{
   4452	unsigned long long msr;
   4453
   4454	if (do_rapl & RAPL_PKG_POWER_INFO)
   4455		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
   4456			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
   4457
   4458	switch (model) {
   4459	case INTEL_FAM6_ATOM_SILVERMONT:
   4460	case INTEL_FAM6_ATOM_SILVERMONT_D:
   4461		return 30.0;
   4462	default:
   4463		return 135.0;
   4464	}
   4465}
   4466
   4467double get_tdp_amd(unsigned int family)
   4468{
   4469	UNUSED(family);
   4470
   4471	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
   4472	return 280.0;
   4473}
   4474
   4475/*
   4476 * rapl_dram_energy_units_probe()
   4477 * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
   4478 */
   4479static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
   4480{
   4481	/* only called for genuine_intel, family 6 */
   4482
   4483	switch (model) {
   4484	case INTEL_FAM6_HASWELL_X:	/* HSX */
   4485	case INTEL_FAM6_BROADWELL_X:	/* BDX */
   4486	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
   4487	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
   4488	case INTEL_FAM6_ICELAKE_X:	/* ICX */
   4489		return (rapl_dram_energy_units = 15.3 / 1000000);
   4490	default:
   4491		return (rapl_energy_units);
   4492	}
   4493}
   4494
   4495void rapl_probe_intel(unsigned int family, unsigned int model)
   4496{
   4497	unsigned long long msr;
   4498	unsigned int time_unit;
   4499	double tdp;
   4500
   4501	if (family != 6)
   4502		return;
   4503
   4504	switch (model) {
   4505	case INTEL_FAM6_SANDYBRIDGE:
   4506	case INTEL_FAM6_IVYBRIDGE:
   4507	case INTEL_FAM6_HASWELL:	/* HSW */
   4508	case INTEL_FAM6_HASWELL_L:	/* HSW */
   4509	case INTEL_FAM6_HASWELL_G:	/* HSW */
   4510	case INTEL_FAM6_BROADWELL:	/* BDW */
   4511	case INTEL_FAM6_BROADWELL_G:	/* BDW */
   4512		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
   4513		if (rapl_joules) {
   4514			BIC_PRESENT(BIC_Pkg_J);
   4515			BIC_PRESENT(BIC_Cor_J);
   4516			BIC_PRESENT(BIC_GFX_J);
   4517		} else {
   4518			BIC_PRESENT(BIC_PkgWatt);
   4519			BIC_PRESENT(BIC_CorWatt);
   4520			BIC_PRESENT(BIC_GFXWatt);
   4521		}
   4522		break;
   4523	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
   4524	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
   4525		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
   4526		if (rapl_joules)
   4527			BIC_PRESENT(BIC_Pkg_J);
   4528		else
   4529			BIC_PRESENT(BIC_PkgWatt);
   4530		break;
   4531	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
   4532		do_rapl =
   4533		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
   4534		    | RAPL_GFX | RAPL_PKG_POWER_INFO;
   4535		if (rapl_joules) {
   4536			BIC_PRESENT(BIC_Pkg_J);
   4537			BIC_PRESENT(BIC_Cor_J);
   4538			BIC_PRESENT(BIC_RAM_J);
   4539			BIC_PRESENT(BIC_GFX_J);
   4540		} else {
   4541			BIC_PRESENT(BIC_PkgWatt);
   4542			BIC_PRESENT(BIC_CorWatt);
   4543			BIC_PRESENT(BIC_RAMWatt);
   4544			BIC_PRESENT(BIC_GFXWatt);
   4545		}
   4546		break;
   4547	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
   4548		do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
   4549		BIC_PRESENT(BIC_PKG__);
   4550		if (rapl_joules)
   4551			BIC_PRESENT(BIC_Pkg_J);
   4552		else
   4553			BIC_PRESENT(BIC_PkgWatt);
   4554		break;
   4555	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
   4556	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
   4557		do_rapl =
   4558		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
   4559		    | RAPL_GFX | RAPL_PKG_POWER_INFO;
   4560		BIC_PRESENT(BIC_PKG__);
   4561		BIC_PRESENT(BIC_RAM__);
   4562		if (rapl_joules) {
   4563			BIC_PRESENT(BIC_Pkg_J);
   4564			BIC_PRESENT(BIC_Cor_J);
   4565			BIC_PRESENT(BIC_RAM_J);
   4566			BIC_PRESENT(BIC_GFX_J);
   4567		} else {
   4568			BIC_PRESENT(BIC_PkgWatt);
   4569			BIC_PRESENT(BIC_CorWatt);
   4570			BIC_PRESENT(BIC_RAMWatt);
   4571			BIC_PRESENT(BIC_GFXWatt);
   4572		}
   4573		break;
   4574	case INTEL_FAM6_HASWELL_X:	/* HSX */
   4575	case INTEL_FAM6_BROADWELL_X:	/* BDX */
   4576	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
   4577	case INTEL_FAM6_ICELAKE_X:	/* ICX */
   4578	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
   4579		do_rapl =
   4580		    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
   4581		    RAPL_PKG_POWER_INFO;
   4582		BIC_PRESENT(BIC_PKG__);
   4583		BIC_PRESENT(BIC_RAM__);
   4584		if (rapl_joules) {
   4585			BIC_PRESENT(BIC_Pkg_J);
   4586			BIC_PRESENT(BIC_RAM_J);
   4587		} else {
   4588			BIC_PRESENT(BIC_PkgWatt);
   4589			BIC_PRESENT(BIC_RAMWatt);
   4590		}
   4591		break;
   4592	case INTEL_FAM6_SANDYBRIDGE_X:
   4593	case INTEL_FAM6_IVYBRIDGE_X:
   4594		do_rapl =
   4595		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
   4596		    RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
   4597		BIC_PRESENT(BIC_PKG__);
   4598		BIC_PRESENT(BIC_RAM__);
   4599		if (rapl_joules) {
   4600			BIC_PRESENT(BIC_Pkg_J);
   4601			BIC_PRESENT(BIC_Cor_J);
   4602			BIC_PRESENT(BIC_RAM_J);
   4603		} else {
   4604			BIC_PRESENT(BIC_PkgWatt);
   4605			BIC_PRESENT(BIC_CorWatt);
   4606			BIC_PRESENT(BIC_RAMWatt);
   4607		}
   4608		break;
   4609	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
   4610	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
   4611		do_rapl = RAPL_PKG | RAPL_CORES;
   4612		if (rapl_joules) {
   4613			BIC_PRESENT(BIC_Pkg_J);
   4614			BIC_PRESENT(BIC_Cor_J);
   4615		} else {
   4616			BIC_PRESENT(BIC_PkgWatt);
   4617			BIC_PRESENT(BIC_CorWatt);
   4618		}
   4619		break;
   4620	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
   4621		do_rapl =
   4622		    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
   4623		    RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
   4624		BIC_PRESENT(BIC_PKG__);
   4625		BIC_PRESENT(BIC_RAM__);
   4626		if (rapl_joules) {
   4627			BIC_PRESENT(BIC_Pkg_J);
   4628			BIC_PRESENT(BIC_Cor_J);
   4629			BIC_PRESENT(BIC_RAM_J);
   4630		} else {
   4631			BIC_PRESENT(BIC_PkgWatt);
   4632			BIC_PRESENT(BIC_CorWatt);
   4633			BIC_PRESENT(BIC_RAMWatt);
   4634		}
   4635		break;
   4636	default:
   4637		return;
   4638	}
   4639
   4640	/* units on package 0, verify later other packages match */
   4641	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
   4642		return;
   4643
   4644	rapl_power_units = 1.0 / (1 << (msr & 0xF));
   4645	if (model == INTEL_FAM6_ATOM_SILVERMONT)
   4646		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
   4647	else
   4648		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
   4649
   4650	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
   4651
   4652	time_unit = msr >> 16 & 0xF;
   4653	if (time_unit == 0)
   4654		time_unit = 0xA;
   4655
   4656	rapl_time_units = 1.0 / (1 << (time_unit));
   4657
   4658	tdp = get_tdp_intel(model);
   4659
   4660	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
   4661	if (!quiet)
   4662		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
   4663}
   4664
   4665void rapl_probe_amd(unsigned int family, unsigned int model)
   4666{
   4667	unsigned long long msr;
   4668	unsigned int eax, ebx, ecx, edx;
   4669	unsigned int has_rapl = 0;
   4670	double tdp;
   4671
   4672	UNUSED(model);
   4673
   4674	if (max_extended_level >= 0x80000007) {
   4675		__cpuid(0x80000007, eax, ebx, ecx, edx);
   4676		/* RAPL (Fam 17h+) */
   4677		has_rapl = edx & (1 << 14);
   4678	}
   4679
   4680	if (!has_rapl || family < 0x17)
   4681		return;
   4682
   4683	do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
   4684	if (rapl_joules) {
   4685		BIC_PRESENT(BIC_Pkg_J);
   4686		BIC_PRESENT(BIC_Cor_J);
   4687	} else {
   4688		BIC_PRESENT(BIC_PkgWatt);
   4689		BIC_PRESENT(BIC_CorWatt);
   4690	}
   4691
   4692	if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
   4693		return;
   4694
   4695	rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
   4696	rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
   4697	rapl_power_units = ldexp(1.0, -(msr & 0xf));
   4698
   4699	tdp = get_tdp_amd(family);
   4700
   4701	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
   4702	if (!quiet)
   4703		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
   4704}
   4705
   4706/*
   4707 * rapl_probe()
   4708 *
   4709 * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
   4710 */
   4711void rapl_probe(unsigned int family, unsigned int model)
   4712{
   4713	if (genuine_intel)
   4714		rapl_probe_intel(family, model);
   4715	if (authentic_amd || hygon_genuine)
   4716		rapl_probe_amd(family, model);
   4717}
   4718
   4719void perf_limit_reasons_probe(unsigned int family, unsigned int model)
   4720{
   4721	if (!genuine_intel)
   4722		return;
   4723
   4724	if (family != 6)
   4725		return;
   4726
   4727	switch (model) {
   4728	case INTEL_FAM6_HASWELL:	/* HSW */
   4729	case INTEL_FAM6_HASWELL_L:	/* HSW */
   4730	case INTEL_FAM6_HASWELL_G:	/* HSW */
   4731		do_gfx_perf_limit_reasons = 1;
   4732		/* FALLTHRU */
   4733	case INTEL_FAM6_HASWELL_X:	/* HSX */
   4734		do_core_perf_limit_reasons = 1;
   4735		do_ring_perf_limit_reasons = 1;
   4736	default:
   4737		return;
   4738	}
   4739}
   4740
   4741void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
   4742{
   4743	if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model))
   4744		has_automatic_cstate_conversion = 1;
   4745}
   4746
   4747void prewake_cstate_probe(unsigned int family, unsigned int model)
   4748{
   4749	if (is_icx(family, model))
   4750		dis_cstate_prewake = 1;
   4751}
   4752
   4753int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   4754{
   4755	unsigned long long msr;
   4756	unsigned int dts, dts2;
   4757	int cpu;
   4758
   4759	UNUSED(c);
   4760	UNUSED(p);
   4761
   4762	if (!(do_dts || do_ptm))
   4763		return 0;
   4764
   4765	cpu = t->cpu_id;
   4766
   4767	/* DTS is per-core, no need to print for each thread */
   4768	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
   4769		return 0;
   4770
   4771	if (cpu_migrate(cpu)) {
   4772		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
   4773		return -1;
   4774	}
   4775
   4776	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
   4777		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
   4778			return 0;
   4779
   4780		dts = (msr >> 16) & 0x7F;
   4781		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
   4782
   4783		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
   4784			return 0;
   4785
   4786		dts = (msr >> 16) & 0x7F;
   4787		dts2 = (msr >> 8) & 0x7F;
   4788		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
   4789			cpu, msr, tj_max - dts, tj_max - dts2);
   4790	}
   4791
   4792	if (do_dts && debug) {
   4793		unsigned int resolution;
   4794
   4795		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
   4796			return 0;
   4797
   4798		dts = (msr >> 16) & 0x7F;
   4799		resolution = (msr >> 27) & 0xF;
   4800		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
   4801			cpu, msr, tj_max - dts, resolution);
   4802
   4803		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
   4804			return 0;
   4805
   4806		dts = (msr >> 16) & 0x7F;
   4807		dts2 = (msr >> 8) & 0x7F;
   4808		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
   4809			cpu, msr, tj_max - dts, tj_max - dts2);
   4810	}
   4811
   4812	return 0;
   4813}
   4814
   4815void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
   4816{
   4817	fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
   4818		cpu, label,
   4819		((msr >> 15) & 1) ? "EN" : "DIS",
   4820		((msr >> 0) & 0x7FFF) * rapl_power_units,
   4821		(1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
   4822		(((msr >> 16) & 1) ? "EN" : "DIS"));
   4823
   4824	return;
   4825}
   4826
   4827int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   4828{
   4829	unsigned long long msr;
   4830	const char *msr_name;
   4831	int cpu;
   4832
   4833	UNUSED(c);
   4834	UNUSED(p);
   4835
   4836	if (!do_rapl)
   4837		return 0;
   4838
   4839	/* RAPL counters are per package, so print only for 1st thread/package */
   4840	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   4841		return 0;
   4842
   4843	cpu = t->cpu_id;
   4844	if (cpu_migrate(cpu)) {
   4845		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
   4846		return -1;
   4847	}
   4848
   4849	if (do_rapl & RAPL_AMD_F17H) {
   4850		msr_name = "MSR_RAPL_PWR_UNIT";
   4851		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
   4852			return -1;
   4853	} else {
   4854		msr_name = "MSR_RAPL_POWER_UNIT";
   4855		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
   4856			return -1;
   4857	}
   4858
   4859	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
   4860		rapl_power_units, rapl_energy_units, rapl_time_units);
   4861
   4862	if (do_rapl & RAPL_PKG_POWER_INFO) {
   4863
   4864		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
   4865			return -5;
   4866
   4867		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
   4868			cpu, msr,
   4869			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
   4870			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
   4871			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
   4872			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
   4873
   4874	}
   4875	if (do_rapl & RAPL_PKG) {
   4876
   4877		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
   4878			return -9;
   4879
   4880		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
   4881			cpu, msr, (msr >> 63) & 1 ? "" : "UN");
   4882
   4883		print_power_limit_msr(cpu, msr, "PKG Limit #1");
   4884		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
   4885			cpu,
   4886			((msr >> 47) & 1) ? "EN" : "DIS",
   4887			((msr >> 32) & 0x7FFF) * rapl_power_units,
   4888			(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
   4889			((msr >> 48) & 1) ? "EN" : "DIS");
   4890
   4891		if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
   4892			return -9;
   4893
   4894		fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
   4895		fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
   4896			cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
   4897	}
   4898
   4899	if (do_rapl & RAPL_DRAM_POWER_INFO) {
   4900		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
   4901			return -6;
   4902
   4903		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
   4904			cpu, msr,
   4905			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
   4906			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
   4907			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
   4908			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
   4909	}
   4910	if (do_rapl & RAPL_DRAM) {
   4911		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
   4912			return -9;
   4913		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
   4914			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
   4915
   4916		print_power_limit_msr(cpu, msr, "DRAM Limit");
   4917	}
   4918	if (do_rapl & RAPL_CORE_POLICY) {
   4919		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
   4920			return -7;
   4921
   4922		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
   4923	}
   4924	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
   4925		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
   4926			return -9;
   4927		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
   4928			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
   4929		print_power_limit_msr(cpu, msr, "Cores Limit");
   4930	}
   4931	if (do_rapl & RAPL_GFX) {
   4932		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
   4933			return -8;
   4934
   4935		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
   4936
   4937		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
   4938			return -9;
   4939		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
   4940			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
   4941		print_power_limit_msr(cpu, msr, "GFX Limit");
   4942	}
   4943	return 0;
   4944}
   4945
   4946/*
   4947 * SNB adds support for additional MSRs:
   4948 *
   4949 * MSR_PKG_C7_RESIDENCY            0x000003fa
   4950 * MSR_CORE_C7_RESIDENCY           0x000003fe
   4951 * MSR_PKG_C2_RESIDENCY            0x0000060d
   4952 */
   4953
   4954int has_snb_msrs(unsigned int family, unsigned int model)
   4955{
   4956	if (!genuine_intel)
   4957		return 0;
   4958
   4959	if (family != 6)
   4960		return 0;
   4961
   4962	switch (model) {
   4963	case INTEL_FAM6_SANDYBRIDGE:
   4964	case INTEL_FAM6_SANDYBRIDGE_X:
   4965	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
   4966	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
   4967	case INTEL_FAM6_HASWELL:	/* HSW */
   4968	case INTEL_FAM6_HASWELL_X:	/* HSW */
   4969	case INTEL_FAM6_HASWELL_L:	/* HSW */
   4970	case INTEL_FAM6_HASWELL_G:	/* HSW */
   4971	case INTEL_FAM6_BROADWELL:	/* BDW */
   4972	case INTEL_FAM6_BROADWELL_G:	/* BDW */
   4973	case INTEL_FAM6_BROADWELL_X:	/* BDX */
   4974	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
   4975	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
   4976	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
   4977	case INTEL_FAM6_ICELAKE_X:	/* ICX */
   4978	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
   4979	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
   4980	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
   4981	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
   4982	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
   4983		return 1;
   4984	}
   4985	return 0;
   4986}
   4987
   4988/*
   4989 * HSW ULT added support for C8/C9/C10 MSRs:
   4990 *
   4991 * MSR_PKG_C8_RESIDENCY		0x00000630
   4992 * MSR_PKG_C9_RESIDENCY		0x00000631
   4993 * MSR_PKG_C10_RESIDENCY	0x00000632
   4994 *
   4995 * MSR_PKGC8_IRTL		0x00000633
   4996 * MSR_PKGC9_IRTL		0x00000634
   4997 * MSR_PKGC10_IRTL		0x00000635
   4998 *
   4999 */
   5000int has_c8910_msrs(unsigned int family, unsigned int model)
   5001{
   5002	if (!genuine_intel)
   5003		return 0;
   5004
   5005	if (family != 6)
   5006		return 0;
   5007
   5008	switch (model) {
   5009	case INTEL_FAM6_HASWELL_L:	/* HSW */
   5010	case INTEL_FAM6_BROADWELL:	/* BDW */
   5011	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
   5012	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
   5013	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
   5014	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
   5015	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
   5016		return 1;
   5017	}
   5018	return 0;
   5019}
   5020
   5021/*
   5022 * SKL adds support for additional MSRS:
   5023 *
   5024 * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
   5025 * MSR_PKG_ANY_CORE_C0_RES         0x00000659
   5026 * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
   5027 * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
   5028 */
   5029int has_skl_msrs(unsigned int family, unsigned int model)
   5030{
   5031	if (!genuine_intel)
   5032		return 0;
   5033
   5034	if (family != 6)
   5035		return 0;
   5036
   5037	switch (model) {
   5038	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
   5039	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
   5040		return 1;
   5041	}
   5042	return 0;
   5043}
   5044
   5045int is_slm(unsigned int family, unsigned int model)
   5046{
   5047	if (!genuine_intel)
   5048		return 0;
   5049
   5050	if (family != 6)
   5051		return 0;
   5052
   5053	switch (model) {
   5054	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
   5055	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
   5056		return 1;
   5057	}
   5058	return 0;
   5059}
   5060
   5061int is_knl(unsigned int family, unsigned int model)
   5062{
   5063	if (!genuine_intel)
   5064		return 0;
   5065
   5066	if (family != 6)
   5067		return 0;
   5068
   5069	switch (model) {
   5070	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
   5071		return 1;
   5072	}
   5073	return 0;
   5074}
   5075
   5076int is_cnl(unsigned int family, unsigned int model)
   5077{
   5078	if (!genuine_intel)
   5079		return 0;
   5080
   5081	if (family != 6)
   5082		return 0;
   5083
   5084	switch (model) {
   5085	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
   5086		return 1;
   5087	}
   5088
   5089	return 0;
   5090}
   5091
   5092unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
   5093{
   5094	if (is_knl(family, model))
   5095		return 1024;
   5096	return 1;
   5097}
   5098
   5099#define SLM_BCLK_FREQS 5
   5100double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
   5101
   5102double slm_bclk(void)
   5103{
   5104	unsigned long long msr = 3;
   5105	unsigned int i;
   5106	double freq;
   5107
   5108	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
   5109		fprintf(outf, "SLM BCLK: unknown\n");
   5110
   5111	i = msr & 0xf;
   5112	if (i >= SLM_BCLK_FREQS) {
   5113		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
   5114		i = 3;
   5115	}
   5116	freq = slm_freq_table[i];
   5117
   5118	if (!quiet)
   5119		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
   5120
   5121	return freq;
   5122}
   5123
   5124double discover_bclk(unsigned int family, unsigned int model)
   5125{
   5126	if (has_snb_msrs(family, model) || is_knl(family, model))
   5127		return 100.00;
   5128	else if (is_slm(family, model))
   5129		return slm_bclk();
   5130	else
   5131		return 133.33;
   5132}
   5133
   5134int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   5135{
   5136	unsigned int eax, ebx, ecx, edx;
   5137
   5138	UNUSED(c);
   5139	UNUSED(p);
   5140
   5141	if (!genuine_intel)
   5142		return 0;
   5143
   5144	if (cpu_migrate(t->cpu_id)) {
   5145		fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
   5146		return -1;
   5147	}
   5148
   5149	if (max_level < 0x1a)
   5150		return 0;
   5151
   5152	__cpuid(0x1a, eax, ebx, ecx, edx);
   5153	eax = (eax >> 24) & 0xFF;
   5154	if (eax == 0x20)
   5155		t->is_atom = true;
   5156	return 0;
   5157}
   5158
   5159/*
   5160 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
   5161 * the Thermal Control Circuit (TCC) activates.
   5162 * This is usually equal to tjMax.
   5163 *
   5164 * Older processors do not have this MSR, so there we guess,
   5165 * but also allow cmdline over-ride with -T.
   5166 *
   5167 * Several MSR temperature values are in units of degrees-C
   5168 * below this value, including the Digital Thermal Sensor (DTS),
   5169 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
   5170 */
   5171int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
   5172{
   5173	unsigned long long msr;
   5174	unsigned int tcc_default, tcc_offset;
   5175	int cpu;
   5176
   5177	UNUSED(c);
   5178	UNUSED(p);
   5179
   5180	/* tj_max is used only for dts or ptm */
   5181	if (!(do_dts || do_ptm))
   5182		return 0;
   5183
   5184	/* this is a per-package concept */
   5185	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
   5186		return 0;
   5187
   5188	cpu = t->cpu_id;
   5189	if (cpu_migrate(cpu)) {
   5190		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
   5191		return -1;
   5192	}
   5193
   5194	if (tj_max_override != 0) {
   5195		tj_max = tj_max_override;
   5196		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
   5197		return 0;
   5198	}
   5199
   5200	/* Temperature Target MSR is Nehalem and newer only */
   5201	if (!do_nhm_platform_info)
   5202		goto guess;
   5203
   5204	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
   5205		goto guess;
   5206
   5207	tcc_default = (msr >> 16) & 0xFF;
   5208
   5209	if (!quiet) {
   5210		switch (tcc_offset_bits) {
   5211		case 4:
   5212			tcc_offset = (msr >> 24) & 0xF;
   5213			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
   5214				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
   5215			break;
   5216		case 6:
   5217			tcc_offset = (msr >> 24) & 0x3F;
   5218			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
   5219				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
   5220			break;
   5221		default:
   5222			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
   5223			break;
   5224		}
   5225	}
   5226
   5227	if (!tcc_default)
   5228		goto guess;
   5229
   5230	tj_max = tcc_default;
   5231
   5232	return 0;
   5233
   5234guess:
   5235	tj_max = TJMAX_DEFAULT;
   5236	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
   5237
   5238	return 0;
   5239}
   5240
   5241void decode_feature_control_msr(void)
   5242{
   5243	unsigned long long msr;
   5244
   5245	if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
   5246		fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
   5247			base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
   5248}
   5249
   5250void decode_misc_enable_msr(void)
   5251{
   5252	unsigned long long msr;
   5253
   5254	if (!genuine_intel)
   5255		return;
   5256
   5257	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
   5258		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
   5259			base_cpu, msr,
   5260			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
   5261			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
   5262			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
   5263			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
   5264			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
   5265}
   5266
   5267void decode_misc_feature_control(void)
   5268{
   5269	unsigned long long msr;
   5270
   5271	if (!has_misc_feature_control)
   5272		return;
   5273
   5274	if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
   5275		fprintf(outf,
   5276			"cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
   5277			base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "",
   5278			msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
   5279}
   5280
   5281/*
   5282 * Decode MSR_MISC_PWR_MGMT
   5283 *
   5284 * Decode the bits according to the Nehalem documentation
   5285 * bit[0] seems to continue to have same meaning going forward
   5286 * bit[1] less so...
   5287 */
   5288void decode_misc_pwr_mgmt_msr(void)
   5289{
   5290	unsigned long long msr;
   5291
   5292	if (!do_nhm_platform_info)
   5293		return;
   5294
   5295	if (no_MSR_MISC_PWR_MGMT)
   5296		return;
   5297
   5298	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
   5299		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
   5300			base_cpu, msr,
   5301			msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
   5302}
   5303
   5304/*
   5305 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
   5306 *
   5307 * This MSRs are present on Silvermont processors,
   5308 * Intel Atom processor E3000 series (Baytrail), and friends.
   5309 */
   5310void decode_c6_demotion_policy_msr(void)
   5311{
   5312	unsigned long long msr;
   5313
   5314	if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
   5315		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
   5316			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
   5317
   5318	if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
   5319		fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
   5320			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
   5321}
   5322
   5323/*
   5324 * When models are the same, for the purpose of turbostat, reuse
   5325 */
   5326unsigned int intel_model_duplicates(unsigned int model)
   5327{
   5328
   5329	switch (model) {
   5330	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
   5331	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
   5332	case 0x1F:		/* Core i7 and i5 Processor - Nehalem */
   5333	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
   5334	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
   5335		return INTEL_FAM6_NEHALEM;
   5336
   5337	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
   5338	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
   5339		return INTEL_FAM6_NEHALEM_EX;
   5340
   5341	case INTEL_FAM6_XEON_PHI_KNM:
   5342		return INTEL_FAM6_XEON_PHI_KNL;
   5343
   5344	case INTEL_FAM6_BROADWELL_X:
   5345	case INTEL_FAM6_BROADWELL_D:	/* BDX-DE */
   5346		return INTEL_FAM6_BROADWELL_X;
   5347
   5348	case INTEL_FAM6_SKYLAKE_L:
   5349	case INTEL_FAM6_SKYLAKE:
   5350	case INTEL_FAM6_KABYLAKE_L:
   5351	case INTEL_FAM6_KABYLAKE:
   5352	case INTEL_FAM6_COMETLAKE_L:
   5353	case INTEL_FAM6_COMETLAKE:
   5354		return INTEL_FAM6_SKYLAKE_L;
   5355
   5356	case INTEL_FAM6_ICELAKE_L:
   5357	case INTEL_FAM6_ICELAKE_NNPI:
   5358	case INTEL_FAM6_TIGERLAKE_L:
   5359	case INTEL_FAM6_TIGERLAKE:
   5360	case INTEL_FAM6_ROCKETLAKE:
   5361	case INTEL_FAM6_LAKEFIELD:
   5362	case INTEL_FAM6_ALDERLAKE:
   5363	case INTEL_FAM6_ALDERLAKE_L:
   5364		return INTEL_FAM6_CANNONLAKE_L;
   5365
   5366	case INTEL_FAM6_ATOM_TREMONT_L:
   5367		return INTEL_FAM6_ATOM_TREMONT;
   5368
   5369	case INTEL_FAM6_ICELAKE_D:
   5370	case INTEL_FAM6_SAPPHIRERAPIDS_X:
   5371		return INTEL_FAM6_ICELAKE_X;
   5372	}
   5373	return model;
   5374}
   5375
   5376void print_dev_latency(void)
   5377{
   5378	char *path = "/dev/cpu_dma_latency";
   5379	int fd;
   5380	int value;
   5381	int retval;
   5382
   5383	fd = open(path, O_RDONLY);
   5384	if (fd < 0) {
   5385		warn("fopen %s\n", path);
   5386		return;
   5387	}
   5388
   5389	retval = read(fd, (void *)&value, sizeof(int));
   5390	if (retval != sizeof(int)) {
   5391		warn("read %s\n", path);
   5392		close(fd);
   5393		return;
   5394	}
   5395	fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
   5396
   5397	close(fd);
   5398}
   5399
   5400/*
   5401 * Linux-perf manages the the HW instructions-retired counter
   5402 * by enabling when requested, and hiding rollover
   5403 */
   5404void linux_perf_init(void)
   5405{
   5406	if (!BIC_IS_ENABLED(BIC_IPC))
   5407		return;
   5408
   5409	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
   5410		return;
   5411
   5412	fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
   5413	if (fd_instr_count_percpu == NULL)
   5414		err(-1, "calloc fd_instr_count_percpu");
   5415
   5416	BIC_PRESENT(BIC_IPC);
   5417}
   5418
   5419void process_cpuid()
   5420{
   5421	unsigned int eax, ebx, ecx, edx;
   5422	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
   5423	unsigned int has_turbo;
   5424	unsigned long long ucode_patch = 0;
   5425
   5426	eax = ebx = ecx = edx = 0;
   5427
   5428	__cpuid(0, max_level, ebx, ecx, edx);
   5429
   5430	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
   5431		genuine_intel = 1;
   5432	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
   5433		authentic_amd = 1;
   5434	else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
   5435		hygon_genuine = 1;
   5436
   5437	if (!quiet)
   5438		fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
   5439			(char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
   5440
   5441	__cpuid(1, fms, ebx, ecx, edx);
   5442	family = (fms >> 8) & 0xf;
   5443	model = (fms >> 4) & 0xf;
   5444	stepping = fms & 0xf;
   5445	if (family == 0xf)
   5446		family += (fms >> 20) & 0xff;
   5447	if (family >= 6)
   5448		model += ((fms >> 16) & 0xf) << 4;
   5449	ecx_flags = ecx;
   5450	edx_flags = edx;
   5451
   5452	if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
   5453		warnx("get_msr(UCODE)\n");
   5454
   5455	/*
   5456	 * check max extended function levels of CPUID.
   5457	 * This is needed to check for invariant TSC.
   5458	 * This check is valid for both Intel and AMD.
   5459	 */
   5460	ebx = ecx = edx = 0;
   5461	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
   5462
   5463	if (!quiet) {
   5464		fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
   5465			family, model, stepping, family, model, stepping,
   5466			(unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
   5467		fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
   5468		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
   5469			ecx_flags & (1 << 0) ? "SSE3" : "-",
   5470			ecx_flags & (1 << 3) ? "MONITOR" : "-",
   5471			ecx_flags & (1 << 6) ? "SMX" : "-",
   5472			ecx_flags & (1 << 7) ? "EIST" : "-",
   5473			ecx_flags & (1 << 8) ? "TM2" : "-",
   5474			edx_flags & (1 << 4) ? "TSC" : "-",
   5475			edx_flags & (1 << 5) ? "MSR" : "-",
   5476			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
   5477			edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
   5478	}
   5479	if (genuine_intel) {
   5480		model_orig = model;
   5481		model = intel_model_duplicates(model);
   5482	}
   5483
   5484	if (!(edx_flags & (1 << 5)))
   5485		errx(1, "CPUID: no MSR");
   5486
   5487	if (max_extended_level >= 0x80000007) {
   5488
   5489		/*
   5490		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
   5491		 * this check is valid for both Intel and AMD
   5492		 */
   5493		__cpuid(0x80000007, eax, ebx, ecx, edx);
   5494		has_invariant_tsc = edx & (1 << 8);
   5495	}
   5496
   5497	/*
   5498	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
   5499	 * this check is valid for both Intel and AMD
   5500	 */
   5501
   5502	__cpuid(0x6, eax, ebx, ecx, edx);
   5503	has_aperf = ecx & (1 << 0);
   5504	if (has_aperf) {
   5505		BIC_PRESENT(BIC_Avg_MHz);
   5506		BIC_PRESENT(BIC_Busy);
   5507		BIC_PRESENT(BIC_Bzy_MHz);
   5508	}
   5509	do_dts = eax & (1 << 0);
   5510	if (do_dts)
   5511		BIC_PRESENT(BIC_CoreTmp);
   5512	has_turbo = eax & (1 << 1);
   5513	do_ptm = eax & (1 << 6);
   5514	if (do_ptm)
   5515		BIC_PRESENT(BIC_PkgTmp);
   5516	has_hwp = eax & (1 << 7);
   5517	has_hwp_notify = eax & (1 << 8);
   5518	has_hwp_activity_window = eax & (1 << 9);
   5519	has_hwp_epp = eax & (1 << 10);
   5520	has_hwp_pkg = eax & (1 << 11);
   5521	has_epb = ecx & (1 << 3);
   5522
   5523	if (!quiet)
   5524		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
   5525			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
   5526			has_aperf ? "" : "No-",
   5527			has_turbo ? "" : "No-",
   5528			do_dts ? "" : "No-",
   5529			do_ptm ? "" : "No-",
   5530			has_hwp ? "" : "No-",
   5531			has_hwp_notify ? "" : "No-",
   5532			has_hwp_activity_window ? "" : "No-",
   5533			has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
   5534
   5535	if (!quiet)
   5536		decode_misc_enable_msr();
   5537
   5538	if (max_level >= 0x7 && !quiet) {
   5539		int has_sgx;
   5540
   5541		ecx = 0;
   5542
   5543		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
   5544
   5545		has_sgx = ebx & (1 << 2);
   5546		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
   5547
   5548		if (has_sgx)
   5549			decode_feature_control_msr();
   5550	}
   5551
   5552	if (max_level >= 0x15) {
   5553		unsigned int eax_crystal;
   5554		unsigned int ebx_tsc;
   5555
   5556		/*
   5557		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
   5558		 */
   5559		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
   5560		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
   5561
   5562		if (ebx_tsc != 0) {
   5563
   5564			if (!quiet && (ebx != 0))
   5565				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
   5566					eax_crystal, ebx_tsc, crystal_hz);
   5567
   5568			if (crystal_hz == 0)
   5569				switch (model) {
   5570				case INTEL_FAM6_SKYLAKE_L:	/* SKL */
   5571					crystal_hz = 24000000;	/* 24.0 MHz */
   5572					break;
   5573				case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
   5574					crystal_hz = 25000000;	/* 25.0 MHz */
   5575					break;
   5576				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
   5577				case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
   5578					crystal_hz = 19200000;	/* 19.2 MHz */
   5579					break;
   5580				default:
   5581					crystal_hz = 0;
   5582				}
   5583
   5584			if (crystal_hz) {
   5585				tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
   5586				if (!quiet)
   5587					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
   5588						tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
   5589			}
   5590		}
   5591	}
   5592	if (max_level >= 0x16) {
   5593		unsigned int base_mhz, max_mhz, bus_mhz, edx;
   5594
   5595		/*
   5596		 * CPUID 16H Base MHz, Max MHz, Bus MHz
   5597		 */
   5598		base_mhz = max_mhz = bus_mhz = edx = 0;
   5599
   5600		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
   5601		if (!quiet)
   5602			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
   5603				base_mhz, max_mhz, bus_mhz);
   5604	}
   5605
   5606	if (has_aperf)
   5607		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
   5608
   5609	BIC_PRESENT(BIC_IRQ);
   5610	BIC_PRESENT(BIC_TSC_MHz);
   5611
   5612	if (probe_nhm_msrs(family, model)) {
   5613		do_nhm_platform_info = 1;
   5614		BIC_PRESENT(BIC_CPU_c1);
   5615		BIC_PRESENT(BIC_CPU_c3);
   5616		BIC_PRESENT(BIC_CPU_c6);
   5617		BIC_PRESENT(BIC_SMI);
   5618	}
   5619	do_snb_cstates = has_snb_msrs(family, model);
   5620
   5621	if (do_snb_cstates)
   5622		BIC_PRESENT(BIC_CPU_c7);
   5623
   5624	do_irtl_snb = has_snb_msrs(family, model);
   5625	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
   5626		BIC_PRESENT(BIC_Pkgpc2);
   5627	if (pkg_cstate_limit >= PCL__3)
   5628		BIC_PRESENT(BIC_Pkgpc3);
   5629	if (pkg_cstate_limit >= PCL__6)
   5630		BIC_PRESENT(BIC_Pkgpc6);
   5631	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
   5632		BIC_PRESENT(BIC_Pkgpc7);
   5633	if (has_slv_msrs(family, model)) {
   5634		BIC_NOT_PRESENT(BIC_Pkgpc2);
   5635		BIC_NOT_PRESENT(BIC_Pkgpc3);
   5636		BIC_PRESENT(BIC_Pkgpc6);
   5637		BIC_NOT_PRESENT(BIC_Pkgpc7);
   5638		BIC_PRESENT(BIC_Mod_c6);
   5639		use_c1_residency_msr = 1;
   5640	}
   5641	if (is_jvl(family, model)) {
   5642		BIC_NOT_PRESENT(BIC_CPU_c3);
   5643		BIC_NOT_PRESENT(BIC_CPU_c7);
   5644		BIC_NOT_PRESENT(BIC_Pkgpc2);
   5645		BIC_NOT_PRESENT(BIC_Pkgpc3);
   5646		BIC_NOT_PRESENT(BIC_Pkgpc6);
   5647		BIC_NOT_PRESENT(BIC_Pkgpc7);
   5648	}
   5649	if (is_dnv(family, model)) {
   5650		BIC_PRESENT(BIC_CPU_c1);
   5651		BIC_NOT_PRESENT(BIC_CPU_c3);
   5652		BIC_NOT_PRESENT(BIC_Pkgpc3);
   5653		BIC_NOT_PRESENT(BIC_CPU_c7);
   5654		BIC_NOT_PRESENT(BIC_Pkgpc7);
   5655		use_c1_residency_msr = 1;
   5656	}
   5657	if (is_skx(family, model) || is_icx(family, model)) {
   5658		BIC_NOT_PRESENT(BIC_CPU_c3);
   5659		BIC_NOT_PRESENT(BIC_Pkgpc3);
   5660		BIC_NOT_PRESENT(BIC_CPU_c7);
   5661		BIC_NOT_PRESENT(BIC_Pkgpc7);
   5662	}
   5663	if (is_bdx(family, model)) {
   5664		BIC_NOT_PRESENT(BIC_CPU_c7);
   5665		BIC_NOT_PRESENT(BIC_Pkgpc7);
   5666	}
   5667	if (has_c8910_msrs(family, model)) {
   5668		if (pkg_cstate_limit >= PCL__8)
   5669			BIC_PRESENT(BIC_Pkgpc8);
   5670		if (pkg_cstate_limit >= PCL__9)
   5671			BIC_PRESENT(BIC_Pkgpc9);
   5672		if (pkg_cstate_limit >= PCL_10)
   5673			BIC_PRESENT(BIC_Pkgpc10);
   5674	}
   5675	do_irtl_hsw = has_c8910_msrs(family, model);
   5676	if (has_skl_msrs(family, model)) {
   5677		BIC_PRESENT(BIC_Totl_c0);
   5678		BIC_PRESENT(BIC_Any_c0);
   5679		BIC_PRESENT(BIC_GFX_c0);
   5680		BIC_PRESENT(BIC_CPUGFX);
   5681	}
   5682	do_slm_cstates = is_slm(family, model);
   5683	do_knl_cstates = is_knl(family, model);
   5684
   5685	if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
   5686		BIC_NOT_PRESENT(BIC_CPU_c3);
   5687
   5688	if (!quiet)
   5689		decode_misc_pwr_mgmt_msr();
   5690
   5691	if (!quiet && has_slv_msrs(family, model))
   5692		decode_c6_demotion_policy_msr();
   5693
   5694	rapl_probe(family, model);
   5695	perf_limit_reasons_probe(family, model);
   5696	automatic_cstate_conversion_probe(family, model);
   5697
   5698	check_tcc_offset(model_orig);
   5699
   5700	if (!quiet)
   5701		dump_cstate_pstate_config_info(family, model);
   5702
   5703	if (!quiet)
   5704		print_dev_latency();
   5705	if (!quiet)
   5706		dump_sysfs_cstate_config();
   5707	if (!quiet)
   5708		dump_sysfs_pstate_config();
   5709
   5710	if (has_skl_msrs(family, model) || is_ehl(family, model))
   5711		calculate_tsc_tweak();
   5712
   5713	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
   5714		BIC_PRESENT(BIC_GFX_rc6);
   5715
   5716	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
   5717		BIC_PRESENT(BIC_GFXMHz);
   5718
   5719	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
   5720		BIC_PRESENT(BIC_GFXACTMHz);
   5721
   5722	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
   5723		BIC_PRESENT(BIC_CPU_LPI);
   5724	else
   5725		BIC_NOT_PRESENT(BIC_CPU_LPI);
   5726
   5727	if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
   5728		BIC_PRESENT(BIC_CORE_THROT_CNT);
   5729	else
   5730		BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
   5731
   5732	if (!access(sys_lpi_file_sysfs, R_OK)) {
   5733		sys_lpi_file = sys_lpi_file_sysfs;
   5734		BIC_PRESENT(BIC_SYS_LPI);
   5735	} else if (!access(sys_lpi_file_debugfs, R_OK)) {
   5736		sys_lpi_file = sys_lpi_file_debugfs;
   5737		BIC_PRESENT(BIC_SYS_LPI);
   5738	} else {
   5739		sys_lpi_file_sysfs = NULL;
   5740		BIC_NOT_PRESENT(BIC_SYS_LPI);
   5741	}
   5742
   5743	if (!quiet)
   5744		decode_misc_feature_control();
   5745
   5746	return;
   5747}
   5748
   5749/*
   5750 * in /dev/cpu/ return success for names that are numbers
   5751 * ie. filter out ".", "..", "microcode".
   5752 */
   5753int dir_filter(const struct dirent *dirp)
   5754{
   5755	if (isdigit(dirp->d_name[0]))
   5756		return 1;
   5757	else
   5758		return 0;
   5759}
   5760
   5761void topology_probe()
   5762{
   5763	int i;
   5764	int max_core_id = 0;
   5765	int max_package_id = 0;
   5766	int max_die_id = 0;
   5767	int max_siblings = 0;
   5768
   5769	/* Initialize num_cpus, max_cpu_num */
   5770	set_max_cpu_num();
   5771	topo.num_cpus = 0;
   5772	for_all_proc_cpus(count_cpus);
   5773	if (!summary_only && topo.num_cpus > 1)
   5774		BIC_PRESENT(BIC_CPU);
   5775
   5776	if (debug > 1)
   5777		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
   5778
   5779	cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
   5780	if (cpus == NULL)
   5781		err(1, "calloc cpus");
   5782
   5783	/*
   5784	 * Allocate and initialize cpu_present_set
   5785	 */
   5786	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
   5787	if (cpu_present_set == NULL)
   5788		err(3, "CPU_ALLOC");
   5789	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
   5790	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
   5791	for_all_proc_cpus(mark_cpu_present);
   5792
   5793	/*
   5794	 * Validate that all cpus in cpu_subset are also in cpu_present_set
   5795	 */
   5796	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
   5797		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
   5798			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
   5799				err(1, "cpu%d not present", i);
   5800	}
   5801
   5802	/*
   5803	 * Allocate and initialize cpu_affinity_set
   5804	 */
   5805	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
   5806	if (cpu_affinity_set == NULL)
   5807		err(3, "CPU_ALLOC");
   5808	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
   5809	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
   5810
   5811	for_all_proc_cpus(init_thread_id);
   5812
   5813	/*
   5814	 * For online cpus
   5815	 * find max_core_id, max_package_id
   5816	 */
   5817	for (i = 0; i <= topo.max_cpu_num; ++i) {
   5818		int siblings;
   5819
   5820		if (cpu_is_not_present(i)) {
   5821			if (debug > 1)
   5822				fprintf(outf, "cpu%d NOT PRESENT\n", i);
   5823			continue;
   5824		}
   5825
   5826		cpus[i].logical_cpu_id = i;
   5827
   5828		/* get package information */
   5829		cpus[i].physical_package_id = get_physical_package_id(i);
   5830		if (cpus[i].physical_package_id > max_package_id)
   5831			max_package_id = cpus[i].physical_package_id;
   5832
   5833		/* get die information */
   5834		cpus[i].die_id = get_die_id(i);
   5835		if (cpus[i].die_id > max_die_id)
   5836			max_die_id = cpus[i].die_id;
   5837
   5838		/* get numa node information */
   5839		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
   5840		if (cpus[i].physical_node_id > topo.max_node_num)
   5841			topo.max_node_num = cpus[i].physical_node_id;
   5842
   5843		/* get core information */
   5844		cpus[i].physical_core_id = get_core_id(i);
   5845		if (cpus[i].physical_core_id > max_core_id)
   5846			max_core_id = cpus[i].physical_core_id;
   5847
   5848		/* get thread information */
   5849		siblings = get_thread_siblings(&cpus[i]);
   5850		if (siblings > max_siblings)
   5851			max_siblings = siblings;
   5852		if (cpus[i].thread_id == 0)
   5853			topo.num_cores++;
   5854	}
   5855
   5856	topo.cores_per_node = max_core_id + 1;
   5857	if (debug > 1)
   5858		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
   5859	if (!summary_only && topo.cores_per_node > 1)
   5860		BIC_PRESENT(BIC_Core);
   5861
   5862	topo.num_die = max_die_id + 1;
   5863	if (debug > 1)
   5864		fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die);
   5865	if (!summary_only && topo.num_die > 1)
   5866		BIC_PRESENT(BIC_Die);
   5867
   5868	topo.num_packages = max_package_id + 1;
   5869	if (debug > 1)
   5870		fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
   5871	if (!summary_only && topo.num_packages > 1)
   5872		BIC_PRESENT(BIC_Package);
   5873
   5874	set_node_data();
   5875	if (debug > 1)
   5876		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
   5877	if (!summary_only && topo.nodes_per_pkg > 1)
   5878		BIC_PRESENT(BIC_Node);
   5879
   5880	topo.threads_per_core = max_siblings;
   5881	if (debug > 1)
   5882		fprintf(outf, "max_siblings %d\n", max_siblings);
   5883
   5884	if (debug < 1)
   5885		return;
   5886
   5887	for (i = 0; i <= topo.max_cpu_num; ++i) {
   5888		if (cpu_is_not_present(i))
   5889			continue;
   5890		fprintf(outf,
   5891			"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
   5892			i, cpus[i].physical_package_id, cpus[i].die_id,
   5893			cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
   5894	}
   5895
   5896}
   5897
   5898void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
   5899{
   5900	int i;
   5901	int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
   5902	int num_threads = topo.threads_per_core * num_cores;
   5903
   5904	*t = calloc(num_threads, sizeof(struct thread_data));
   5905	if (*t == NULL)
   5906		goto error;
   5907
   5908	for (i = 0; i < num_threads; i++)
   5909		(*t)[i].cpu_id = -1;
   5910
   5911	*c = calloc(num_cores, sizeof(struct core_data));
   5912	if (*c == NULL)
   5913		goto error;
   5914
   5915	for (i = 0; i < num_cores; i++)
   5916		(*c)[i].core_id = -1;
   5917
   5918	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
   5919	if (*p == NULL)
   5920		goto error;
   5921
   5922	for (i = 0; i < topo.num_packages; i++)
   5923		(*p)[i].package_id = i;
   5924
   5925	return;
   5926error:
   5927	err(1, "calloc counters");
   5928}
   5929
   5930/*
   5931 * init_counter()
   5932 *
   5933 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
   5934 */
   5935void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
   5936{
   5937	int pkg_id = cpus[cpu_id].physical_package_id;
   5938	int node_id = cpus[cpu_id].logical_node_id;
   5939	int core_id = cpus[cpu_id].physical_core_id;
   5940	int thread_id = cpus[cpu_id].thread_id;
   5941	struct thread_data *t;
   5942	struct core_data *c;
   5943	struct pkg_data *p;
   5944
   5945	/* Workaround for systems where physical_node_id==-1
   5946	 * and logical_node_id==(-1 - topo.num_cpus)
   5947	 */
   5948	if (node_id < 0)
   5949		node_id = 0;
   5950
   5951	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
   5952	c = GET_CORE(core_base, core_id, node_id, pkg_id);
   5953	p = GET_PKG(pkg_base, pkg_id);
   5954
   5955	t->cpu_id = cpu_id;
   5956	if (thread_id == 0) {
   5957		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
   5958		if (cpu_is_first_core_in_package(cpu_id))
   5959			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
   5960	}
   5961
   5962	c->core_id = core_id;
   5963	p->package_id = pkg_id;
   5964}
   5965
   5966int initialize_counters(int cpu_id)
   5967{
   5968	init_counter(EVEN_COUNTERS, cpu_id);
   5969	init_counter(ODD_COUNTERS, cpu_id);
   5970	return 0;
   5971}
   5972
   5973void allocate_output_buffer()
   5974{
   5975	output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
   5976	outp = output_buffer;
   5977	if (outp == NULL)
   5978		err(-1, "calloc output buffer");
   5979}
   5980
   5981void allocate_fd_percpu(void)
   5982{
   5983	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
   5984	if (fd_percpu == NULL)
   5985		err(-1, "calloc fd_percpu");
   5986}
   5987
   5988void allocate_irq_buffers(void)
   5989{
   5990	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
   5991	if (irq_column_2_cpu == NULL)
   5992		err(-1, "calloc %d", topo.num_cpus);
   5993
   5994	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
   5995	if (irqs_per_cpu == NULL)
   5996		err(-1, "calloc %d", topo.max_cpu_num + 1);
   5997}
   5998
   5999void setup_all_buffers(void)
   6000{
   6001	topology_probe();
   6002	allocate_irq_buffers();
   6003	allocate_fd_percpu();
   6004	allocate_counters(&thread_even, &core_even, &package_even);
   6005	allocate_counters(&thread_odd, &core_odd, &package_odd);
   6006	allocate_output_buffer();
   6007	for_all_proc_cpus(initialize_counters);
   6008}
   6009
   6010void set_base_cpu(void)
   6011{
   6012	base_cpu = sched_getcpu();
   6013	if (base_cpu < 0)
   6014		err(-ENODEV, "No valid cpus found");
   6015
   6016	if (debug > 1)
   6017		fprintf(outf, "base_cpu = %d\n", base_cpu);
   6018}
   6019
   6020void turbostat_init()
   6021{
   6022	setup_all_buffers();
   6023	set_base_cpu();
   6024	check_dev_msr();
   6025	check_permissions();
   6026	process_cpuid();
   6027	linux_perf_init();
   6028
   6029	if (!quiet)
   6030		for_all_cpus(print_hwp, ODD_COUNTERS);
   6031
   6032	if (!quiet)
   6033		for_all_cpus(print_epb, ODD_COUNTERS);
   6034
   6035	if (!quiet)
   6036		for_all_cpus(print_perf_limit, ODD_COUNTERS);
   6037
   6038	if (!quiet)
   6039		for_all_cpus(print_rapl, ODD_COUNTERS);
   6040
   6041	for_all_cpus(set_temperature_target, ODD_COUNTERS);
   6042
   6043	for_all_cpus(get_cpu_type, ODD_COUNTERS);
   6044	for_all_cpus(get_cpu_type, EVEN_COUNTERS);
   6045
   6046	if (!quiet)
   6047		for_all_cpus(print_thermal, ODD_COUNTERS);
   6048
   6049	if (!quiet && do_irtl_snb)
   6050		print_irtl();
   6051
   6052	if (DO_BIC(BIC_IPC))
   6053		(void)get_instr_count_fd(base_cpu);
   6054}
   6055
   6056int fork_it(char **argv)
   6057{
   6058	pid_t child_pid;
   6059	int status;
   6060
   6061	snapshot_proc_sysfs_files();
   6062	status = for_all_cpus(get_counters, EVEN_COUNTERS);
   6063	first_counter_read = 0;
   6064	if (status)
   6065		exit(status);
   6066	/* clear affinity side-effect of get_counters() */
   6067	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
   6068	gettimeofday(&tv_even, (struct timezone *)NULL);
   6069
   6070	child_pid = fork();
   6071	if (!child_pid) {
   6072		/* child */
   6073		execvp(argv[0], argv);
   6074		err(errno, "exec %s", argv[0]);
   6075	} else {
   6076
   6077		/* parent */
   6078		if (child_pid == -1)
   6079			err(1, "fork");
   6080
   6081		signal(SIGINT, SIG_IGN);
   6082		signal(SIGQUIT, SIG_IGN);
   6083		if (waitpid(child_pid, &status, 0) == -1)
   6084			err(status, "waitpid");
   6085
   6086		if (WIFEXITED(status))
   6087			status = WEXITSTATUS(status);
   6088	}
   6089	/*
   6090	 * n.b. fork_it() does not check for errors from for_all_cpus()
   6091	 * because re-starting is problematic when forking
   6092	 */
   6093	snapshot_proc_sysfs_files();
   6094	for_all_cpus(get_counters, ODD_COUNTERS);
   6095	gettimeofday(&tv_odd, (struct timezone *)NULL);
   6096	timersub(&tv_odd, &tv_even, &tv_delta);
   6097	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
   6098		fprintf(outf, "%s: Counter reset detected\n", progname);
   6099	else {
   6100		compute_average(EVEN_COUNTERS);
   6101		format_all_counters(EVEN_COUNTERS);
   6102	}
   6103
   6104	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
   6105
   6106	flush_output_stderr();
   6107
   6108	return status;
   6109}
   6110
   6111int get_and_dump_counters(void)
   6112{
   6113	int status;
   6114
   6115	snapshot_proc_sysfs_files();
   6116	status = for_all_cpus(get_counters, ODD_COUNTERS);
   6117	if (status)
   6118		return status;
   6119
   6120	status = for_all_cpus(dump_counters, ODD_COUNTERS);
   6121	if (status)
   6122		return status;
   6123
   6124	flush_output_stdout();
   6125
   6126	return status;
   6127}
   6128
   6129void print_version()
   6130{
   6131	fprintf(outf, "turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>\n");
   6132}
   6133
   6134int add_counter(unsigned int msr_num, char *path, char *name,
   6135		unsigned int width, enum counter_scope scope,
   6136		enum counter_type type, enum counter_format format, int flags)
   6137{
   6138	struct msr_counter *msrp;
   6139
   6140	msrp = calloc(1, sizeof(struct msr_counter));
   6141	if (msrp == NULL) {
   6142		perror("calloc");
   6143		exit(1);
   6144	}
   6145
   6146	msrp->msr_num = msr_num;
   6147	strncpy(msrp->name, name, NAME_BYTES - 1);
   6148	if (path)
   6149		strncpy(msrp->path, path, PATH_BYTES - 1);
   6150	msrp->width = width;
   6151	msrp->type = type;
   6152	msrp->format = format;
   6153	msrp->flags = flags;
   6154
   6155	switch (scope) {
   6156
   6157	case SCOPE_CPU:
   6158		msrp->next = sys.tp;
   6159		sys.tp = msrp;
   6160		sys.added_thread_counters++;
   6161		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
   6162			fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS);
   6163			exit(-1);
   6164		}
   6165		break;
   6166
   6167	case SCOPE_CORE:
   6168		msrp->next = sys.cp;
   6169		sys.cp = msrp;
   6170		sys.added_core_counters++;
   6171		if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
   6172			fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS);
   6173			exit(-1);
   6174		}
   6175		break;
   6176
   6177	case SCOPE_PACKAGE:
   6178		msrp->next = sys.pp;
   6179		sys.pp = msrp;
   6180		sys.added_package_counters++;
   6181		if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
   6182			fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS);
   6183			exit(-1);
   6184		}
   6185		break;
   6186	}
   6187
   6188	return 0;
   6189}
   6190
   6191void parse_add_command(char *add_command)
   6192{
   6193	int msr_num = 0;
   6194	char *path = NULL;
   6195	char name_buffer[NAME_BYTES] = "";
   6196	int width = 64;
   6197	int fail = 0;
   6198	enum counter_scope scope = SCOPE_CPU;
   6199	enum counter_type type = COUNTER_CYCLES;
   6200	enum counter_format format = FORMAT_DELTA;
   6201
   6202	while (add_command) {
   6203
   6204		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
   6205			goto next;
   6206
   6207		if (sscanf(add_command, "msr%d", &msr_num) == 1)
   6208			goto next;
   6209
   6210		if (*add_command == '/') {
   6211			path = add_command;
   6212			goto next;
   6213		}
   6214
   6215		if (sscanf(add_command, "u%d", &width) == 1) {
   6216			if ((width == 32) || (width == 64))
   6217				goto next;
   6218			width = 64;
   6219		}
   6220		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
   6221			scope = SCOPE_CPU;
   6222			goto next;
   6223		}
   6224		if (!strncmp(add_command, "core", strlen("core"))) {
   6225			scope = SCOPE_CORE;
   6226			goto next;
   6227		}
   6228		if (!strncmp(add_command, "package", strlen("package"))) {
   6229			scope = SCOPE_PACKAGE;
   6230			goto next;
   6231		}
   6232		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
   6233			type = COUNTER_CYCLES;
   6234			goto next;
   6235		}
   6236		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
   6237			type = COUNTER_SECONDS;
   6238			goto next;
   6239		}
   6240		if (!strncmp(add_command, "usec", strlen("usec"))) {
   6241			type = COUNTER_USEC;
   6242			goto next;
   6243		}
   6244		if (!strncmp(add_command, "raw", strlen("raw"))) {
   6245			format = FORMAT_RAW;
   6246			goto next;
   6247		}
   6248		if (!strncmp(add_command, "delta", strlen("delta"))) {
   6249			format = FORMAT_DELTA;
   6250			goto next;
   6251		}
   6252		if (!strncmp(add_command, "percent", strlen("percent"))) {
   6253			format = FORMAT_PERCENT;
   6254			goto next;
   6255		}
   6256
   6257		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
   6258			char *eos;
   6259
   6260			eos = strchr(name_buffer, ',');
   6261			if (eos)
   6262				*eos = '\0';
   6263			goto next;
   6264		}
   6265
   6266next:
   6267		add_command = strchr(add_command, ',');
   6268		if (add_command) {
   6269			*add_command = '\0';
   6270			add_command++;
   6271		}
   6272
   6273	}
   6274	if ((msr_num == 0) && (path == NULL)) {
   6275		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
   6276		fail++;
   6277	}
   6278
   6279	/* generate default column header */
   6280	if (*name_buffer == '\0') {
   6281		if (width == 32)
   6282			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
   6283		else
   6284			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
   6285	}
   6286
   6287	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
   6288		fail++;
   6289
   6290	if (fail) {
   6291		help();
   6292		exit(1);
   6293	}
   6294}
   6295
   6296int is_deferred_add(char *name)
   6297{
   6298	int i;
   6299
   6300	for (i = 0; i < deferred_add_index; ++i)
   6301		if (!strcmp(name, deferred_add_names[i]))
   6302			return 1;
   6303	return 0;
   6304}
   6305
   6306int is_deferred_skip(char *name)
   6307{
   6308	int i;
   6309
   6310	for (i = 0; i < deferred_skip_index; ++i)
   6311		if (!strcmp(name, deferred_skip_names[i]))
   6312			return 1;
   6313	return 0;
   6314}
   6315
   6316void probe_sysfs(void)
   6317{
   6318	char path[64];
   6319	char name_buf[16];
   6320	FILE *input;
   6321	int state;
   6322	char *sp;
   6323
   6324	for (state = 10; state >= 0; --state) {
   6325
   6326		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
   6327		input = fopen(path, "r");
   6328		if (input == NULL)
   6329			continue;
   6330		if (!fgets(name_buf, sizeof(name_buf), input))
   6331			err(1, "%s: failed to read file", path);
   6332
   6333		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
   6334		sp = strchr(name_buf, '-');
   6335		if (!sp)
   6336			sp = strchrnul(name_buf, '\n');
   6337		*sp = '%';
   6338		*(sp + 1) = '\0';
   6339
   6340		remove_underbar(name_buf);
   6341
   6342		fclose(input);
   6343
   6344		sprintf(path, "cpuidle/state%d/time", state);
   6345
   6346		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
   6347			continue;
   6348
   6349		if (is_deferred_skip(name_buf))
   6350			continue;
   6351
   6352		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU);
   6353	}
   6354
   6355	for (state = 10; state >= 0; --state) {
   6356
   6357		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
   6358		input = fopen(path, "r");
   6359		if (input == NULL)
   6360			continue;
   6361		if (!fgets(name_buf, sizeof(name_buf), input))
   6362			err(1, "%s: failed to read file", path);
   6363		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
   6364		sp = strchr(name_buf, '-');
   6365		if (!sp)
   6366			sp = strchrnul(name_buf, '\n');
   6367		*sp = '\0';
   6368		fclose(input);
   6369
   6370		remove_underbar(name_buf);
   6371
   6372		sprintf(path, "cpuidle/state%d/usage", state);
   6373
   6374		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
   6375			continue;
   6376
   6377		if (is_deferred_skip(name_buf))
   6378			continue;
   6379
   6380		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU);
   6381	}
   6382
   6383}
   6384
   6385/*
   6386 * parse cpuset with following syntax
   6387 * 1,2,4..6,8-10 and set bits in cpu_subset
   6388 */
   6389void parse_cpu_command(char *optarg)
   6390{
   6391	unsigned int start, end;
   6392	char *next;
   6393
   6394	if (!strcmp(optarg, "core")) {
   6395		if (cpu_subset)
   6396			goto error;
   6397		show_core_only++;
   6398		return;
   6399	}
   6400	if (!strcmp(optarg, "package")) {
   6401		if (cpu_subset)
   6402			goto error;
   6403		show_pkg_only++;
   6404		return;
   6405	}
   6406	if (show_core_only || show_pkg_only)
   6407		goto error;
   6408
   6409	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
   6410	if (cpu_subset == NULL)
   6411		err(3, "CPU_ALLOC");
   6412	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
   6413
   6414	CPU_ZERO_S(cpu_subset_size, cpu_subset);
   6415
   6416	next = optarg;
   6417
   6418	while (next && *next) {
   6419
   6420		if (*next == '-')	/* no negative cpu numbers */
   6421			goto error;
   6422
   6423		start = strtoul(next, &next, 10);
   6424
   6425		if (start >= CPU_SUBSET_MAXCPUS)
   6426			goto error;
   6427		CPU_SET_S(start, cpu_subset_size, cpu_subset);
   6428
   6429		if (*next == '\0')
   6430			break;
   6431
   6432		if (*next == ',') {
   6433			next += 1;
   6434			continue;
   6435		}
   6436
   6437		if (*next == '-') {
   6438			next += 1;	/* start range */
   6439		} else if (*next == '.') {
   6440			next += 1;
   6441			if (*next == '.')
   6442				next += 1;	/* start range */
   6443			else
   6444				goto error;
   6445		}
   6446
   6447		end = strtoul(next, &next, 10);
   6448		if (end <= start)
   6449			goto error;
   6450
   6451		while (++start <= end) {
   6452			if (start >= CPU_SUBSET_MAXCPUS)
   6453				goto error;
   6454			CPU_SET_S(start, cpu_subset_size, cpu_subset);
   6455		}
   6456
   6457		if (*next == ',')
   6458			next += 1;
   6459		else if (*next != '\0')
   6460			goto error;
   6461	}
   6462
   6463	return;
   6464
   6465error:
   6466	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
   6467	help();
   6468	exit(-1);
   6469}
   6470
   6471void cmdline(int argc, char **argv)
   6472{
   6473	int opt;
   6474	int option_index = 0;
   6475	static struct option long_options[] = {
   6476		{ "add", required_argument, 0, 'a' },
   6477		{ "cpu", required_argument, 0, 'c' },
   6478		{ "Dump", no_argument, 0, 'D' },
   6479		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
   6480		{ "enable", required_argument, 0, 'e' },
   6481		{ "interval", required_argument, 0, 'i' },
   6482		{ "IPC", no_argument, 0, 'I' },
   6483		{ "num_iterations", required_argument, 0, 'n' },
   6484		{ "header_iterations", required_argument, 0, 'N' },
   6485		{ "help", no_argument, 0, 'h' },
   6486		{ "hide", required_argument, 0, 'H' },	// meh, -h taken by --help
   6487		{ "Joules", no_argument, 0, 'J' },
   6488		{ "list", no_argument, 0, 'l' },
   6489		{ "out", required_argument, 0, 'o' },
   6490		{ "quiet", no_argument, 0, 'q' },
   6491		{ "show", required_argument, 0, 's' },
   6492		{ "Summary", no_argument, 0, 'S' },
   6493		{ "TCC", required_argument, 0, 'T' },
   6494		{ "version", no_argument, 0, 'v' },
   6495		{ 0, 0, 0, 0 }
   6496	};
   6497
   6498	progname = argv[0];
   6499
   6500	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
   6501		switch (opt) {
   6502		case 'a':
   6503			parse_add_command(optarg);
   6504			break;
   6505		case 'c':
   6506			parse_cpu_command(optarg);
   6507			break;
   6508		case 'D':
   6509			dump_only++;
   6510			break;
   6511		case 'e':
   6512			/* --enable specified counter */
   6513			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
   6514			break;
   6515		case 'd':
   6516			debug++;
   6517			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
   6518			break;
   6519		case 'H':
   6520			/*
   6521			 * --hide: do not show those specified
   6522			 *  multiple invocations simply clear more bits in enabled mask
   6523			 */
   6524			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
   6525			break;
   6526		case 'h':
   6527		default:
   6528			help();
   6529			exit(1);
   6530		case 'i':
   6531			{
   6532				double interval = strtod(optarg, NULL);
   6533
   6534				if (interval < 0.001) {
   6535					fprintf(outf, "interval %f seconds is too small\n", interval);
   6536					exit(2);
   6537				}
   6538
   6539				interval_tv.tv_sec = interval_ts.tv_sec = interval;
   6540				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
   6541				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
   6542			}
   6543			break;
   6544		case 'J':
   6545			rapl_joules++;
   6546			break;
   6547		case 'l':
   6548			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
   6549			list_header_only++;
   6550			quiet++;
   6551			break;
   6552		case 'o':
   6553			outf = fopen_or_die(optarg, "w");
   6554			break;
   6555		case 'q':
   6556			quiet = 1;
   6557			break;
   6558		case 'n':
   6559			num_iterations = strtod(optarg, NULL);
   6560
   6561			if (num_iterations <= 0) {
   6562				fprintf(outf, "iterations %d should be positive number\n", num_iterations);
   6563				exit(2);
   6564			}
   6565			break;
   6566		case 'N':
   6567			header_iterations = strtod(optarg, NULL);
   6568
   6569			if (header_iterations <= 0) {
   6570				fprintf(outf, "iterations %d should be positive number\n", header_iterations);
   6571				exit(2);
   6572			}
   6573			break;
   6574		case 's':
   6575			/*
   6576			 * --show: show only those specified
   6577			 *  The 1st invocation will clear and replace the enabled mask
   6578			 *  subsequent invocations can add to it.
   6579			 */
   6580			if (shown == 0)
   6581				bic_enabled = bic_lookup(optarg, SHOW_LIST);
   6582			else
   6583				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
   6584			shown = 1;
   6585			break;
   6586		case 'S':
   6587			summary_only++;
   6588			break;
   6589		case 'T':
   6590			tj_max_override = atoi(optarg);
   6591			break;
   6592		case 'v':
   6593			print_version();
   6594			exit(0);
   6595			break;
   6596		}
   6597	}
   6598}
   6599
   6600int main(int argc, char **argv)
   6601{
   6602	outf = stderr;
   6603	cmdline(argc, argv);
   6604
   6605	if (!quiet)
   6606		print_version();
   6607
   6608	probe_sysfs();
   6609
   6610	turbostat_init();
   6611
   6612	msr_sum_record();
   6613
   6614	/* dump counters and exit */
   6615	if (dump_only)
   6616		return get_and_dump_counters();
   6617
   6618	/* list header and exit */
   6619	if (list_header_only) {
   6620		print_header(",");
   6621		flush_output_stdout();
   6622		return 0;
   6623	}
   6624
   6625	/*
   6626	 * if any params left, it must be a command to fork
   6627	 */
   6628	if (argc - optind)
   6629		return fork_it(argv + optind);
   6630	else
   6631		turbostat_loop();
   6632
   6633	return 0;
   6634}