cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

aperfmperf.c (10952B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86 APERF/MPERF KHz calculation for
 * /sys/.../cpufreq/scaling_cur_freq
 *
 * Copyright (C) 2017 Intel Corp.
 * Author: Len Brown <len.brown@intel.com>
 */
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/sched/isolation.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>

#include <asm/cpu.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>

#include "cpu.h"

struct aperfmperf {
	seqcount_t	seq;
	unsigned long	last_update;
	u64		acnt;
	u64		mcnt;
	u64		aperf;
	u64		mperf;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = {
	.seq = SEQCNT_ZERO(cpu_samples.seq)
};

static void init_counter_refs(void)
{
	u64 aperf, mperf;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	this_cpu_write(cpu_samples.aperf, aperf);
	this_cpu_write(cpu_samples.mperf, mperf);
}

#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
/*
 * APERF/MPERF frequency ratio computation.
 *
 * The scheduler wants to do frequency invariant accounting and needs a <1
 * ratio to account for the 'current' frequency, corresponding to
 * freq_curr / freq_max.
 *
 * Since the frequency freq_curr on x86 is controlled by a micro-controller
 * and our P-state setting is little more than a request/hint, we need to
 * observe the effective frequency 'BusyMHz', i.e. the average frequency over
 * a time interval after discarding idle time. This is given by:
 *
 *   BusyMHz = delta_APERF / delta_MPERF * freq_base
 *
 * where freq_base is the max non-turbo P-state.
 *
 * The freq_max term has to be set to a somewhat arbitrary value, because we
 * can't know which turbo states will be available at a given point in time:
 * it all depends on the thermal headroom of the entire package. We set it to
 * the turbo level with 4 cores active.
 *
 * Benchmarks show that's a good compromise between the 1C turbo ratio
 * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
 * which would ignore the entire turbo range (a conspicuous part, making
 * freq_curr/freq_max always maxed out).
 *
 * An exception to the heuristic above is the Atom uarch, where we choose the
 * highest turbo level for freq_max since Atoms are generally oriented towards
 * power efficiency.
 *
 * Setting freq_max to anything less than the 1C turbo ratio causes the ratio
 * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
 */
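/*
 * Illustrative example (editor's addition, numbers assumed for clarity):
 * on a part with freq_base = 2000 MHz and a 4C turbo level of 3000 MHz,
 * freq_max is 3000 MHz. If one tick interval shows
 * delta_APERF / delta_MPERF = 1.2, then
 *
 *   BusyMHz = 1.2 * 2000 MHz = 2400 MHz
 *
 * and the scheduler's ratio is freq_curr / freq_max = 2400 / 3000 = 0.8,
 * i.e. roughly 819 in SCHED_CAPACITY_SCALE (1024) fixed-point units.
 */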

DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);

static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;

void arch_set_max_freq_ratio(bool turbo_disabled)
{
	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
					arch_turbo_freq_ratio;
}
EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);

static bool __init turbo_disabled(void)
{
	u64 misc_en;
	int err;

	err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
	if (err)
		return false;

	return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
}

static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	int err;

	err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 16) & 0x3F;     /* max P state */
	*turbo_freq = *turbo_freq & 0x3F;           /* 1C turbo    */

	return true;
}

#define X86_MATCH(model)					\
	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)

static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = {
	X86_MATCH(XEON_PHI_KNL),
	X86_MATCH(XEON_PHI_KNM),
	{}
};

static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = {
	X86_MATCH(SKYLAKE_X),
	{}
};

static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = {
	X86_MATCH(ATOM_GOLDMONT),
	X86_MATCH(ATOM_GOLDMONT_D),
	X86_MATCH(ATOM_GOLDMONT_PLUS),
	{}
};

static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
					  int num_delta_fratio)
{
	int fratio, delta_fratio, found;
	int err, i;
	u64 msr;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	fratio = (msr >> 8) & 0xFF;
	i = 16;
	found = 0;
	do {
		if (found >= num_delta_fratio) {
			*turbo_freq = fratio;
			return true;
		}

		delta_fratio = (msr >> (i + 5)) & 0x7;

		if (delta_fratio) {
			found += 1;
			fratio -= delta_fratio;
		}

		i += 8;
	} while (i < 64);

	return true;
}

static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
{
	u64 ratios, counts;
	u32 group_size;
	int err, i;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;      /* max P state */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
	if (err)
		return false;

	for (i = 0; i < 64; i += 8) {
		group_size = (counts >> i) & 0xFF;
		if (group_size >= size) {
			*turbo_freq = (ratios >> i) & 0xFF;
			return true;
		}
	}

	return false;
}

static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	u64 msr;
	int err;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;    /* max P state */
	*turbo_freq = (msr >> 24) & 0xFF;         /* 4C turbo    */

	/* The CPU may have less than 4 cores */
	if (!*turbo_freq)
		*turbo_freq = msr & 0xFF;         /* 1C turbo    */

	return true;
}

static bool __init intel_set_max_freq_ratio(void)
{
	u64 base_freq, turbo_freq;
	u64 turbo_ratio;

	if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
	    knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
		goto out;

	if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	return false;

out:
	/*
	 * Some hypervisors advertise X86_FEATURE_APERFMPERF
	 * but then fill all MSRs with zeroes.
	 * Some CPUs have turbo boost but don't declare any turbo ratio
	 * in MSR_TURBO_RATIO_LIMIT.
	 */
	if (!base_freq || !turbo_freq) {
		pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
		return false;
	}

	turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
	if (!turbo_ratio) {
		pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
		return false;
	}

	arch_turbo_freq_ratio = turbo_ratio;
	arch_set_max_freq_ratio(turbo_disabled());

	return true;
}
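/*
 * Illustrative example (editor's addition, assumed ratio values): with a
 * base ratio of 20 and a 4C turbo ratio of 30 read from the MSRs (both in
 * bus-clock multiples, which cancel out), the code above computes
 *
 *   turbo_ratio = 30 * SCHED_CAPACITY_SCALE / 20 = 30 * 1024 / 20 = 1536,
 *
 * i.e. freq_max is advertised to the scheduler as 1.5x the base frequency.
 */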

#ifdef CONFIG_PM_SLEEP
static struct syscore_ops freq_invariance_syscore_ops = {
	.resume = init_counter_refs,
};

static void register_freq_invariance_syscore_ops(void)
{
	register_syscore_ops(&freq_invariance_syscore_ops);
}
#else
static inline void register_freq_invariance_syscore_ops(void) {}
#endif

static void freq_invariance_enable(void)
{
	if (static_branch_unlikely(&arch_scale_freq_key)) {
		WARN_ON_ONCE(1);
		return;
	}
	static_branch_enable(&arch_scale_freq_key);
	register_freq_invariance_syscore_ops();
	pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
}

void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled)
{
	arch_turbo_freq_ratio = ratio;
	arch_set_max_freq_ratio(turbo_disabled);
	freq_invariance_enable();
}

static void __init bp_init_freq_invariance(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return;

	if (intel_set_max_freq_ratio())
		freq_invariance_enable();
}

static void disable_freq_invariance_workfn(struct work_struct *work)
{
	static_branch_disable(&arch_scale_freq_key);
}

static DECLARE_WORK(disable_freq_invariance_work,
		    disable_freq_invariance_workfn);

DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;

static void scale_freq_tick(u64 acnt, u64 mcnt)
{
	u64 freq_scale;

	if (!arch_scale_freq_invariant())
		return;

	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
		goto error;

	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
		goto error;

	freq_scale = div64_u64(acnt, mcnt);
	if (!freq_scale)
		goto error;

	if (freq_scale > SCHED_CAPACITY_SCALE)
		freq_scale = SCHED_CAPACITY_SCALE;

	this_cpu_write(arch_freq_scale, freq_scale);
	return;

error:
	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
	schedule_work(&disable_freq_invariance_work);
}
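/*
 * Illustrative example (editor's addition, assumed counter values): with
 * SCHED_CAPACITY_SHIFT = 10 and arch_max_freq_ratio = 1536 (the 1.5x example
 * above), a tick with acnt = 1200 and mcnt = 1000 yields
 *
 *   freq_scale = (1200 << 20) / (1000 * 1536) = 1258291200 / 1536000 ~= 819,
 *
 * i.e. about 0.8 * SCHED_CAPACITY_SCALE, matching BusyMHz / freq_max.
 */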
#else
static inline void bp_init_freq_invariance(void) { }
static inline void scale_freq_tick(u64 acnt, u64 mcnt) { }
#endif /* CONFIG_X86_64 && CONFIG_SMP */

void arch_scale_freq_tick(void)
{
	struct aperfmperf *s = this_cpu_ptr(&cpu_samples);
	u64 acnt, mcnt, aperf, mperf;

	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		return;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);
	acnt = aperf - s->aperf;
	mcnt = mperf - s->mperf;

	s->aperf = aperf;
	s->mperf = mperf;

	raw_write_seqcount_begin(&s->seq);
	s->last_update = jiffies;
	s->acnt = acnt;
	s->mcnt = mcnt;
	raw_write_seqcount_end(&s->seq);

	scale_freq_tick(acnt, mcnt);
}

/*
 * Discard samples older than the defined maximum sample age of 20ms. There
 * is no point in sending IPIs in such a case. If the scheduler tick was
 * not running then the CPU is either idle or isolated.
 */
#define MAX_SAMPLE_AGE	((unsigned long)HZ / 50)
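/*
 * Editor's note (illustrative arithmetic): with a common CONFIG_HZ of 250
 * this works out to 250 / 50 = 5 jiffies of 4ms each; with HZ=1000 it is
 * 20 jiffies of 1ms each. Either way the cutoff is 20ms.
 */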

unsigned int arch_freq_get_on_cpu(int cpu)
{
	struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu);
	unsigned int seq, freq;
	unsigned long last;
	u64 acnt, mcnt;

	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		goto fallback;

	do {
		seq = raw_read_seqcount_begin(&s->seq);
		last = s->last_update;
		acnt = s->acnt;
		mcnt = s->mcnt;
	} while (read_seqcount_retry(&s->seq, seq));

	/*
	 * Bail on invalid count and when the last update was too long ago,
	 * which covers idle and NOHZ full CPUs.
	 */
	if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE)
		goto fallback;

	return div64_u64((cpu_khz * acnt), mcnt);

fallback:
	freq = cpufreq_quick_get(cpu);
	return freq ? freq : cpu_khz;
}
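/*
 * Illustrative example (editor's addition, assumed values): with
 * cpu_khz = 2000000 (a 2 GHz base clock) and the 1.2 APERF/MPERF ratio from
 * above (acnt = 1200, mcnt = 1000), the reported frequency is
 *
 *   2000000 * 1200 / 1000 = 2400000 kHz,
 *
 * i.e. the 2400 MHz BusyMHz estimate shown via
 * /sys/.../cpufreq/scaling_cur_freq.
 */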

static int __init bp_init_aperfmperf(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		return 0;

	init_counter_refs();
	bp_init_freq_invariance();
	return 0;
}
early_initcall(bp_init_aperfmperf);

void ap_init_aperfmperf(void)
{
	if (cpu_feature_enabled(X86_FEATURE_APERFMPERF))
		init_counter_refs();
}
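
As the file header notes, the kHz value computed by arch_freq_get_on_cpu() backs the per-CPU scaling_cur_freq attribute in sysfs. The following stand-alone user-space sketch (an editor's addition, not part of the kernel source; it assumes a Linux system exposing cpufreq attributes at the usual sysfs path) reads that attribute for one CPU and prints it, which is a simple way to observe the BusyMHz estimate this code produces.

/* Read scaling_cur_freq for one CPU (default CPU 0) and print it. */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	/* CPU index is an optional command-line argument. */
	int cpu = argc > 1 ? atoi(argv[1]) : 0;
	char path[128];
	unsigned long khz;
	FILE *f;

	/* The attribute is reported in kHz, as returned by arch_freq_get_on_cpu(). */
	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu);

	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}

	if (fscanf(f, "%lu", &khz) != 1) {
		fprintf(stderr, "could not parse %s\n", path);
		fclose(f);
		return 1;
	}
	fclose(f);

	printf("cpu%d: %lu kHz (~%lu MHz)\n", cpu, khz, khz / 1000);
	return 0;
}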