cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

acpi-cpufreq.c (26740B)


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * acpi-cpufreq.c - ACPI Processor P-States Driver
 *
 *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>

#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/uaccess.h>

#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>

#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>

MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");

enum {
	UNDEFINED_CAPABLE = 0,
	SYSTEM_INTEL_MSR_CAPABLE,
	SYSTEM_AMD_MSR_CAPABLE,
	SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE		(0xffff)
#define AMD_MSR_RANGE		(0x7)
#define HYGON_MSR_RANGE		(0x7)

#define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)

struct acpi_cpufreq_data {
	unsigned int resume;
	unsigned int cpu_feature;
	unsigned int acpi_perf_cpu;
	cpumask_var_t freqdomain_cpus;
	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
};

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance __percpu *acpi_perf_data;

static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data)
{
	return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu);
}

static struct cpufreq_driver acpi_cpufreq_driver;

static unsigned int acpi_pstate_strict;

static bool boost_state(unsigned int cpu)
{
	u32 lo, hi;
	u64 msr;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
	case X86_VENDOR_HYGON:
	case X86_VENDOR_AMD:
		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
		msr = lo | ((u64)hi << 32);
		return !(msr & MSR_K7_HWCR_CPB_DIS);
	}
	return false;
}

static int boost_set_msr(bool enable)
{
	u32 msr_addr;
	u64 msr_mask, val;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		msr_addr = MSR_IA32_MISC_ENABLE;
		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
		break;
	case X86_VENDOR_HYGON:
	case X86_VENDOR_AMD:
		msr_addr = MSR_K7_HWCR;
		msr_mask = MSR_K7_HWCR_CPB_DIS;
		break;
	default:
		return -EINVAL;
	}

	rdmsrl(msr_addr, val);

	if (enable)
		val &= ~msr_mask;
	else
		val |= msr_mask;

	wrmsrl(msr_addr, val);
	return 0;
}

static void boost_set_msr_each(void *p_en)
{
	bool enable = (bool) p_en;

	boost_set_msr(enable);
}

static int set_boost(struct cpufreq_policy *policy, int val)
{
	on_each_cpu_mask(policy->cpus, boost_set_msr_each,
			 (void *)(long)val, 1);
	pr_debug("CPU %*pbl: Core Boosting %sabled.\n",
		 cpumask_pr_args(policy->cpus), val ? "en" : "dis");

	return 0;
}

static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	if (unlikely(!data))
		return -ENODEV;

	return cpufreq_show_cpus(data->freqdomain_cpus, buf);
}

cpufreq_freq_attr_ro(freqdomain_cpus);

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
			 size_t count)
{
	int ret;
	unsigned int val = 0;

	if (!acpi_cpufreq_driver.set_boost)
		return -EINVAL;

	ret = kstrtouint(buf, 10, &val);
	if (ret || val > 1)
		return -EINVAL;

	cpus_read_lock();
	set_boost(policy, val);
	cpus_read_unlock();

	return count;
}

static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
{
	return sprintf(buf, "%u\n", acpi_cpufreq_driver.boost_enabled);
}

cpufreq_freq_attr_rw(cpb);
#endif

static int check_est_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_EST);
}

static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
}

static unsigned extract_io(struct cpufreq_policy *policy, u32 value)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	int i;

	perf = to_perf_data(data);

	for (i = 0; i < perf->state_count; i++) {
		if (value == perf->states[i].status)
			return policy->freq_table[i].frequency;
	}
	return 0;
}

static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct cpufreq_frequency_table *pos;
	struct acpi_processor_performance *perf;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		msr &= AMD_MSR_RANGE;
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		msr &= HYGON_MSR_RANGE;
	else
		msr &= INTEL_MSR_RANGE;

	perf = to_perf_data(data);

	cpufreq_for_each_entry(pos, policy->freq_table)
		if (msr == perf->states[pos->driver_data].status)
			return pos->frequency;
	return policy->freq_table[0].frequency;
}

static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
	case SYSTEM_AMD_MSR_CAPABLE:
		return extract_msr(policy, val);
	case SYSTEM_IO_CAPABLE:
		return extract_io(policy, val);
	default:
		return 0;
	}
}

static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
{
	u32 val, dummy __always_unused;

	rdmsr(MSR_IA32_PERF_CTL, val, dummy);
	return val;
}

static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
{
	u32 lo, hi;

	rdmsr(MSR_IA32_PERF_CTL, lo, hi);
	lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
}

static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
{
	u32 val, dummy __always_unused;

	rdmsr(MSR_AMD_PERF_CTL, val, dummy);
	return val;
}

static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
{
	wrmsr(MSR_AMD_PERF_CTL, val, 0);
}

static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
{
	u32 val;

	acpi_os_read_port(reg->address, &val, reg->bit_width);
	return val;
}

static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
{
	acpi_os_write_port(reg->address, val, reg->bit_width);
}

struct drv_cmd {
	struct acpi_pct_register *reg;
	u32 val;
	union {
		void (*write)(struct acpi_pct_register *reg, u32 val);
		u32 (*read)(struct acpi_pct_register *reg);
	} func;
};

/* Called via smp_call_function_single(), on the target CPU */
static void do_drv_read(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;

	cmd->val = cmd->func.read(cmd->reg);
}

static u32 drv_read(struct acpi_cpufreq_data *data, const struct cpumask *mask)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.func.read = data->cpu_freq_read,
	};
	int err;

	err = smp_call_function_any(mask, do_drv_read, &cmd, 1);
	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
	return cmd.val;
}

/* Called via smp_call_function_many(), on the target CPUs */
static void do_drv_write(void *_cmd)
{
	struct drv_cmd *cmd = _cmd;

	cmd->func.write(cmd->reg, cmd->val);
}

static void drv_write(struct acpi_cpufreq_data *data,
		      const struct cpumask *mask, u32 val)
{
	struct acpi_processor_performance *perf = to_perf_data(data);
	struct drv_cmd cmd = {
		.reg = &perf->control_register,
		.val = val,
		.func.write = data->cpu_freq_write,
	};
	int this_cpu;

	this_cpu = get_cpu();
	if (cpumask_test_cpu(this_cpu, mask))
		do_drv_write(&cmd);

	smp_call_function_many(mask, do_drv_write, &cmd, 1);
	put_cpu();
}

static u32 get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data)
{
	u32 val;

	if (unlikely(cpumask_empty(mask)))
		return 0;

	val = drv_read(data, mask);

	pr_debug("%s = %u\n", __func__, val);

	return val;
}

static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
	struct acpi_cpufreq_data *data;
	struct cpufreq_policy *policy;
	unsigned int freq;
	unsigned int cached_freq;

	pr_debug("%s (%d)\n", __func__, cpu);

	policy = cpufreq_cpu_get_raw(cpu);
	if (unlikely(!policy))
		return 0;

	data = policy->driver_data;
	if (unlikely(!data || !policy->freq_table))
		return 0;

	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
	if (freq != cached_freq) {
		/*
		 * The dreaded BIOS frequency change behind our back.
		 * Force set the frequency on next target call.
		 */
		data->resume = 1;
	}

	pr_debug("cur freq = %u\n", freq);

	return freq;
}

static unsigned int check_freqs(struct cpufreq_policy *policy,
				const struct cpumask *mask, unsigned int freq)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	unsigned int cur_freq;
	unsigned int i;

	for (i = 0; i < 100; i++) {
		cur_freq = extract_freq(policy, get_cur_val(mask, data));
		if (cur_freq == freq)
			return 1;
		udelay(10);
	}
	return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
			       unsigned int index)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	const struct cpumask *mask;
	unsigned int next_perf_state = 0; /* Index into perf table */
	int result = 0;

	if (unlikely(!data)) {
		return -ENODEV;
	}

	perf = to_perf_data(data);
	next_perf_state = policy->freq_table[index].driver_data;
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume)) {
			pr_debug("Called after resume, resetting to P%d\n",
				next_perf_state);
			data->resume = 0;
		} else {
			pr_debug("Already at target state (P%d)\n",
				next_perf_state);
			return 0;
		}
	}

	/*
	 * The core won't allow CPUs to go away until the governor has been
	 * stopped, so we can rely on the stability of policy->cpus.
	 */
	mask = policy->shared_type == CPUFREQ_SHARED_TYPE_ANY ?
		cpumask_of(policy->cpu) : policy->cpus;

	drv_write(data, mask, perf->states[next_perf_state].control);

	if (acpi_pstate_strict) {
		if (!check_freqs(policy, mask,
				 policy->freq_table[index].frequency)) {
			pr_debug("%s (%d)\n", __func__, policy->cpu);
			result = -EAGAIN;
		}
	}

	if (!result)
		perf->state = next_perf_state;

	return result;
}

static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
					     unsigned int target_freq)
{
	struct acpi_cpufreq_data *data = policy->driver_data;
	struct acpi_processor_performance *perf;
	struct cpufreq_frequency_table *entry;
	unsigned int next_perf_state, next_freq, index;

	/*
	 * Find the closest frequency above target_freq.
	 */
	if (policy->cached_target_freq == target_freq)
		index = policy->cached_resolved_idx;
	else
		index = cpufreq_table_find_index_dl(policy, target_freq,
						    false);

	entry = &policy->freq_table[index];
	next_freq = entry->frequency;
	next_perf_state = entry->driver_data;

	perf = to_perf_data(data);
	if (perf->state == next_perf_state) {
		if (unlikely(data->resume))
			data->resume = 0;
		else
			return next_freq;
	}

	data->cpu_freq_write(&perf->control_register,
			     perf->states[next_perf_state].control);
	perf->state = next_perf_state;
	return next_freq;
}

static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
	struct acpi_processor_performance *perf;

	perf = to_perf_data(data);
	if (cpu_khz) {
		/* search the closest match to cpu_khz */
		unsigned int i;
		unsigned long freq;
		unsigned long freqn = perf->states[0].core_frequency * 1000;

		for (i = 0; i < (perf->state_count-1); i++) {
			freq = freqn;
			freqn = perf->states[i+1].core_frequency * 1000;
			if ((2 * cpu_khz) > (freqn + freq)) {
				perf->state = i;
				return freq;
			}
		}
		perf->state = perf->state_count-1;
		return freqn;
	} else {
		/* assume CPU is at P0... */
		perf->state = 0;
		return perf->states[0].core_frequency * 1000;
	}
}

static void free_acpi_perf_data(void)
{
	unsigned int i;

	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
	for_each_possible_cpu(i)
		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
				 ->shared_cpu_map);
	free_percpu(acpi_perf_data);
}

static int cpufreq_boost_online(unsigned int cpu)
{
	/*
	 * On the CPU_UP path we simply keep the boost-disable flag
	 * in sync with the current global state.
	 */
	return boost_set_msr(acpi_cpufreq_driver.boost_enabled);
}

static int cpufreq_boost_down_prep(unsigned int cpu)
{
	/*
	 * Clear the boost-disable bit on the CPU_DOWN path so that
	 * this cpu cannot block the remaining ones from boosting.
	 */
	return boost_set_msr(1);
}

/*
 * acpi_cpufreq_early_init - initialize ACPI P-States library
 *
 * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
 * in order to determine correct frequency and voltage pairings. We can
 * do _PDC and _PSD and find out the processor dependency for the
 * actual init that will happen later...
 */
static int __init acpi_cpufreq_early_init(void)
{
	unsigned int i;
	pr_debug("%s\n", __func__);

	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data) {
		pr_debug("Memory allocation error for acpi_perf_data.\n");
		return -ENOMEM;
	}
	for_each_possible_cpu(i) {
		if (!zalloc_cpumask_var_node(
			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
			GFP_KERNEL, cpu_to_node(i))) {

			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
			free_acpi_perf_data();
			return -ENOMEM;
		}
	}

	/* Do initialization in ACPI core */
	acpi_processor_preregister_performance(acpi_perf_data);
	return 0;
}

#ifdef CONFIG_SMP
/*
 * Some BIOSes do SW_ANY coordination internally, either set it up in hw
 * or do it in BIOS firmware and won't inform about it to OS. If not
 * detected, this has a side effect of making CPU run at a different speed
 * than OS intended it to run at. Detect it and handle it cleanly.
 */
static int bios_with_sw_any_bug;

static int sw_any_bug_found(const struct dmi_system_id *d)
{
	bios_with_sw_any_bug = 1;
	return 0;
}

static const struct dmi_system_id sw_any_bug_dmi_table[] = {
	{
		.callback = sw_any_bug_found,
		.ident = "Supermicro Server X6DLP",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
		},
	},
	{ }
};

static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
	/* Intel Xeon Processor 7100 Series Specification Update
	 * https://www.intel.com/Assets/PDF/specupdate/314554.pdf
	 * AL30: A Machine Check Exception (MCE) Occurring during an
	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
	 * Both Processor Cores to Lock Up. */
	if (c->x86_vendor == X86_VENDOR_INTEL) {
		if ((c->x86 == 15) &&
		    (c->x86_model == 6) &&
		    (c->x86_stepping == 8)) {
			pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
			return -ENODEV;
		    }
		}
	return 0;
}
#endif

#ifdef CONFIG_ACPI_CPPC_LIB
static u64 get_max_boost_ratio(unsigned int cpu)
{
	struct cppc_perf_caps perf_caps;
	u64 highest_perf, nominal_perf;
	int ret;

	if (acpi_pstate_strict)
		return 0;

	ret = cppc_get_perf_caps(cpu, &perf_caps);
	if (ret) {
		pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
			 cpu, ret);
		return 0;
	}

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		highest_perf = amd_get_highest_perf();
	else
		highest_perf = perf_caps.highest_perf;

	nominal_perf = perf_caps.nominal_perf;

	if (!highest_perf || !nominal_perf) {
		pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
		return 0;
	}

	if (highest_perf < nominal_perf) {
		pr_debug("CPU%d: nominal performance above highest\n", cpu);
		return 0;
	}

	return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
}
#else
static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
#endif

static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
	struct cpufreq_frequency_table *freq_table;
	struct acpi_processor_performance *perf;
	struct acpi_cpufreq_data *data;
	unsigned int cpu = policy->cpu;
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned int valid_states = 0;
	unsigned int result = 0;
	u64 max_boost_ratio;
	unsigned int i;
#ifdef CONFIG_SMP
	static int blacklisted;
#endif

	pr_debug("%s\n", __func__);

#ifdef CONFIG_SMP
	if (blacklisted)
		return blacklisted;
	blacklisted = acpi_cpufreq_blacklist(c);
	if (blacklisted)
		return blacklisted;
#endif

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
		result = -ENOMEM;
		goto err_free;
	}

	perf = per_cpu_ptr(acpi_perf_data, cpu);
	data->acpi_perf_cpu = cpu;
	policy->driver_data = data;

	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

	result = acpi_processor_register_performance(perf, cpu);
	if (result)
		goto err_free_mask;

	policy->shared_type = perf->shared_type;

	/*
	 * Will let policy->cpus know about dependency only when software
	 * coordination is required.
	 */
	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
		cpumask_copy(policy->cpus, perf->shared_cpu_map);
	}
	cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);

#ifdef CONFIG_SMP
	dmi_check_system(sw_any_bug_dmi_table);
	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
		cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
	}

	if (check_amd_hwpstate_cpu(cpu) && boot_cpu_data.x86 < 0x19 &&
	    !acpi_pstate_strict) {
		cpumask_clear(policy->cpus);
		cpumask_set_cpu(cpu, policy->cpus);
		cpumask_copy(data->freqdomain_cpus,
			     topology_sibling_cpumask(cpu));
		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
		pr_info_once("overriding BIOS provided _PSD data\n");
	}
#endif

	/* capability check */
	if (perf->state_count <= 1) {
		pr_debug("No P-States\n");
		result = -ENODEV;
		goto err_unreg;
	}

	if (perf->control_register.space_id != perf->status_register.space_id) {
		result = -ENODEV;
		goto err_unreg;
	}

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		    boot_cpu_data.x86 == 0xf) {
			pr_debug("AMD K8 systems must use native drivers.\n");
			result = -ENODEV;
			goto err_unreg;
		}
		pr_debug("SYSTEM IO addr space\n");
		data->cpu_feature = SYSTEM_IO_CAPABLE;
		data->cpu_freq_read = cpu_freq_read_io;
		data->cpu_freq_write = cpu_freq_write_io;
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		pr_debug("HARDWARE addr space\n");
		if (check_est_cpu(cpu)) {
			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
			data->cpu_freq_read = cpu_freq_read_intel;
			data->cpu_freq_write = cpu_freq_write_intel;
			break;
		}
		if (check_amd_hwpstate_cpu(cpu)) {
			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
			data->cpu_freq_read = cpu_freq_read_amd;
			data->cpu_freq_write = cpu_freq_write_amd;
			break;
		}
		result = -ENODEV;
		goto err_unreg;
	default:
		pr_debug("Unknown addr space %d\n",
			(u32) (perf->control_register.space_id));
		result = -ENODEV;
		goto err_unreg;
	}

	freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
			     GFP_KERNEL);
	if (!freq_table) {
		result = -ENOMEM;
		goto err_unreg;
	}

	/* detect transition latency */
	policy->cpuinfo.transition_latency = 0;
	for (i = 0; i < perf->state_count; i++) {
		if ((perf->states[i].transition_latency * 1000) >
		    policy->cpuinfo.transition_latency)
			policy->cpuinfo.transition_latency =
			    perf->states[i].transition_latency * 1000;
	}

	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
	    policy->cpuinfo.transition_latency > 20 * 1000) {
		policy->cpuinfo.transition_latency = 20 * 1000;
		pr_info_once("P-state transition latency capped at 20 uS\n");
	}

	/* table init */
	for (i = 0; i < perf->state_count; i++) {
		if (i > 0 && perf->states[i].core_frequency >=
		    freq_table[valid_states-1].frequency / 1000)
			continue;

		freq_table[valid_states].driver_data = i;
		freq_table[valid_states].frequency =
		    perf->states[i].core_frequency * 1000;
		valid_states++;
	}
	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;

	max_boost_ratio = get_max_boost_ratio(cpu);
	if (max_boost_ratio) {
		unsigned int freq = freq_table[0].frequency;

		/*
		 * Because the loop above sorts the freq_table entries in the
		 * descending order, freq is the maximum frequency in the table.
		 * Assume that it corresponds to the CPPC nominal frequency and
		 * use it to set cpuinfo.max_freq.
		 */
		policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
	} else {
		/*
		 * If the maximum "boost" frequency is unknown, ask the arch
		 * scale-invariance code to use the "nominal" performance for
		 * CPU utilization scaling so as to prevent the schedutil
		 * governor from selecting inadequate CPU frequencies.
		 */
		arch_set_max_freq_ratio(true);
	}

	policy->freq_table = freq_table;
	perf->state = 0;

	switch (perf->control_register.space_id) {
	case ACPI_ADR_SPACE_SYSTEM_IO:
		/*
		 * The core will not set policy->cur, because
		 * cpufreq_driver->get is NULL, so we need to set it here.
		 * However, we have to guess it, because the current speed is
		 * unknown and not detectable via IO ports.
		 */
		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
		break;
	case ACPI_ADR_SPACE_FIXED_HARDWARE:
		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
		break;
	default:
		break;
	}

	/* notify BIOS that we exist */
	acpi_processor_notify_smm(THIS_MODULE);

	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
	for (i = 0; i < perf->state_count; i++)
		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
			(i == perf->state ? '*' : ' '), i,
			(u32) perf->states[i].core_frequency,
			(u32) perf->states[i].power,
			(u32) perf->states[i].transition_latency);

	/*
	 * the first call to ->target() should result in us actually
	 * writing something to the appropriate registers.
	 */
	data->resume = 1;

	policy->fast_switch_possible = !acpi_pstate_strict &&
		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);

	if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
		pr_warn(FW_WARN "P-state 0 is not max freq\n");

	return result;

err_unreg:
	acpi_processor_unregister_performance(cpu);
err_free_mask:
	free_cpumask_var(data->freqdomain_cpus);
err_free:
	kfree(data);
	policy->driver_data = NULL;

	return result;
}

static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	pr_debug("%s\n", __func__);

	policy->fast_switch_possible = false;
	policy->driver_data = NULL;
	acpi_processor_unregister_performance(data->acpi_perf_cpu);
	free_cpumask_var(data->freqdomain_cpus);
	kfree(policy->freq_table);
	kfree(data);

	return 0;
}

static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
	struct acpi_cpufreq_data *data = policy->driver_data;

	pr_debug("%s\n", __func__);

	data->resume = 1;

	return 0;
}

static struct freq_attr *acpi_cpufreq_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&freqdomain_cpus,
#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	&cpb,
#endif
	NULL,
};

static struct cpufreq_driver acpi_cpufreq_driver = {
	.verify		= cpufreq_generic_frequency_table_verify,
	.target_index	= acpi_cpufreq_target,
	.fast_switch	= acpi_cpufreq_fast_switch,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= acpi_cpufreq_cpu_init,
	.exit		= acpi_cpufreq_cpu_exit,
	.resume		= acpi_cpufreq_resume,
	.name		= "acpi-cpufreq",
	.attr		= acpi_cpufreq_attr,
};

static enum cpuhp_state acpi_cpufreq_online;

static void __init acpi_cpufreq_boost_init(void)
{
	int ret;

	if (!(boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA))) {
		pr_debug("Boost capabilities not present in the processor\n");
		return;
	}

	acpi_cpufreq_driver.set_boost = set_boost;
	acpi_cpufreq_driver.boost_enabled = boost_state(0);

	/*
	 * This calls the online callback on all online cpu and forces all
	 * MSRs to the same value.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpufreq/acpi:online",
				cpufreq_boost_online, cpufreq_boost_down_prep);
	if (ret < 0) {
		pr_err("acpi_cpufreq: failed to register hotplug callbacks\n");
		return;
	}
	acpi_cpufreq_online = ret;
}

static void acpi_cpufreq_boost_exit(void)
{
	if (acpi_cpufreq_online > 0)
		cpuhp_remove_state_nocalls(acpi_cpufreq_online);
}

static int __init acpi_cpufreq_init(void)
{
	int ret;

	if (acpi_disabled)
		return -ENODEV;

	/* don't keep reloading if cpufreq_driver exists */
	if (cpufreq_get_current_driver())
		return -EEXIST;

	pr_debug("%s\n", __func__);

	ret = acpi_cpufreq_early_init();
	if (ret)
		return ret;

#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB
	/* this is a sysfs file with a strange name and an even stranger
	 * semantic - per CPU instantiation, but system global effect.
	 * Lets enable it only on AMD CPUs for compatibility reasons and
	 * only if configured. This is considered legacy code, which
	 * will probably be removed at some point in the future.
	 */
	if (!check_amd_hwpstate_cpu(0)) {
		struct freq_attr **attr;

		pr_debug("CPB unsupported, do not expose it\n");

		for (attr = acpi_cpufreq_attr; *attr; attr++)
			if (*attr == &cpb) {
				*attr = NULL;
				break;
			}
	}
#endif
	acpi_cpufreq_boost_init();

	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
	if (ret) {
		free_acpi_perf_data();
		acpi_cpufreq_boost_exit();
	}
	return ret;
}

static void __exit acpi_cpufreq_exit(void)
{
	pr_debug("%s\n", __func__);

	acpi_cpufreq_boost_exit();

	cpufreq_unregister_driver(&acpi_cpufreq_driver);

	free_acpi_perf_data();
}

module_param(acpi_pstate_strict, uint, 0644);
MODULE_PARM_DESC(acpi_pstate_strict,
	"value 0 or non-zero. non-zero -> strict ACPI checks are "
	"performed during frequency changes.");

late_initcall(acpi_cpufreq_init);
module_exit(acpi_cpufreq_exit);

static const struct x86_cpu_id __maybe_unused acpi_cpufreq_ids[] = {
	X86_MATCH_FEATURE(X86_FEATURE_ACPI, NULL),
	X86_MATCH_FEATURE(X86_FEATURE_HW_PSTATE, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids);

static const struct acpi_device_id __maybe_unused processor_device_ids[] = {
	{ACPI_PROCESSOR_OBJECT_HID, },
	{ACPI_PROCESSOR_DEVICE_HID, },
	{},
};
MODULE_DEVICE_TABLE(acpi, processor_device_ids);

MODULE_ALIAS("acpi");