cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

intel_idle.c (55155B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * intel_idle.c - native hardware idle loop for modern Intel processors
      4 *
      5 * Copyright (c) 2013 - 2020, Intel Corporation.
      6 * Len Brown <len.brown@intel.com>
      7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
      8 */
      9
     10/*
     11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
     12 * in lieu of the legacy ACPI processor_idle driver.  The intent is to
     13 * make Linux more efficient on these processors, as intel_idle knows
     14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
     15 */
     16
     17/*
     18 * Design Assumptions
     19 *
     20 * All CPUs have same idle states as boot CPU
     21 *
     22 * Chipset BM_STS (bus master status) bit is a NOP
     23 *	for preventing entry into deep C-states
     24 *
     25 * CPU will flush caches as needed when entering a C-state via MWAIT
     26 *	(in contrast to entering ACPI C3, in which case the WBINVD
     27 *	instruction needs to be executed to flush the caches)
     28 */
     29
     30/*
     31 * Known limitations
     32 *
     33 * ACPI has a .suspend hack to turn off deep c-states during suspend
     34 * to avoid complications with the lapic timer workaround.
     35 * Have not seen issues with suspend, but may need same workaround here.
     36 *
     37 */
     38
     39/* un-comment DEBUG to enable pr_debug() statements */
     40/* #define DEBUG */
     41
     42#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     43
     44#include <linux/acpi.h>
     45#include <linux/kernel.h>
     46#include <linux/cpuidle.h>
     47#include <linux/tick.h>
     48#include <trace/events/power.h>
     49#include <linux/sched.h>
     50#include <linux/notifier.h>
     51#include <linux/cpu.h>
     52#include <linux/moduleparam.h>
     53#include <asm/cpu_device_id.h>
     54#include <asm/intel-family.h>
     55#include <asm/mwait.h>
     56#include <asm/msr.h>
     57
     58#define INTEL_IDLE_VERSION "0.5.1"
     59
/* Driver object registered with the cpuidle core; states are filled in at init. */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* Bit masks of state indices to force-disable / prefer (presumably module parameters — confirm at moduleparam declarations). */
static unsigned int disabled_states_mask;
static unsigned int preferred_states_mask;

/* One cpuidle device per CPU. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;

/* Model-specific MSR enable bits to clear when auto-demotion is disabled. */
static unsigned long auto_demotion_disable_flags;

/* Policy for the "C1E promotion" bit in MSR_IA32_POWER_CTL. */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;
     78
/*
 * Per-CPU-model driver configuration, selected at init time.
 */
struct idle_cpu {
	/* C-state table to install for this CPU model. */
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* Bay Trail-specific auto-demotion disable — NOTE(review): confirm semantics at use site. */
	bool byt_auto_demotion_disable_flag;
	/* Presumably clears the C1E-promotion bit in MSR_IA32_POWER_CTL — confirm at use site. */
	bool disable_promotion_to_c1e;
	/* Appears to gate use of ACPI _CST data — confirm at use site. */
	bool use_acpi;
};
     91
/* Selected per CPU model during init; __initdata is discarded after boot. */
static const struct idle_cpu *icpu __initdata;
static struct cpuidle_state *cpuidle_state_table __initdata;

/* MWAIT sub-state counts captured at init (presumably from CPUID leaf 5 — confirm at init code). */
static unsigned int mwait_substates __initdata;
     96
/* Driver-private cpuidle state flag bits — NOTE(review): confirm they do not collide with generic CPUIDLE_FLAG_* in cpuidle.h. */

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)
    107
    108/*
    109 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
    110 * the C-state (top nibble) and sub-state (bottom nibble)
    111 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
    112 *
    113 * We store the hint at the top of our "flags" for each state.
    114 */
    115#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
    116#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
    117
    118static __always_inline int __intel_idle(struct cpuidle_device *dev,
    119					struct cpuidle_driver *drv, int index)
    120{
    121	struct cpuidle_state *state = &drv->states[index];
    122	unsigned long eax = flg2MWAIT(state->flags);
    123	unsigned long ecx = 1; /* break on interrupt flag */
    124
    125	mwait_idle_with_hints(eax, ecx);
    126
    127	return index;
    128}
    129
    130/**
    131 * intel_idle - Ask the processor to enter the given idle state.
    132 * @dev: cpuidle device of the target CPU.
    133 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
    134 * @index: Target idle state index.
    135 *
    136 * Use the MWAIT instruction to notify the processor that the CPU represented by
    137 * @dev is idle and it can try to enter the idle state corresponding to @index.
    138 *
    139 * If the local APIC timer is not known to be reliable in the target idle state,
    140 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
    141 *
    142 * Must be called under local_irq_disable().
    143 */
    144static __cpuidle int intel_idle(struct cpuidle_device *dev,
    145				struct cpuidle_driver *drv, int index)
    146{
    147	return __intel_idle(dev, drv, index);
    148}
    149
/*
 * intel_idle_irq - Enter an idle state with interrupts enabled.
 *
 * Used for states flagged CPUIDLE_FLAG_IRQ_ENABLE: IRQs are enabled across
 * the MWAIT entry (which can reduce interrupt latency — see the flag's
 * definition above) and re-disabled before returning to the cpuidle core.
 * The raw_ irq helpers are used here, presumably to keep irq-state tracing
 * out of the idle path — TODO confirm.
 */
static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
				    struct cpuidle_driver *drv, int index)
{
	int ret;

	raw_local_irq_enable();
	ret = __intel_idle(dev, drv, index);
	raw_local_irq_disable();

	return ret;
}
    161
    162/**
    163 * intel_idle_s2idle - Ask the processor to enter the given idle state.
    164 * @dev: cpuidle device of the target CPU.
    165 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
    166 * @index: Target idle state index.
    167 *
    168 * Use the MWAIT instruction to notify the processor that the CPU represented by
    169 * @dev is idle and it can try to enter the idle state corresponding to @index.
    170 *
    171 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
    172 * scheduler tick and suspended scheduler clock on the target CPU.
    173 */
    174static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
    175				       struct cpuidle_driver *drv, int index)
    176{
    177	unsigned long eax = flg2MWAIT(drv->states[index].flags);
    178	unsigned long ecx = 1; /* break on interrupt flag */
    179
    180	mwait_idle_with_hints(eax, ecx);
    181
    182	return 0;
    183}
    184
/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
/* Nehalem-class table; exit_latency/target_residency are in microseconds (cpuidle convention). */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* NULL .enter terminates the table */
};
    226
/* Sandy Bridge table; latencies/residencies in microseconds, NULL .enter terminates. */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    271
/* Bay Trail table; note module-level (0x5x) and package (0x6x) MWAIT hints. */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    316
/* Cherry Trail table; same hints as Bay Trail but with lower C6N/C6S exit latencies. */
static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    361
/* Ivy Bridge (client) table; microsecond latencies/residencies. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    406
/* Ivy Town (Xeon) base table; see the _4s/_8s variants below for larger-socket-count residencies. */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    443
/* Ivy Town variant with longer target residencies (presumably for 4-socket systems — confirm at selection code). */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    480
/* Ivy Town variant with the longest target residencies (presumably for 8-socket systems — confirm at selection code). */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    517
/* Haswell table; includes deep package states C8-C10 (MWAIT 0x40-0x60). */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Broadwell table; mirrors Haswell except for the C3 exit latency. */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    654
/* Skylake (client) table; note C7s uses hint 0x33 here, unlike hsw/bdw's 0x32. */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    723
/* Skylake-X (server) table; C1 enters with interrupts enabled (CPUIDLE_FLAG_IRQ_ENABLE). */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    752
/* Ice Lake-X (server) table; C1 enters with interrupts enabled (CPUIDLE_FLAG_IRQ_ENABLE). */
static struct cpuidle_state icx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    781
/*
 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
 * But in this case there is effectively no C1, because C1 requests are
 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
 * and C1E requests end up with C1, so there is effectively no C1E.
 *
 * By default we enable C1E and disable C1 by marking it with
 * 'CPUIDLE_FLAG_UNUSABLE'.
 */
static struct cpuidle_state adl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 220,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 280,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 680,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    836
/* Alder Lake-L variant; same C1/C1E exclusivity as adl_cstates above (C1 marked UNUSABLE by default). */
static struct cpuidle_state adl_l_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 230,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    881
/*
 * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
 * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
 * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
 * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
 * both C1 and C1E requests end up with C1, so there is effectively no C1E.
 *
 * By default we enable C1 and disable C1E by marking it with
 * 'CPUIDLE_FLAG_UNUSABLE'.
 */
static struct cpuidle_state spr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
					   CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 290,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
    921
/* Atom table; note the first state is named "C1E" but uses the C1 hint (MWAIT 0x00). */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Tangier (Merrifield) table; microsecond latencies/residencies. */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Idle states for ATOM_SILVERMONT_D parts (idle_cpu_avn). */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
/* Idle states for Xeon Phi (XEON_PHI_KNL/KNM, see idle_cpu_knl). */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }
};
   1042
/*
 * Idle states for Broxton-family Atoms (ATOM_GOLDMONT / GOLDMONT_PLUS).
 * The latency/residency of entries [2]..[6] (C6..C10) are overwritten at
 * init time from the PKGCn_IRTL MSRs by bxt_idle_state_table_update().
 */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
   1103
/* Idle states for ATOM_GOLDMONT_D parts (idle_cpu_dnv). */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
   1132
   1133/*
   1134 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
   1135 * C6, and this is indicated in the CPUID mwait leaf.
   1136 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		/*
		 * May be absent on some parts (see comment above); states
		 * whose CPUID mwait sub-state count is zero are dropped by
		 * intel_idle_verify_cstate().
		 */
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }
};
   1165
/*
 * Per-model descriptors: which C-state table to use and which quirks to
 * apply.  .use_acpi makes states not listed in ACPI _CST disabled by
 * default (see intel_idle_init_cstates_icpu()); the auto-demotion and
 * C1E-promotion fields drive the MSR writes in intel_idle_cpu_init().
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_adl __initconst = {
	.state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
	.state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
	.state_table = spr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};
   1306
/*
 * CPU model -> idle_cpu descriptor mapping, consumed by intel_idle_init()
 * via x86_match_cpu().
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&idle_cpu_adl_l),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
	{}
};
   1352
/*
 * Fallback match for Intel family 6 CPUs with MWAIT that are not listed
 * in intel_idle_ids; idle states are then built from ACPI _CST (see
 * intel_idle_cpuidle_driver_init()).
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};
   1357
   1358static bool __init intel_idle_max_cstate_reached(int cstate)
   1359{
   1360	if (cstate + 1 > max_cstate) {
   1361		pr_info("max_cstate %d reached\n", max_cstate);
   1362		return true;
   1363	}
   1364	return false;
   1365}
   1366
   1367static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
   1368{
   1369	unsigned long eax = flg2MWAIT(state->flags);
   1370
   1371	if (boot_cpu_has(X86_FEATURE_ARAT))
   1372		return false;
   1373
   1374	/*
   1375	 * Switch over to one-shot tick broadcast if the target C-state
   1376	 * is deeper than C1.
   1377	 */
   1378	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
   1379}
   1380
#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

/* Module option: never consult ACPI _CST when building the states list. */
static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

/* Module option: honor _CST when choosing which states to enable by default. */
static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
module_param_named(use_acpi, force_use_acpi, bool, 0444);
MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");

/* _CST data extracted once at init time; shared by the helpers below. */
static struct acpi_processor_power acpi_state_table __initdata;
   1393
   1394/**
   1395 * intel_idle_cst_usable - Check if the _CST information can be used.
   1396 *
   1397 * Check if all of the C-states listed by _CST in the max_cstate range are
   1398 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
   1399 */
   1400static bool __init intel_idle_cst_usable(void)
   1401{
   1402	int cstate, limit;
   1403
   1404	limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
   1405		      acpi_state_table.count);
   1406
   1407	for (cstate = 1; cstate < limit; cstate++) {
   1408		struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
   1409
   1410		if (cx->entry_method != ACPI_CSTATE_FFH)
   1411			return false;
   1412	}
   1413
   1414	return true;
   1415}
   1416
/*
 * Try to obtain a usable _CST table.  Returns true when a CPU with
 * MWAIT-only _CST states was found and _CST control was claimed;
 * otherwise resets acpi_state_table.count and returns false.
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		/* CPUs without an ACPI processor object cannot provide _CST. */
		if (!pr)
			continue;

		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		acpi_state_table.count++;

		/* Keep looking if any state in range is not MWAIT-entered. */
		if (!intel_idle_cst_usable())
			continue;

		/* Give up entirely if _CST control cannot be claimed. */
		if (!acpi_processor_claim_cst_control())
			break;

		return true;
	}

	/* Failure: make sure later checks see an empty _CST table. */
	acpi_state_table.count = 0;
	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}
   1450
   1451static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
   1452{
   1453	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
   1454
   1455	/*
   1456	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
   1457	 * the interesting states are ACPI_CSTATE_FFH.
   1458	 */
   1459	for (cstate = 1; cstate < limit; cstate++) {
   1460		struct acpi_processor_cx *cx;
   1461		struct cpuidle_state *state;
   1462
   1463		if (intel_idle_max_cstate_reached(cstate - 1))
   1464			break;
   1465
   1466		cx = &acpi_state_table.states[cstate];
   1467
   1468		state = &drv->states[drv->state_count++];
   1469
   1470		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
   1471		strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
   1472		state->exit_latency = cx->latency;
   1473		/*
   1474		 * For C1-type C-states use the same number for both the exit
   1475		 * latency and target residency, because that is the case for
   1476		 * C1 in the majority of the static C-states tables above.
   1477		 * For the other types of C-states, however, set the target
   1478		 * residency to 3 times the exit latency which should lead to
   1479		 * a reasonable balance between energy-efficiency and
   1480		 * performance in the majority of interesting cases.
   1481		 */
   1482		state->target_residency = cx->latency;
   1483		if (cx->type > ACPI_STATE_C1)
   1484			state->target_residency *= 3;
   1485
   1486		state->flags = MWAIT2flg(cx->address);
   1487		if (cx->type > ACPI_STATE_C2)
   1488			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
   1489
   1490		if (disabled_states_mask & BIT(cstate))
   1491			state->flags |= CPUIDLE_FLAG_OFF;
   1492
   1493		if (intel_idle_state_needs_timer_stop(state))
   1494			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
   1495
   1496		state->enter = intel_idle;
   1497		state->enter_s2idle = intel_idle_s2idle;
   1498	}
   1499}
   1500
   1501static bool __init intel_idle_off_by_default(u32 mwait_hint)
   1502{
   1503	int cstate, limit;
   1504
   1505	/*
   1506	 * If there are no _CST C-states, do not disable any C-states by
   1507	 * default.
   1508	 */
   1509	if (!acpi_state_table.count)
   1510		return false;
   1511
   1512	limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
   1513	/*
   1514	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
   1515	 * the interesting states are ACPI_CSTATE_FFH.
   1516	 */
   1517	for (cstate = 1; cstate < limit; cstate++) {
   1518		if (acpi_state_table.states[cstate].address == mwait_hint)
   1519			return false;
   1520	}
   1521	return true;
   1522}
#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
#define force_use_acpi	(false)

/* Stubs for builds without ACPI _CST support: ACPI states are never used. */
static inline bool intel_idle_acpi_cst_extract(void) { return false; }
static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
   1530
   1531/**
   1532 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
   1533 *
   1534 * Tune IVT multi-socket targets.
   1535 * Assumption: num_sockets == (max_package_num + 1).
   1536 */
   1537static void __init ivt_idle_state_table_update(void)
   1538{
   1539	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
   1540	int cpu, package_num, num_sockets = 1;
   1541
   1542	for_each_online_cpu(cpu) {
   1543		package_num = topology_physical_package_id(cpu);
   1544		if (package_num + 1 > num_sockets) {
   1545			num_sockets = package_num + 1;
   1546
   1547			if (num_sockets > 4) {
   1548				cpuidle_state_table = ivt_cstates_8s;
   1549				return;
   1550			}
   1551		}
   1552	}
   1553
   1554	if (num_sockets > 2)
   1555		cpuidle_state_table = ivt_cstates_4s;
   1556
   1557	/* else, 1 and 2 socket systems use default ivt_cstates */
   1558}
   1559
   1560/**
   1561 * irtl_2_usec - IRTL to microseconds conversion.
   1562 * @irtl: IRTL MSR value.
   1563 *
   1564 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
   1565 */
   1566static unsigned long long __init irtl_2_usec(unsigned long long irtl)
   1567{
   1568	static const unsigned int irtl_ns_units[] __initconst = {
   1569		1, 32, 1024, 32768, 1048576, 33554432, 0, 0
   1570	};
   1571	unsigned long long ns;
   1572
   1573	if (!irtl)
   1574		return 0;
   1575
   1576	ns = irtl_ns_units[(irtl >> 10) & 0x7];
   1577
   1578	return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
   1579}
   1580
   1581/**
   1582 * bxt_idle_state_table_update - Fix up the Broxton idle states table.
   1583 *
   1584 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
   1585 * definitive maximum latency and use the same value for target_residency.
   1586 */
   1587static void __init bxt_idle_state_table_update(void)
   1588{
   1589	unsigned long long msr;
   1590	unsigned int usec;
   1591
   1592	rdmsrl(MSR_PKGC6_IRTL, msr);
   1593	usec = irtl_2_usec(msr);
   1594	if (usec) {
   1595		bxt_cstates[2].exit_latency = usec;
   1596		bxt_cstates[2].target_residency = usec;
   1597	}
   1598
   1599	rdmsrl(MSR_PKGC7_IRTL, msr);
   1600	usec = irtl_2_usec(msr);
   1601	if (usec) {
   1602		bxt_cstates[3].exit_latency = usec;
   1603		bxt_cstates[3].target_residency = usec;
   1604	}
   1605
   1606	rdmsrl(MSR_PKGC8_IRTL, msr);
   1607	usec = irtl_2_usec(msr);
   1608	if (usec) {
   1609		bxt_cstates[4].exit_latency = usec;
   1610		bxt_cstates[4].target_residency = usec;
   1611	}
   1612
   1613	rdmsrl(MSR_PKGC9_IRTL, msr);
   1614	usec = irtl_2_usec(msr);
   1615	if (usec) {
   1616		bxt_cstates[5].exit_latency = usec;
   1617		bxt_cstates[5].target_residency = usec;
   1618	}
   1619
   1620	rdmsrl(MSR_PKGC10_IRTL, msr);
   1621	usec = irtl_2_usec(msr);
   1622	if (usec) {
   1623		bxt_cstates[6].exit_latency = usec;
   1624		bxt_cstates[6].target_residency = usec;
   1625	}
   1626
   1627}
   1628
   1629/**
   1630 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
   1631 *
   1632 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
   1633 */
   1634static void __init sklh_idle_state_table_update(void)
   1635{
   1636	unsigned long long msr;
   1637	unsigned int eax, ebx, ecx, edx;
   1638
   1639
   1640	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
   1641	if (max_cstate <= 7)
   1642		return;
   1643
   1644	/* if PC10 not present in CPUID.MWAIT.EDX */
   1645	if ((mwait_substates & (0xF << 28)) == 0)
   1646		return;
   1647
   1648	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
   1649
   1650	/* PC10 is not enabled in PKG C-state limit */
   1651	if ((msr & 0xF) != 8)
   1652		return;
   1653
   1654	ecx = 0;
   1655	cpuid(7, &eax, &ebx, &ecx, &edx);
   1656
   1657	/* if SGX is present */
   1658	if (ebx & (1 << 2)) {
   1659
   1660		rdmsrl(MSR_IA32_FEAT_CTL, msr);
   1661
   1662		/* if SGX is enabled */
   1663		if (msr & (1 << 18))
   1664			return;
   1665	}
   1666
   1667	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
   1668	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
   1669}
   1670
   1671/**
   1672 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
   1673 * idle states table.
   1674 */
   1675static void __init skx_idle_state_table_update(void)
   1676{
   1677	unsigned long long msr;
   1678
   1679	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
   1680
   1681	/*
   1682	 * 000b: C0/C1 (no package C-state support)
   1683	 * 001b: C2
   1684	 * 010b: C6 (non-retention)
   1685	 * 011b: C6 (retention)
   1686	 * 111b: No Package C state limits.
   1687	 */
   1688	if ((msr & 0x7) < 2) {
   1689		/*
   1690		 * Uses the CC6 + PC0 latency and 3 times of
   1691		 * latency for target_residency if the PC6
   1692		 * is disabled in BIOS. This is consistent
   1693		 * with how intel_idle driver uses _CST
   1694		 * to set the target_residency.
   1695		 */
   1696		skx_cstates[2].exit_latency = 92;
   1697		skx_cstates[2].target_residency = 276;
   1698	}
   1699}
   1700
   1701/**
   1702 * adl_idle_state_table_update - Adjust AlderLake idle states table.
   1703 */
   1704static void __init adl_idle_state_table_update(void)
   1705{
   1706	/* Check if user prefers C1 over C1E. */
   1707	if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
   1708		cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
   1709		cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
   1710
   1711		/* Disable C1E by clearing the "C1E promotion" bit. */
   1712		c1e_promotion = C1E_PROMOTION_DISABLE;
   1713		return;
   1714	}
   1715
   1716	/* Make sure C1E is enabled by default */
   1717	c1e_promotion = C1E_PROMOTION_ENABLE;
   1718}
   1719
   1720/**
   1721 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
   1722 */
   1723static void __init spr_idle_state_table_update(void)
   1724{
   1725	unsigned long long msr;
   1726
   1727	/* Check if user prefers C1E over C1. */
   1728	if ((preferred_states_mask & BIT(2)) &&
   1729	    !(preferred_states_mask & BIT(1))) {
   1730		/* Disable C1 and enable C1E. */
   1731		spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
   1732		spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
   1733
   1734		/* Enable C1E using the "C1E promotion" bit. */
   1735		c1e_promotion = C1E_PROMOTION_ENABLE;
   1736	}
   1737
   1738	/*
   1739	 * By default, the C6 state assumes the worst-case scenario of package
   1740	 * C6. However, if PC6 is disabled, we update the numbers to match
   1741	 * core C6.
   1742	 */
   1743	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
   1744
   1745	/* Limit value 2 and above allow for PC6. */
   1746	if ((msr & 0x7) < 2) {
   1747		spr_cstates[2].exit_latency = 190;
   1748		spr_cstates[2].target_residency = 600;
   1749	}
   1750}
   1751
   1752static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
   1753{
   1754	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
   1755	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
   1756					MWAIT_SUBSTATE_MASK;
   1757
   1758	/* Ignore the C-state if there are NO sub-states in CPUID for it. */
   1759	if (num_substates == 0)
   1760		return false;
   1761
   1762	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
   1763		mark_tsc_unstable("TSC halts in idle states deeper than C2");
   1764
   1765	return true;
   1766}
   1767
/*
 * Populate @drv from the native cpuidle_state_table for this CPU model,
 * after applying any model-specific table fixups.  Skips states that are
 * unusable or not advertised by CPUID, and applies the OFF/TIMER_STOP
 * flags per state.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	/* Model-specific table adjustments before copying states out. */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE_X:
		skx_idle_state_table_update();
		break;
	case INTEL_FAM6_SAPPHIRERAPIDS_X:
		spr_idle_state_table_update();
		break;
	case INTEL_FAM6_ALDERLAKE:
	case INTEL_FAM6_ALDERLAKE_L:
		adl_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* A state with neither enter callback terminates the table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy. */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
			drv->states[drv->state_count].enter = intel_idle_irq;

		/*
		 * Disable by default when the user masked this state or when
		 * ACPI _CST is honored and does not list it (unless the state
		 * is flagged as always enabled).
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}

	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}
   1839
   1840/**
   1841 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
   1842 * @drv: cpuidle driver structure to initialize.
   1843 */
   1844static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
   1845{
   1846	cpuidle_poll_state_init(drv);
   1847
   1848	if (disabled_states_mask & BIT(0))
   1849		drv->states[0].flags |= CPUIDLE_FLAG_OFF;
   1850
   1851	drv->state_count = 1;
   1852
   1853	if (icpu)
   1854		intel_idle_init_cstates_icpu(drv);
   1855	else
   1856		intel_idle_init_cstates_acpi(drv);
   1857}
   1858
   1859static void auto_demotion_disable(void)
   1860{
   1861	unsigned long long msr_bits;
   1862
   1863	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
   1864	msr_bits &= ~auto_demotion_disable_flags;
   1865	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
   1866}
   1867
   1868static void c1e_promotion_enable(void)
   1869{
   1870	unsigned long long msr_bits;
   1871
   1872	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
   1873	msr_bits |= 0x2;
   1874	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
   1875}
   1876
   1877static void c1e_promotion_disable(void)
   1878{
   1879	unsigned long long msr_bits;
   1880
   1881	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
   1882	msr_bits &= ~0x2;
   1883	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
   1884}
   1885
   1886/**
   1887 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
   1888 * @cpu: CPU to initialize.
   1889 *
   1890 * Register a cpuidle device object for @cpu and update its MSRs in accordance
   1891 * with the processor model flags.
   1892 */
   1893static int intel_idle_cpu_init(unsigned int cpu)
   1894{
   1895	struct cpuidle_device *dev;
   1896
   1897	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
   1898	dev->cpu = cpu;
   1899
   1900	if (cpuidle_register_device(dev)) {
   1901		pr_debug("cpuidle_register_device %d failed!\n", cpu);
   1902		return -EIO;
   1903	}
   1904
   1905	if (auto_demotion_disable_flags)
   1906		auto_demotion_disable();
   1907
   1908	if (c1e_promotion == C1E_PROMOTION_ENABLE)
   1909		c1e_promotion_enable();
   1910	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
   1911		c1e_promotion_disable();
   1912
   1913	return 0;
   1914}
   1915
   1916static int intel_idle_cpu_online(unsigned int cpu)
   1917{
   1918	struct cpuidle_device *dev;
   1919
   1920	if (!boot_cpu_has(X86_FEATURE_ARAT))
   1921		tick_broadcast_enable();
   1922
   1923	/*
   1924	 * Some systems can hotplug a cpu at runtime after
   1925	 * the kernel has booted, we have to initialize the
   1926	 * driver in this case
   1927	 */
   1928	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
   1929	if (!dev->registered)
   1930		return intel_idle_cpu_init(cpu);
   1931
   1932	return 0;
   1933}
   1934
   1935/**
   1936 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
   1937 */
   1938static void __init intel_idle_cpuidle_devices_uninit(void)
   1939{
   1940	int i;
   1941
   1942	for_each_online_cpu(i)
   1943		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
   1944}
   1945
/*
 * intel_idle_init - Driver entry point.
 *
 * Probe the boot CPU for MWAIT support and a known idle-state table,
 * register the cpuidle driver, and install a CPU hotplug callback that
 * initializes each CPU as it comes online.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	/* max_cstate == 0 ("intel_idle.max_cstate=0") disables the driver. */
	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	/* Match a known processor model, or fall back to generic MWAIT IDs. */
	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	/* MWAIT extensions, interrupt-break and sub-states are all required. */
	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
			return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		/* Known model: use the static table plus model-specific quirks. */
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		if (icpu->disable_promotion_to_c1e)
			c1e_promotion = C1E_PROMOTION_DISABLE;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		/* No static table and no usable ACPI _CST data: bail out. */
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		/* Another cpuidle driver got there first; yield to it. */
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	/* Initialize CPUs that are already online, and any hotplugged later. */
	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
/* Built into the kernel; run the probe at device-initcall time. */
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Some platforms come with mutually exclusive C-states, so that if one is
 * enabled, the other C-states must not be used. Example: C1 and C1E on
 * Sapphire Rapids platform. This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states - the
 * selected C-states will be registered, the other C-states from the mutually
 * exclusive group won't be registered. If the platform has no mutually
 * exclusive C-states, this parameter has no effect.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");