cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

apic.c (76078B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *	Local APIC handling, local APIC timers
      4 *
      5 *	(c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
      6 *
      7 *	Fixes
      8 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
      9 *					thanks to Eric Gilmore
     10 *					and Rolf G. Tews
     11 *					for testing these extensively.
     12 *	Maciej W. Rozycki	:	Various updates and fixes.
     13 *	Mikael Pettersson	:	Power Management for UP-APIC.
     14 *	Pavel Machek and
     15 *	Mikael Pettersson	:	PM converted to driver model.
     16 */
     17
     18#include <linux/perf_event.h>
     19#include <linux/kernel_stat.h>
     20#include <linux/mc146818rtc.h>
     21#include <linux/acpi_pmtmr.h>
     22#include <linux/clockchips.h>
     23#include <linux/interrupt.h>
     24#include <linux/memblock.h>
     25#include <linux/ftrace.h>
     26#include <linux/ioport.h>
     27#include <linux/export.h>
     28#include <linux/syscore_ops.h>
     29#include <linux/delay.h>
     30#include <linux/timex.h>
     31#include <linux/i8253.h>
     32#include <linux/dmar.h>
     33#include <linux/init.h>
     34#include <linux/cpu.h>
     35#include <linux/dmi.h>
     36#include <linux/smp.h>
     37#include <linux/mm.h>
     38
     39#include <asm/trace/irq_vectors.h>
     40#include <asm/irq_remapping.h>
     41#include <asm/pc-conf-reg.h>
     42#include <asm/perf_event.h>
     43#include <asm/x86_init.h>
     44#include <linux/atomic.h>
     45#include <asm/barrier.h>
     46#include <asm/mpspec.h>
     47#include <asm/i8259.h>
     48#include <asm/proto.h>
     49#include <asm/traps.h>
     50#include <asm/apic.h>
     51#include <asm/acpi.h>
     52#include <asm/io_apic.h>
     53#include <asm/desc.h>
     54#include <asm/hpet.h>
     55#include <asm/mtrr.h>
     56#include <asm/time.h>
     57#include <asm/smp.h>
     58#include <asm/mce.h>
     59#include <asm/tsc.h>
     60#include <asm/hypervisor.h>
     61#include <asm/cpu_device_id.h>
     62#include <asm/intel-family.h>
     63#include <asm/irq_regs.h>
     64
     /*
      * Processor counters.
      * NOTE(review): neither counter is updated in the part of the file shown
      * here; presumably num_processors counts the CPUs accepted during
      * enumeration and disabled_cpus the ones that were skipped - confirm
      * against the registration code.
      */
     65unsigned int num_processors;
     66
     67unsigned disabled_cpus;
     68
     69/* Processor that is doing the boot up */
     70unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
     71EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
     72
     /*
      * NOTE(review): presumably the local APIC version of the boot CPU; only
      * the definition is visible here, so verify where it is assigned.
      */
     73u8 boot_cpu_apic_version __ro_after_init;
     74
     75/*
     76 * The highest APIC ID seen during enumeration.
     77 */
     78static unsigned int max_physical_apicid;
     79
     80/*
     81 * Bitmask of physically existing CPUs:
     82 */
     83physid_mask_t phys_cpu_present_map;
     84
     85/*
     86 * Processor to be disabled specified by kernel parameter
     87 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
     88 * avoid undefined behaviour caused by sending INIT from AP to BSP.
     89 */
     90static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
     91
     92/*
     93 * This variable controls which CPUs receive external NMIs.  By default,
     94 * external NMIs are delivered only to the BSP.
     95 */
     96static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
     97
     98/*
     99 * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
    100 */
    101static bool virt_ext_dest_id __ro_after_init;
    102
    103/*
    104 * Map cpu index to physical APIC ID
     *
     * The two APIC ID maps are defined with BAD_APICID and the ACPI ID map
     * with U32_MAX as their initial value; all three are exported below.
    105 */
    106DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
    107DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
    108DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
    109EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
    110EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
    111EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
    112
    113#ifdef CONFIG_X86_32
    114
    115/*
    116 * On x86_32, the mapping between cpu and logical apicid may vary
    117 * depending on apic in use.  The following early percpu variable is
    118 * used for the mapping.  This is where the behaviors of x86_64 and 32
    119 * actually diverge.  Let's keep it ugly for now.
     *
     * The variable is defined with BAD_APICID as its initial value and is
     * only built when CONFIG_X86_32 is set.
    120 */
    121DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
    122
    123/* Local APIC was disabled by the BIOS and enabled by the kernel */
    124static int enabled_via_apicbase __ro_after_init;
    125
    126/*
    127 * Handle interrupt mode configuration register (IMCR).
    128 * This register controls whether the interrupt signals
    129 * that reach the BSP come from the master PIC or from the
    130 * local APIC. Before entering Symmetric I/O Mode, either
    131 * the BIOS or the operating system must switch out of
    132 * PIC Mode by changing the IMCR.
    133 */
    /* Switch out of PIC Mode: write 0x01 to the IMCR through pc_conf_set(). */
    134static inline void imcr_pic_to_apic(void)
    135{
    136	/* NMI and 8259 INTR go through APIC */
    137	pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
    138}
    139
    /* Switch back to PIC Mode: write 0x00 to the IMCR through pc_conf_set(). */
    140static inline void imcr_apic_to_pic(void)
    141{
    142	/* NMI and 8259 INTR go directly to BSP */
    143	pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
    144}
    145#endif
    146
    147/*
    148 * Knob to control our willingness to enable the local APIC.
    149 *
    150 * +1=force-enable
     *
     * parse_lapic() sets it to 1 when "lapic" is given without an argument
     * on a CONFIG_X86_32 kernel.
    151 */
    152static int force_enable_local_apic __initdata;
    153
    154/*
    155 * APIC command line parameters
    156 */
    157static int __init parse_lapic(char *arg)
    158{
    159	if (IS_ENABLED(CONFIG_X86_32) && !arg)
    160		force_enable_local_apic = 1;
    161	else if (arg && !strncmp(arg, "notscdeadline", 13))
    162		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
    163	return 0;
    164}
    165early_param("lapic", parse_lapic);
    166
    167#ifdef CONFIG_X86_64
    /*
     * "apicpmtimer" command line handler, built for CONFIG_X86_64 only:
     * sets apic_calibrate_pmtmr and calls notsc_setup(NULL). Returns 1.
     * The argument @s is not looked at.
     */
    168static int apic_calibrate_pmtmr __initdata;
    169static __init int setup_apicpmtimer(char *s)
    170{
    171	apic_calibrate_pmtmr = 1;
    172	notsc_setup(NULL);
    173	return 1;
    174}
    175__setup("apicpmtimer", setup_apicpmtimer);
    176#endif
    177
    /*
     * NOTE(review): presumably the physical address of the local APIC as
     * reported by firmware; only the definition is visible here - confirm.
     */
    178unsigned long mp_lapic_addr __ro_after_init;
    /* Non-zero makes apic_needs_pit() report that the PIT is required. */
    179int disable_apic __ro_after_init;
    180/* Disable local APIC timer from the kernel commandline or via dmi quirk */
    181static int disable_apic_timer __initdata;
    182/* Local APIC timer works in C2 */
    183int local_apic_timer_c2_ok __ro_after_init;
    184EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
    185
    186/*
    187 * Debug level, exported for io_apic.c
    188 */
    189int apic_verbosity __ro_after_init;
    190
    /* NOTE(review): presumably set when the platform starts in PIC Mode - confirm. */
    191int pic_mode __ro_after_init;
    192
    193/* Have we found an MP table */
    194int smp_found_config __ro_after_init;
    195
    /*
     * I/O resource describing the local APIC. Only the name and the flags are
     * set here. NOTE(review): start/end are presumably filled in when the
     * resource is registered - confirm.
     */
    196static struct resource lapic_resource = {
    197	.name = "Local APIC",
    198	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
    199};
    200
    /*
     * Calibrated local APIC timer period. 0 means "not known": in that case
     * lapic_init_clockevent() returns -1 and apic_needs_pit() can ask for the
     * PIT. calibrate_APIC_clock() stores delta * APIC_DIVISOR / LAPIC_CAL_LOOPS
     * here.
     */
    201unsigned int lapic_timer_period = 0;
    202
    203static void apic_pm_activate(void);
    204
    /*
     * NOTE(review): presumably the physical base address of the APIC register
     * window - confirm where it is assigned.
     */
    205static unsigned long apic_phys __ro_after_init;
    206
    207/*
    208 * Get the LAPIC version
     *
     * Reads APIC_LVR and returns the field extracted by GET_APIC_VERSION().
    209 */
    210static inline int lapic_get_version(void)
    211{
    212	return GET_APIC_VERSION(apic_read(APIC_LVR));
    213}
    214
    215/*
    216 * Check, if the APIC is integrated or a separate chip
     *
     * Returns the result of APIC_INTEGRATED() applied to lapic_get_version().
    217 */
    218static inline int lapic_is_integrated(void)
    219{
    220	return APIC_INTEGRATED(lapic_get_version());
    221}
    222
    223/*
    224 * Check, whether this is a modern or a first generation APIC
    225 */
    226static int modern_apic(void)
    227{
    228	/* AMD systems use old APIC versions, so check the CPU */
    229	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
    230	    boot_cpu_data.x86 >= 0xf)
    231		return 1;
    232
    233	/* Hygon systems use modern APIC */
    234	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
    235		return 1;
    236
    237	return lapic_get_version() >= 0x14;
    238}
    239
    240/*
    241 * Right after this call the APIC is driven by the NOOP driver,
    242 * so apic->write/read don't do anything.
     *
     * Logs the switch and points the global 'apic' at apic_noop.
    243 */
    244static void __init apic_disable(void)
    245{
    246	pr_info("APIC: switched to apic NOOP\n");
    247	apic = &apic_noop;
    248}
    249
    /*
     * Busy-wait, without any timeout, until APIC_ICR_BUSY reads as clear in
     * APIC_ICR. cpu_relax() is called between polls.
     */
    250void native_apic_wait_icr_idle(void)
    251{
    252	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
    253		cpu_relax();
    254}
    255
    256u32 native_safe_apic_wait_icr_idle(void)
    257{
    258	u32 send_status;
    259	int timeout;
    260
    261	timeout = 0;
    262	do {
    263		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
    264		if (!send_status)
    265			break;
    266		inc_irq_stat(icr_read_retry_count);
    267		udelay(100);
    268	} while (timeout++ < 1000);
    269
    270	return send_status;
    271}
    272
    /*
     * Write the interrupt command register pair.
     *
     * @low: value written to APIC_ICR
     * @id:  destination, placed into APIC_ICR2 via SET_APIC_DEST_FIELD()
     *
     * APIC_ICR2 is written before APIC_ICR, and both writes happen with local
     * interrupts disabled (saved and restored through 'flags'), presumably so
     * that nothing on this CPU can touch the ICR between the two halves.
     */
    273void native_apic_icr_write(u32 low, u32 id)
    274{
    275	unsigned long flags;
    276
    277	local_irq_save(flags);
    278	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
    279	apic_write(APIC_ICR, low);
    280	local_irq_restore(flags);
    281}
    282
    283u64 native_apic_icr_read(void)
    284{
    285	u32 icr1, icr2;
    286
    287	icr2 = apic_read(APIC_ICR2);
    288	icr1 = apic_read(APIC_ICR);
    289
    290	return icr1 | ((u64)icr2 << 32);
    291}
    292
    293#ifdef CONFIG_X86_32
    /**
     * get_physical_broadcast - Get number of physical broadcast IDs
     *
     * Return: 0xff when modern_apic() reports a modern APIC, 0xf otherwise.
     */
    int get_physical_broadcast(void)
    {
    	if (modern_apic())
    		return 0xff;

    	return 0xf;
    }
    301#endif
    302
    303/**
    304 * lapic_get_maxlvt - get the maximum number of local vector table entries
    305 */
    306int lapic_get_maxlvt(void)
    307{
    308	/*
    309	 * - we always have APIC integrated on 64bit mode
    310	 * - 82489DXs do not report # of LVT entries
    311	 */
    312	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
    313}
    314
    315/*
    316 * Local APIC timer
    317 */
    318
    /*
     * APIC_DIVISOR: __setup_APIC_LVTT() selects APIC_TDR_DIV_16 and loads
     * clocks / APIC_DIVISOR as initial count; lapic_init_clockevent() derives
     * the clockevent mult from lapic_timer_period / APIC_DIVISOR.
     * TSC_DIVISOR: lapic_next_deadline() multiplies the requested delta by it
     * and the deadline clockevent is registered at tsc_khz * (1000 / TSC_DIVISOR).
     */
    319/* Clock divisor */
    320#define APIC_DIVISOR 16
    321#define TSC_DIVISOR  8
    322
    /* OR-ed into the LVTT value by __setup_APIC_LVTT() for non-integrated APICs. */
    323/* i82489DX specific */
    324#define		I82489DX_BASE_DIVIDER		(((0x2) << 18))
    325
    326/*
    327 * This function sets up the local APIC timer, with a timeout of
    328 * 'clocks' APIC bus clock. During calibration we actually call
    329 * this function twice on the boot CPU, once with a bogus timeout
    330 * value, second time for real. The other (noncalibrating) CPUs
    331 * call this function only once, with the real, calibrated value.
    332 *
    333 * We do reads before writes even if unnecessary, to get around the
    334 * P5 APIC double write bug.
     *
     * @clocks:  timeout in APIC bus clocks; only used in periodic mode, where
     *           clocks / APIC_DIVISOR is written to APIC_TMICT
     * @oneshot: zero selects periodic mode; non-zero selects one-shot, or
     *           TSC-deadline mode when the boot CPU has that feature
     * @irqen:   zero sets APIC_LVT_MASKED in the value written to APIC_LVTT
    335 */
    336static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
    337{
    338	unsigned int lvtt_value, tmp_value;
    339
    	/* Build the LVTT value: vector first, then the mode bits. */
    340	lvtt_value = LOCAL_TIMER_VECTOR;
    341	if (!oneshot)
    342		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
    343	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
    344		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
    345
    346	/*
    347	 * The i82489DX APIC uses bit 18 and 19 for the base divider.  This
    348	 * overlaps with bit 18 on integrated APICs, but is not documented
    349	 * in the SDM. No problem though. i82489DX equipped systems do not
    350	 * have TSC deadline timer.
    351	 */
    352	if (!lapic_is_integrated())
    353		lvtt_value |= I82489DX_BASE_DIVIDER;
    354
    355	if (!irqen)
    356		lvtt_value |= APIC_LVT_MASKED;
    357
    358	apic_write(APIC_LVTT, lvtt_value);
    359
    	/* In TSC-deadline mode neither the divider nor the count is programmed. */
    360	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
    361		/*
    362		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
    363		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
    364		 * According to Intel, MFENCE can do the serialization here.
    365		 */
    366		asm volatile("mfence" : : : "memory");
    367		return;
    368	}
    369
    370	/*
    371	 * Divide PICLK by 16
    372	 */
    373	tmp_value = apic_read(APIC_TDCR);
    374	apic_write(APIC_TDCR,
    375		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
    376		APIC_TDR_DIV_16);
    377
    	/* One-shot mode leaves the initial count alone here. */
    378	if (!oneshot)
    379		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
    380}
    381
    382/*
    383 * Setup extended LVT, AMD specific
    384 *
    385 * Software should use the LVT offsets the BIOS provides.  The offsets
    386 * are determined by the subsystems using it like those for MCE
    387 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
    388 * are supported. Beginning with family 10h at least 4 offsets are
    389 * available.
    390 *
    391 * Since the offsets must be consistent for all cores, we keep track
    392 * of the LVT offsets in software and reserve the offset for the same
    393 * vector also to be used on other cores. An offset is freed by
    394 * setting the entry to APIC_EILVT_MASKED.
    395 *
    396 * If the BIOS is right, there should be no conflicts. Otherwise a
    397 * "[Firmware Bug]: ..." error message is generated. However, if
    398 * software does not properly determine the offsets, it is not
    399 * necessarily a BIOS bug.
    400 */
    401
    /*
     * Software reservation table, one slot per extended LVT offset. A slot
     * holds the entry value reserved through reserve_eilvt_offset(); a vector
     * part of 0 means the offset is unassigned.
     */
    402static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
    403
    404static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
    405{
    406	return (old & APIC_EILVT_MASKED)
    407		|| (new == APIC_EILVT_MASKED)
    408		|| ((new & ~APIC_EILVT_MASKED) == old);
    409}
    410
    /*
     * Reserve an extended LVT offset for the entry value @new in the
     * eilvt_offsets[] table.
     *
     * @offset: extended LVT offset to reserve
     * @new:    entry value to record for that offset
     *
     * Returns @new on success, the value already reserved when that one may
     * not be replaced (see eilvt_entry_is_changeable()), or ~0 when @offset
     * is not below APIC_EILVT_NR_MAX.
     */
    411static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
    412{
    413	unsigned int rsvd, vector;
    414
    415	if (offset >= APIC_EILVT_NR_MAX)
    416		return ~0;
    417
    418	rsvd = atomic_read(&eilvt_offsets[offset]);
    	/*
    	 * Retry until the value handed back by atomic_cmpxchg() equals @new.
    	 * NOTE(review): cmpxchg presumably returns the value it found, so a
    	 * successful swap from a different value takes one more pass before
    	 * the loop condition is met - confirm.
    	 */
    419	do {
    420		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
    421		if (vector && !eilvt_entry_is_changeable(vector, new))
    422			/* may not change if vectors are different */
    423			return rsvd;
    424		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
    425	} while (rsvd != new);
    426
    	/* Log only when a non-zero entry differs from the one seen in the last pass. */
    427	rsvd &= ~APIC_EILVT_MASKED;
    428	if (rsvd && rsvd != vector)
    429		pr_info("LVT offset %d assigned for vector 0x%02x\n",
    430			offset, rsvd);
    431
    432	return new;
    433}
    434
    435/*
    436 * If mask=1, the LVT entry does not generate interrupts while mask=0
    437 * enables the vector. See also the BKDGs. Must be called with
    438 * preemption disabled.
    439 */
    440
    441int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
    442{
    443	unsigned long reg = APIC_EILVTn(offset);
    444	unsigned int new, old, reserved;
    445
    446	new = (mask << 16) | (msg_type << 8) | vector;
    447	old = apic_read(reg);
    448	reserved = reserve_eilvt_offset(offset, new);
    449
    450	if (reserved != new) {
    451		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
    452		       "vector 0x%x, but the register is already in use for "
    453		       "vector 0x%x on another cpu\n",
    454		       smp_processor_id(), reg, offset, new, reserved);
    455		return -EINVAL;
    456	}
    457
    458	if (!eilvt_entry_is_changeable(old, new)) {
    459		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
    460		       "vector 0x%x, but the register is already in use for "
    461		       "vector 0x%x on this cpu\n",
    462		       smp_processor_id(), reg, offset, new, old);
    463		return -EBUSY;
    464	}
    465
    466	apic_write(reg, new);
    467
    468	return 0;
    469}
    470EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
    471
    472/*
    473 * Program the next event, relative to now
     *
     * @delta: value written to APIC_TMICT as it is
     * @evt:   not used
     *
     * Returns 0.
    474 */
    475static int lapic_next_event(unsigned long delta,
    476			    struct clock_event_device *evt)
    477{
    478	apic_write(APIC_TMICT, delta);
    479	return 0;
    480}
    481
    /*
     * Program the next event through the TSC deadline MSR.
     *
     * @delta: distance to the event; multiplied by TSC_DIVISOR and added to
     *         the current TSC value to form the deadline
     * @evt:   not used
     *
     * Returns 0.
     */
    482static int lapic_next_deadline(unsigned long delta,
    483			       struct clock_event_device *evt)
    484{
    485	u64 tsc;
    486
    487	/* This MSR is special and needs a special fence: */
    488	weak_wrmsr_fence();
    489
    490	tsc = rdtsc();
    491	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
    492	return 0;
    493}
    494
    495static int lapic_timer_shutdown(struct clock_event_device *evt)
    496{
    497	unsigned int v;
    498
    499	/* Lapic used as dummy for broadcast ? */
    500	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
    501		return 0;
    502
    503	v = apic_read(APIC_LVTT);
    504	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
    505	apic_write(APIC_LVTT, v);
    506	apic_write(APIC_TMICT, 0);
    507	return 0;
    508}
    509
    510static inline int
    511lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
    512{
    513	/* Lapic used as dummy for broadcast ? */
    514	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
    515		return 0;
    516
    517	__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
    518	return 0;
    519}
    520
    /* ->set_state_periodic callback: periodic flavour of the common helper. */
    521static int lapic_timer_set_periodic(struct clock_event_device *evt)
    522{
    523	return lapic_timer_set_periodic_oneshot(evt, false);
    524}
    525
    /* ->set_state_oneshot callback: one-shot flavour of the common helper. */
    526static int lapic_timer_set_oneshot(struct clock_event_device *evt)
    527{
    528	return lapic_timer_set_periodic_oneshot(evt, true);
    529}
    530
    531/*
    532 * Local APIC timer broadcast function
     *
     * Sends LOCAL_TIMER_VECTOR as an IPI to the CPUs in @mask through the
     * current APIC driver. Does nothing when CONFIG_SMP is not set.
    533 */
    534static void lapic_timer_broadcast(const struct cpumask *mask)
    535{
    536#ifdef CONFIG_SMP
    537	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
    538#endif
    539}
    540
    541
    542/*
    543 * The local apic timer can be used for any function which is CPU local.
     *
     * This is the template device: setup_APIC_timer() copies it into the
     * per-CPU lapic_events slot before registering, and
     * lapic_init_clockevent() fills in mult and the min/max delta limits.
     * It starts out with CLOCK_EVT_FEAT_DUMMY set; the calibration code
     * clears that flag.
    544 */
    545static struct clock_event_device lapic_clockevent = {
    546	.name				= "lapic",
    547	.features			= CLOCK_EVT_FEAT_PERIODIC |
    548					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
    549					  | CLOCK_EVT_FEAT_DUMMY,
    550	.shift				= 32,
    551	.set_state_shutdown		= lapic_timer_shutdown,
    552	.set_state_periodic		= lapic_timer_set_periodic,
    553	.set_state_oneshot		= lapic_timer_set_oneshot,
    554	.set_state_oneshot_stopped	= lapic_timer_shutdown,
    555	.set_next_event			= lapic_next_event,
    556	.broadcast			= lapic_timer_broadcast,
    557	.rating				= 100,
    558	.irq				= -1,
    559};
    /* Per-CPU copy of the template above, one clockevent device per CPU. */
    560static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
    561
    /*
     * Intel CPUs on which the TSC deadline timer is only used from a minimum
     * microcode revision on. The last macro argument is that revision;
     * apic_validate_deadline_timer() reads it back through ->driver_data and
     * compares it with boot_cpu_data.microcode.
     */
    562static const struct x86_cpu_id deadline_match[] __initconst = {
    563	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
    564	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */
    565
    566	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,	0x0b000020),
    567
    568	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
    569	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
    570	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
    571	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),
    572
    573	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
    574	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
    575	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),
    576
    577	X86_MATCH_INTEL_FAM6_MODEL( HASWELL,		0x22),
    578	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,		0x20),
    579	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,		0x17),
    580
    581	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,		0x25),
    582	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,	0x17),
    583
    584	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,		0xb2),
    585	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,		0xb2),
    586
    587	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,		0x52),
    588	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,		0x52),
    589
    	/* Empty terminating entry */
    590	{},
    591};
    592
    593static __init bool apic_validate_deadline_timer(void)
    594{
    595	const struct x86_cpu_id *m;
    596	u32 rev;
    597
    598	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
    599		return false;
    600	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
    601		return true;
    602
    603	m = x86_match_cpu(deadline_match);
    604	if (!m)
    605		return true;
    606
    607	rev = (u32)m->driver_data;
    608
    609	if (boot_cpu_data.microcode >= rev)
    610		return true;
    611
    612	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
    613	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
    614	       "please update microcode to version: 0x%x (or later)\n", rev);
    615	return false;
    616}
    617
    618/*
    619 * Setup the local APIC timer for this CPU. Copy the initialized values
    620 * of the boot CPU and register the clock event in the framework.
    621 */
    622static void setup_APIC_timer(void)
    623{
    624	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
    625
    626	if (this_cpu_has(X86_FEATURE_ARAT)) {
    627		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
    628		/* Make LAPIC timer preferable over percpu HPET */
    629		lapic_clockevent.rating = 150;
    630	}
    631
    632	memcpy(levt, &lapic_clockevent, sizeof(*levt));
    633	levt->cpumask = cpumask_of(smp_processor_id());
    634
    635	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
    636		levt->name = "lapic-deadline";
    637		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
    638				    CLOCK_EVT_FEAT_DUMMY);
    639		levt->set_next_event = lapic_next_deadline;
    640		clockevents_config_and_register(levt,
    641						tsc_khz * (1000 / TSC_DIVISOR),
    642						0xF, ~0UL);
    643	} else
    644		clockevents_register_device(levt);
    645}
    646
    647/*
    648 * Install the updated TSC frequency from recalibration at the TSC
    649 * deadline clockevent devices.
    650 */
    651static void __lapic_update_tsc_freq(void *info)
    652{
    653	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
    654
    655	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
    656		return;
    657
    658	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
    659}
    660
    /*
     * Propagate a changed TSC frequency to the deadline clockevent devices by
     * running __lapic_update_tsc_freq() on each CPU. The last on_each_cpu()
     * argument is 0.
     */
    661void lapic_update_tsc_freq(void)
    662{
    663	/*
    664	 * The clockevent device's ->mult and ->shift can both be
    665	 * changed. In order to avoid races, schedule the frequency
    666	 * update code on each CPU.
    667	 */
    668	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
    669}
    670
    671/*
    672 * In this function we calibrate APIC bus clocks to the external timer.
    673 *
    674 * We want to do the calibration only once since we want to have local timer
    675 * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
    676 * frequency.
    677 *
    678 * This was previously done by reading the PIT/HPET and waiting for a wrap
    679 * around to find out, that a tick has elapsed. I have a box, where the PIT
    680 * readout is broken, so it never gets out of the wait loop again. This was
    681 * also reported by others.
    682 *
    683 * Monitoring the jiffies value is inaccurate and the clockevents
    684 * infrastructure allows us to do a simple substitution of the interrupt
    685 * handler.
    686 *
    687 * The calibration routine also uses the pm_timer when possible, as the PIT
    688 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
    689 * back to normal later in the boot process).
    690 */
    691
    /*
     * Calibration state.
     * LAPIC_CAL_LOOPS is the number of ticks between the two samples (HZ/10).
     * lapic_cal_loops counts lapic_cal_handler() calls and starts at -1.
     * The *1 variables hold the first sample and the *2 variables the second:
     * t = APIC current count, tsc = TSC, pm = PM timer, j = jiffies.
     */
    692#define LAPIC_CAL_LOOPS		(HZ/10)
    693
    694static __initdata int lapic_cal_loops = -1;
    695static __initdata long lapic_cal_t1, lapic_cal_t2;
    696static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
    697static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
    698static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
    699
    700/*
    701 * Temporary interrupt handler and polled calibration function.
    702 */
    703static void __init lapic_cal_handler(struct clock_event_device *dev)
    704{
    705	unsigned long long tsc = 0;
    706	long tapic = apic_read(APIC_TMCCT);
    707	unsigned long pm = acpi_pm_read_early();
    708
    709	if (boot_cpu_has(X86_FEATURE_TSC))
    710		tsc = rdtsc();
    711
    712	switch (lapic_cal_loops++) {
    713	case 0:
    714		lapic_cal_t1 = tapic;
    715		lapic_cal_tsc1 = tsc;
    716		lapic_cal_pm1 = pm;
    717		lapic_cal_j1 = jiffies;
    718		break;
    719
    720	case LAPIC_CAL_LOOPS:
    721		lapic_cal_t2 = tapic;
    722		lapic_cal_tsc2 = tsc;
    723		if (pm < lapic_cal_pm1)
    724			pm += ACPI_PM_OVRRUN;
    725		lapic_cal_pm2 = pm;
    726		lapic_cal_j2 = jiffies;
    727		break;
    728	}
    729}
    730
    /*
     * Cross-check the APIC calibration against the PM timer.
     *
     * @deltapm:  PM timer ticks that elapsed between the two samples
     * @delta:    in/out, APIC timer count delta of the same interval
     * @deltatsc: in/out, TSC delta of the same interval
     *
     * Returns -1 when the PM timer cannot be used (CONFIG_X86_PM_TIMER is off
     * or @deltapm is 0); the deltas are left alone then. Returns 0 otherwise.
     * If @deltapm lies within pm_thresh (1% of 100ms worth of ticks) of the
     * expected 100ms, nothing is changed. Outside that window *delta and, on
     * CPUs with a TSC, *deltatsc are rescaled by pm_100ms / deltapm.
     */
    731static int __init
    732calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
    733{
    734	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
    735	const long pm_thresh = pm_100ms / 100;
    736	unsigned long mult;
    737	u64 res;
    738
    	/* Without PM timer support everything below is compiled but unreachable. */
    739#ifndef CONFIG_X86_PM_TIMER
    740	return -1;
    741#endif
    742
    743	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
    744
    745	/* Check, if the PM timer is available */
    746	if (!deltapm)
    747		return -1;
    748
    749	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
    750
    751	if (deltapm > (pm_100ms - pm_thresh) &&
    752	    deltapm < (pm_100ms + pm_thresh)) {
    753		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
    754		return 0;
    755	}
    756
    	/* Measured interval for the warning; the message reports it as ms. */
    757	res = (((u64)deltapm) *  mult) >> 22;
    758	do_div(res, 1000000);
    759	pr_warn("APIC calibration not consistent "
    760		"with PM-Timer: %ldms instead of 100ms\n", (long)res);
    761
    762	/* Correct the lapic counter value */
    763	res = (((u64)(*delta)) * pm_100ms);
    764	do_div(res, deltapm);
    765	pr_info("APIC delta adjusted to PM-Timer: "
    766		"%lu (%ld)\n", (unsigned long)res, *delta);
    767	*delta = (long)res;
    768
    769	/* Correct the tsc counter value */
    770	if (boot_cpu_has(X86_FEATURE_TSC)) {
    771		res = (((u64)(*deltatsc)) * pm_100ms);
    772		do_div(res, deltapm);
    773		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
    774					  "PM-Timer: %lu (%ld)\n",
    775					(unsigned long)res, *deltatsc);
    776		*deltatsc = (long)res;
    777	}
    778
    779	return 0;
    780}
    781
    782static int __init lapic_init_clockevent(void)
    783{
    784	if (!lapic_timer_period)
    785		return -1;
    786
    787	/* Calculate the scaled math multiplication factor */
    788	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
    789					TICK_NSEC, lapic_clockevent.shift);
    790	lapic_clockevent.max_delta_ns =
    791		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
    792	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
    793	lapic_clockevent.min_delta_ns =
    794		clockevent_delta2ns(0xF, &lapic_clockevent);
    795	lapic_clockevent.min_delta_ticks = 0xF;
    796
    797	return 0;
    798}
    799
    800bool __init apic_needs_pit(void)
    801{
    802	/*
    803	 * If the frequencies are not known, PIT is required for both TSC
    804	 * and apic timer calibration.
    805	 */
    806	if (!tsc_khz || !cpu_khz)
    807		return true;
    808
    809	/* Is there an APIC at all or is it disabled? */
    810	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
    811		return true;
    812
    813	/*
    814	 * If interrupt delivery mode is legacy PIC or virtual wire without
    815	 * configuration, the local APIC timer wont be set up. Make sure
    816	 * that the PIT is initialized.
    817	 */
    818	if (apic_intr_mode == APIC_PIC ||
    819	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
    820		return true;
    821
    822	/* Virt guests may lack ARAT, but still have DEADLINE */
    823	if (!boot_cpu_has(X86_FEATURE_ARAT))
    824		return true;
    825
    826	/* Deadline timer is based on TSC so no further PIT action required */
    827	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
    828		return false;
    829
    830	/* APIC timer disabled? */
    831	if (disable_apic_timer)
    832		return true;
    833	/*
    834	 * The APIC timer frequency is known already, no PIT calibration
    835	 * required. If unknown, let the PIT be initialized.
    836	 */
    837	return lapic_timer_period == 0;
    838}
    839
    840static int __init calibrate_APIC_clock(void)
    841{
    842	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
    843	u64 tsc_perj = 0, tsc_start = 0;
    844	unsigned long jif_start;
    845	unsigned long deltaj;
    846	long delta, deltatsc;
    847	int pm_referenced = 0;
    848
    849	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
    850		return 0;
    851
    852	/*
    853	 * Check if lapic timer has already been calibrated by platform
    854	 * specific routine, such as tsc calibration code. If so just fill
    855	 * in the clockevent structure and return.
    856	 */
    857	if (!lapic_init_clockevent()) {
    858		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
    859			    lapic_timer_period);
    860		/*
    861		 * Direct calibration methods must have an always running
    862		 * local APIC timer, no need for broadcast timer.
    863		 */
    864		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
    865		return 0;
    866	}
    867
    868	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
    869		    "calibrating APIC timer ...\n");
    870
    871	/*
    872	 * There are platforms w/o global clockevent devices. Instead of
    873	 * making the calibration conditional on that, use a polling based
    874	 * approach everywhere.
    875	 */
    876	local_irq_disable();
    877
    878	/*
    879	 * Setup the APIC counter to maximum. There is no way the lapic
    880	 * can underflow in the 100ms detection time frame
    881	 */
    882	__setup_APIC_LVTT(0xffffffff, 0, 0);
    883
    884	/*
    885	 * Methods to terminate the calibration loop:
    886	 *  1) Global clockevent if available (jiffies)
    887	 *  2) TSC if available and frequency is known
    888	 */
    889	jif_start = READ_ONCE(jiffies);
    890
    891	if (tsc_khz) {
    892		tsc_start = rdtsc();
    893		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
    894	}
    895
    896	/*
    897	 * Enable interrupts so the tick can fire, if a global
    898	 * clockevent device is available
    899	 */
    900	local_irq_enable();
    901
    902	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
    903		/* Wait for a tick to elapse */
    904		while (1) {
    905			if (tsc_khz) {
    906				u64 tsc_now = rdtsc();
    907				if ((tsc_now - tsc_start) >= tsc_perj) {
    908					tsc_start += tsc_perj;
    909					break;
    910				}
    911			} else {
    912				unsigned long jif_now = READ_ONCE(jiffies);
    913
    914				if (time_after(jif_now, jif_start)) {
    915					jif_start = jif_now;
    916					break;
    917				}
    918			}
    919			cpu_relax();
    920		}
    921
    922		/* Invoke the calibration routine */
    923		local_irq_disable();
    924		lapic_cal_handler(NULL);
    925		local_irq_enable();
    926	}
    927
    928	local_irq_disable();
    929
    930	/* Build delta t1-t2 as apic timer counts down */
    931	delta = lapic_cal_t1 - lapic_cal_t2;
    932	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
    933
    934	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
    935
    936	/* we trust the PM based calibration if possible */
    937	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
    938					&delta, &deltatsc);
    939
    940	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
    941	lapic_init_clockevent();
    942
    943	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
    944	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
    945	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
    946		    lapic_timer_period);
    947
    948	if (boot_cpu_has(X86_FEATURE_TSC)) {
    949		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
    950			    "%ld.%04ld MHz.\n",
    951			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
    952			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
    953	}
    954
    955	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
    956		    "%u.%04u MHz.\n",
    957		    lapic_timer_period / (1000000 / HZ),
    958		    lapic_timer_period % (1000000 / HZ));
    959
    960	/*
    961	 * Do a sanity check on the APIC calibration result
    962	 */
    963	if (lapic_timer_period < (1000000 / HZ)) {
    964		local_irq_enable();
    965		pr_warn("APIC frequency too slow, disabling apic timer\n");
    966		return -1;
    967	}
    968
    969	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
    970
    971	/*
    972	 * PM timer calibration failed or not turned on so lets try APIC
    973	 * timer based calibration, if a global clockevent device is
    974	 * available.
    975	 */
    976	if (!pm_referenced && global_clock_event) {
    977		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
    978
    979		/*
    980		 * Setup the apic timer manually
    981		 */
    982		levt->event_handler = lapic_cal_handler;
    983		lapic_timer_set_periodic(levt);
    984		lapic_cal_loops = -1;
    985
    986		/* Let the interrupts run */
    987		local_irq_enable();
    988
    989		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
    990			cpu_relax();
    991
    992		/* Stop the lapic timer */
    993		local_irq_disable();
    994		lapic_timer_shutdown(levt);
    995
    996		/* Jiffies delta */
    997		deltaj = lapic_cal_j2 - lapic_cal_j1;
    998		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
    999
   1000		/* Check, if the jiffies result is consistent */
   1001		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
   1002			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
   1003		else
   1004			levt->features |= CLOCK_EVT_FEAT_DUMMY;
   1005	}
   1006	local_irq_enable();
   1007
   1008	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
   1009		pr_warn("APIC timer disabled due to verification failure\n");
   1010		return -1;
   1011	}
   1012
   1013	return 0;
   1014}
   1015
   1016/*
   1017 * Setup the boot APIC
   1018 *
   1019 * Calibrate and verify the result.
   1020 */
   1021void __init setup_boot_APIC_clock(void)
   1022{
   1023	/*
   1024	 * The local apic timer can be disabled via the kernel
   1025	 * commandline or from the CPU detection code. Register the lapic
   1026	 * timer as a dummy clock event source on SMP systems, so the
   1027	 * broadcast mechanism is used. On UP systems simply ignore it.
   1028	 */
   1029	if (disable_apic_timer) {
   1030		pr_info("Disabling APIC timer\n");
   1031		/* No broadcast on UP ! */
   1032		if (num_possible_cpus() > 1) {
   1033			lapic_clockevent.mult = 1;
   1034			setup_APIC_timer();
   1035		}
   1036		return;
   1037	}
   1038
   1039	if (calibrate_APIC_clock()) {
   1040		/* No broadcast on UP ! */
   1041		if (num_possible_cpus() > 1)
   1042			setup_APIC_timer();
   1043		return;
   1044	}
   1045
   1046	/*
   1047	 * If nmi_watchdog is set to IO_APIC, we need the
   1048	 * PIT/HPET going.  Otherwise register lapic as a dummy
   1049	 * device.
   1050	 */
   1051	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
   1052
   1053	/* Setup the lapic or request the broadcast */
   1054	setup_APIC_timer();
   1055	amd_e400_c1e_apic_setup();
   1056}
   1057
   1058void setup_secondary_APIC_clock(void)
   1059{
   1060	setup_APIC_timer();
   1061	amd_e400_c1e_apic_setup();
   1062}
   1063
   1064/*
   1065 * The guts of the apic timer interrupt
   1066 */
   1067static void local_apic_timer_interrupt(void)
   1068{
   1069	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
   1070
   1071	/*
   1072	 * Normally we should not be here till LAPIC has been initialized but
   1073	 * in some cases like kdump, its possible that there is a pending LAPIC
   1074	 * timer interrupt from previous kernel's context and is delivered in
   1075	 * new kernel the moment interrupts are enabled.
   1076	 *
   1077	 * Interrupts are enabled early and LAPIC is setup much later, hence
   1078	 * its possible that when we get here evt->event_handler is NULL.
   1079	 * Check for event_handler being NULL and discard the interrupt as
   1080	 * spurious.
   1081	 */
   1082	if (!evt->event_handler) {
   1083		pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
   1084			smp_processor_id());
   1085		/* Switch it off */
   1086		lapic_timer_shutdown(evt);
   1087		return;
   1088	}
   1089
   1090	/*
   1091	 * the NMI deadlock-detector uses this.
   1092	 */
   1093	inc_irq_stat(apic_timer_irqs);
   1094
   1095	evt->event_handler(evt);
   1096}
   1097
   1098/*
   1099 * Local APIC timer interrupt. This is the most natural way for doing
   1100 * local interrupts, but local timer interrupts can be emulated by
   1101 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
   1102 *
   1103 * [ if a single-CPU system runs an SMP kernel then we call the local
   1104 *   interrupt as well. Thus we cannot inline the local irq ... ]
   1105 */
   1106DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
   1107{
   1108	struct pt_regs *old_regs = set_irq_regs(regs);
   1109
   1110	ack_APIC_irq();
   1111	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
   1112	local_apic_timer_interrupt();
   1113	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
   1114
   1115	set_irq_regs(old_regs);
   1116}
   1117
   1118int setup_profiling_timer(unsigned int multiplier)
   1119{
   1120	return -EINVAL;
   1121}
   1122
   1123/*
   1124 * Local APIC start and shutdown
   1125 */
   1126
   1127/**
   1128 * clear_local_APIC - shutdown the local APIC
   1129 *
   1130 * This is called, when a CPU is disabled and before rebooting, so the state of
   1131 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
   1132 * leftovers during boot.
   1133 */
   1134void clear_local_APIC(void)
   1135{
   1136	int maxlvt;
   1137	u32 v;
   1138
   1139	/* APIC hasn't been mapped yet */
   1140	if (!x2apic_mode && !apic_phys)
   1141		return;
   1142
   1143	maxlvt = lapic_get_maxlvt();
   1144	/*
   1145	 * Masking an LVT entry can trigger a local APIC error
   1146	 * if the vector is zero. Mask LVTERR first to prevent this.
   1147	 */
   1148	if (maxlvt >= 3) {
   1149		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
   1150		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
   1151	}
   1152	/*
   1153	 * Careful: we have to set masks only first to deassert
   1154	 * any level-triggered sources.
   1155	 */
   1156	v = apic_read(APIC_LVTT);
   1157	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
   1158	v = apic_read(APIC_LVT0);
   1159	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
   1160	v = apic_read(APIC_LVT1);
   1161	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
   1162	if (maxlvt >= 4) {
   1163		v = apic_read(APIC_LVTPC);
   1164		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
   1165	}
   1166
   1167	/* lets not touch this if we didn't frob it */
   1168#ifdef CONFIG_X86_THERMAL_VECTOR
   1169	if (maxlvt >= 5) {
   1170		v = apic_read(APIC_LVTTHMR);
   1171		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
   1172	}
   1173#endif
   1174#ifdef CONFIG_X86_MCE_INTEL
   1175	if (maxlvt >= 6) {
   1176		v = apic_read(APIC_LVTCMCI);
   1177		if (!(v & APIC_LVT_MASKED))
   1178			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
   1179	}
   1180#endif
   1181
   1182	/*
   1183	 * Clean APIC state for other OSs:
   1184	 */
   1185	apic_write(APIC_LVTT, APIC_LVT_MASKED);
   1186	apic_write(APIC_LVT0, APIC_LVT_MASKED);
   1187	apic_write(APIC_LVT1, APIC_LVT_MASKED);
   1188	if (maxlvt >= 3)
   1189		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
   1190	if (maxlvt >= 4)
   1191		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
   1192
   1193	/* Integrated APIC (!82489DX) ? */
   1194	if (lapic_is_integrated()) {
   1195		if (maxlvt > 3)
   1196			/* Clear ESR due to Pentium errata 3AP and 11AP */
   1197			apic_write(APIC_ESR, 0);
   1198		apic_read(APIC_ESR);
   1199	}
   1200}
   1201
   1202/**
   1203 * apic_soft_disable - Clears and software disables the local APIC on hotplug
   1204 *
   1205 * Contrary to disable_local_APIC() this does not touch the enable bit in
   1206 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
   1207 * bus would require a hardware reset as the APIC would lose track of bus
   1208 * arbitration. On systems with FSB delivery APICBASE could be disabled,
   1209 * but it has to be guaranteed that no interrupt is sent to the APIC while
   1210 * in that state and it's not clear from the SDM whether it still responds
   1211 * to INIT/SIPI messages. Stay on the safe side and use software disable.
   1212 */
   1213void apic_soft_disable(void)
   1214{
   1215	u32 value;
   1216
   1217	clear_local_APIC();
   1218
   1219	/* Soft disable APIC (implies clearing of registers for 82489DX!). */
   1220	value = apic_read(APIC_SPIV);
   1221	value &= ~APIC_SPIV_APIC_ENABLED;
   1222	apic_write(APIC_SPIV, value);
   1223}
   1224
   1225/**
   1226 * disable_local_APIC - clear and disable the local APIC
   1227 */
   1228void disable_local_APIC(void)
   1229{
   1230	/* APIC hasn't been mapped yet */
   1231	if (!x2apic_mode && !apic_phys)
   1232		return;
   1233
   1234	apic_soft_disable();
   1235
   1236#ifdef CONFIG_X86_32
   1237	/*
   1238	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
   1239	 * restore the disabled state.
   1240	 */
   1241	if (enabled_via_apicbase) {
   1242		unsigned int l, h;
   1243
   1244		rdmsr(MSR_IA32_APICBASE, l, h);
   1245		l &= ~MSR_IA32_APICBASE_ENABLE;
   1246		wrmsr(MSR_IA32_APICBASE, l, h);
   1247	}
   1248#endif
   1249}
   1250
   1251/*
   1252 * If Linux enabled the LAPIC against the BIOS default disable it down before
   1253 * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
   1254 * not power-off.  Additionally clear all LVT entries before disable_local_APIC
   1255 * for the case where Linux didn't enable the LAPIC.
   1256 */
   1257void lapic_shutdown(void)
   1258{
   1259	unsigned long flags;
   1260
   1261	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
   1262		return;
   1263
   1264	local_irq_save(flags);
   1265
   1266#ifdef CONFIG_X86_32
   1267	if (!enabled_via_apicbase)
   1268		clear_local_APIC();
   1269	else
   1270#endif
   1271		disable_local_APIC();
   1272
   1273
   1274	local_irq_restore(flags);
   1275}
   1276
   1277/**
   1278 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
   1279 */
   1280void __init sync_Arb_IDs(void)
   1281{
   1282	/*
   1283	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
   1284	 * needed on AMD.
   1285	 */
   1286	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
   1287		return;
   1288
   1289	/*
   1290	 * Wait for idle.
   1291	 */
   1292	apic_wait_icr_idle();
   1293
   1294	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
   1295	apic_write(APIC_ICR, APIC_DEST_ALLINC |
   1296			APIC_INT_LEVELTRIG | APIC_DM_INIT);
   1297}
   1298
/*
 * Interrupt delivery mode of the boot CPU. Assigned by
 * apic_intr_mode_select() and evaluated by apic_intr_mode_init().
 */
enum apic_intr_mode_id apic_intr_mode __ro_after_init;
   1300
   1301static int __init __apic_intr_mode_select(void)
   1302{
   1303	/* Check kernel option */
   1304	if (disable_apic) {
   1305		pr_info("APIC disabled via kernel command line\n");
   1306		return APIC_PIC;
   1307	}
   1308
   1309	/* Check BIOS */
   1310#ifdef CONFIG_X86_64
   1311	/* On 64-bit, the APIC must be integrated, Check local APIC only */
   1312	if (!boot_cpu_has(X86_FEATURE_APIC)) {
   1313		disable_apic = 1;
   1314		pr_info("APIC disabled by BIOS\n");
   1315		return APIC_PIC;
   1316	}
   1317#else
   1318	/* On 32-bit, the APIC may be integrated APIC or 82489DX */
   1319
   1320	/* Neither 82489DX nor integrated APIC ? */
   1321	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
   1322		disable_apic = 1;
   1323		return APIC_PIC;
   1324	}
   1325
   1326	/* If the BIOS pretends there is an integrated APIC ? */
   1327	if (!boot_cpu_has(X86_FEATURE_APIC) &&
   1328		APIC_INTEGRATED(boot_cpu_apic_version)) {
   1329		disable_apic = 1;
   1330		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
   1331				       boot_cpu_physical_apicid);
   1332		return APIC_PIC;
   1333	}
   1334#endif
   1335
   1336	/* Check MP table or ACPI MADT configuration */
   1337	if (!smp_found_config) {
   1338		disable_ioapic_support();
   1339		if (!acpi_lapic) {
   1340			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
   1341			return APIC_VIRTUAL_WIRE_NO_CONFIG;
   1342		}
   1343		return APIC_VIRTUAL_WIRE;
   1344	}
   1345
   1346#ifdef CONFIG_SMP
   1347	/* If SMP should be disabled, then really disable it! */
   1348	if (!setup_max_cpus) {
   1349		pr_info("APIC: SMP mode deactivated\n");
   1350		return APIC_SYMMETRIC_IO_NO_ROUTING;
   1351	}
   1352
   1353	if (read_apic_id() != boot_cpu_physical_apicid) {
   1354		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
   1355		     read_apic_id(), boot_cpu_physical_apicid);
   1356		/* Or can we switch back to PIC here? */
   1357	}
   1358#endif
   1359
   1360	return APIC_SYMMETRIC_IO;
   1361}
   1362
   1363/* Select the interrupt delivery mode for the BSP */
   1364void __init apic_intr_mode_select(void)
   1365{
   1366	apic_intr_mode = __apic_intr_mode_select();
   1367}
   1368
   1369/*
   1370 * An initial setup of the virtual wire mode.
   1371 */
   1372void __init init_bsp_APIC(void)
   1373{
   1374	unsigned int value;
   1375
   1376	/*
   1377	 * Don't do the setup now if we have a SMP BIOS as the
   1378	 * through-I/O-APIC virtual wire mode might be active.
   1379	 */
   1380	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
   1381		return;
   1382
   1383	/*
   1384	 * Do not trust the local APIC being empty at bootup.
   1385	 */
   1386	clear_local_APIC();
   1387
   1388	/*
   1389	 * Enable APIC.
   1390	 */
   1391	value = apic_read(APIC_SPIV);
   1392	value &= ~APIC_VECTOR_MASK;
   1393	value |= APIC_SPIV_APIC_ENABLED;
   1394
   1395#ifdef CONFIG_X86_32
   1396	/* This bit is reserved on P4/Xeon and should be cleared */
   1397	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
   1398	    (boot_cpu_data.x86 == 15))
   1399		value &= ~APIC_SPIV_FOCUS_DISABLED;
   1400	else
   1401#endif
   1402		value |= APIC_SPIV_FOCUS_DISABLED;
   1403	value |= SPURIOUS_APIC_VECTOR;
   1404	apic_write(APIC_SPIV, value);
   1405
   1406	/*
   1407	 * Set up the virtual wire mode.
   1408	 */
   1409	apic_write(APIC_LVT0, APIC_DM_EXTINT);
   1410	value = APIC_DM_NMI;
   1411	if (!lapic_is_integrated())		/* 82489DX */
   1412		value |= APIC_LVT_LEVEL_TRIGGER;
   1413	if (apic_extnmi == APIC_EXTNMI_NONE)
   1414		value |= APIC_LVT_MASKED;
   1415	apic_write(APIC_LVT1, value);
   1416}
   1417
   1418static void __init apic_bsp_setup(bool upmode);
   1419
   1420/* Init the interrupt delivery mode for the BSP */
   1421void __init apic_intr_mode_init(void)
   1422{
   1423	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
   1424
   1425	switch (apic_intr_mode) {
   1426	case APIC_PIC:
   1427		pr_info("APIC: Keep in PIC mode(8259)\n");
   1428		return;
   1429	case APIC_VIRTUAL_WIRE:
   1430		pr_info("APIC: Switch to virtual wire mode setup\n");
   1431		break;
   1432	case APIC_VIRTUAL_WIRE_NO_CONFIG:
   1433		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
   1434		upmode = true;
   1435		break;
   1436	case APIC_SYMMETRIC_IO:
   1437		pr_info("APIC: Switch to symmetric I/O mode setup\n");
   1438		break;
   1439	case APIC_SYMMETRIC_IO_NO_ROUTING:
   1440		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
   1441		break;
   1442	}
   1443
   1444	default_setup_apic_routing();
   1445
   1446	if (x86_platform.apic_post_init)
   1447		x86_platform.apic_post_init();
   1448
   1449	apic_bsp_setup(upmode);
   1450}
   1451
   1452static void lapic_setup_esr(void)
   1453{
   1454	unsigned int oldvalue, value, maxlvt;
   1455
   1456	if (!lapic_is_integrated()) {
   1457		pr_info("No ESR for 82489DX.\n");
   1458		return;
   1459	}
   1460
   1461	if (apic->disable_esr) {
   1462		/*
   1463		 * Something untraceable is creating bad interrupts on
   1464		 * secondary quads ... for the moment, just leave the
   1465		 * ESR disabled - we can't do anything useful with the
   1466		 * errors anyway - mbligh
   1467		 */
   1468		pr_info("Leaving ESR disabled.\n");
   1469		return;
   1470	}
   1471
   1472	maxlvt = lapic_get_maxlvt();
   1473	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
   1474		apic_write(APIC_ESR, 0);
   1475	oldvalue = apic_read(APIC_ESR);
   1476
   1477	/* enables sending errors */
   1478	value = ERROR_APIC_VECTOR;
   1479	apic_write(APIC_LVTERR, value);
   1480
   1481	/*
   1482	 * spec says clear errors after enabling vector.
   1483	 */
   1484	if (maxlvt > 3)
   1485		apic_write(APIC_ESR, 0);
   1486	value = apic_read(APIC_ESR);
   1487	if (value != oldvalue)
   1488		apic_printk(APIC_VERBOSE, "ESR value before enabling "
   1489			"vector: 0x%08x  after: 0x%08x\n",
   1490			oldvalue, value);
   1491}
   1492
/*
 * Sizing of an IRR/ISR snapshot:
 *  APIC_IR_REGS    - number of 32-bit registers read per bank
 *  APIC_IR_BITS    - total number of bits in one bank
 *  APIC_IR_MAPSIZE - number of unsigned longs needed to hold those bits
 */
#define APIC_IR_REGS		APIC_ISR_NR
#define APIC_IR_BITS		(APIC_IR_REGS * 32)
#define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG)

/*
 * One bank of IRR or ISR registers. The same storage is filled register by
 * register through @regs and examined as a bitmap through @map.
 */
union apic_ir {
	unsigned long	map[APIC_IR_MAPSIZE];
	u32		regs[APIC_IR_REGS];
};
   1501
   1502static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
   1503{
   1504	int i, bit;
   1505
   1506	/* Read the IRRs */
   1507	for (i = 0; i < APIC_IR_REGS; i++)
   1508		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
   1509
   1510	/* Read the ISRs */
   1511	for (i = 0; i < APIC_IR_REGS; i++)
   1512		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
   1513
   1514	/*
   1515	 * If the ISR map is not empty. ACK the APIC and run another round
   1516	 * to verify whether a pending IRR has been unblocked and turned
   1517	 * into a ISR.
   1518	 */
   1519	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
   1520		/*
   1521		 * There can be multiple ISR bits set when a high priority
   1522		 * interrupt preempted a lower priority one. Issue an ACK
   1523		 * per set bit.
   1524		 */
   1525		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
   1526			ack_APIC_irq();
   1527		return true;
   1528	}
   1529
   1530	return !bitmap_empty(irr->map, APIC_IR_BITS);
   1531}
   1532
   1533/*
   1534 * After a crash, we no longer service the interrupts and a pending
   1535 * interrupt from previous kernel might still have ISR bit set.
   1536 *
   1537 * Most probably by now the CPU has serviced that pending interrupt and it
   1538 * might not have done the ack_APIC_irq() because it thought, interrupt
   1539 * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
   1540 * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
   1541 * a vector might get locked. It was noticed for timer irq (vector
   1542 * 0x31). Issue an extra EOI to clear ISR.
   1543 *
   1544 * If there are pending IRR bits they turn into ISR bits after a higher
   1545 * priority ISR bit has been acked.
   1546 */
   1547static void apic_pending_intr_clear(void)
   1548{
   1549	union apic_ir irr, isr;
   1550	unsigned int i;
   1551
   1552	/* 512 loops are way oversized and give the APIC a chance to obey. */
   1553	for (i = 0; i < 512; i++) {
   1554		if (!apic_check_and_ack(&irr, &isr))
   1555			return;
   1556	}
   1557	/* Dump the IRR/ISR content if that failed */
   1558	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
   1559}
   1560
   1561/**
   1562 * setup_local_APIC - setup the local APIC
   1563 *
   1564 * Used to setup local APIC while initializing BSP or bringing up APs.
   1565 * Always called with preemption disabled.
   1566 */
   1567static void setup_local_APIC(void)
   1568{
   1569	int cpu = smp_processor_id();
   1570	unsigned int value;
   1571
   1572	if (disable_apic) {
   1573		disable_ioapic_support();
   1574		return;
   1575	}
   1576
   1577	/*
   1578	 * If this comes from kexec/kcrash the APIC might be enabled in
   1579	 * SPIV. Soft disable it before doing further initialization.
   1580	 */
   1581	value = apic_read(APIC_SPIV);
   1582	value &= ~APIC_SPIV_APIC_ENABLED;
   1583	apic_write(APIC_SPIV, value);
   1584
   1585#ifdef CONFIG_X86_32
   1586	/* Pound the ESR really hard over the head with a big hammer - mbligh */
   1587	if (lapic_is_integrated() && apic->disable_esr) {
   1588		apic_write(APIC_ESR, 0);
   1589		apic_write(APIC_ESR, 0);
   1590		apic_write(APIC_ESR, 0);
   1591		apic_write(APIC_ESR, 0);
   1592	}
   1593#endif
   1594	/*
   1595	 * Double-check whether this APIC is really registered.
   1596	 * This is meaningless in clustered apic mode, so we skip it.
   1597	 */
   1598	BUG_ON(!apic->apic_id_registered());
   1599
   1600	/*
   1601	 * Intel recommends to set DFR, LDR and TPR before enabling
   1602	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
   1603	 * document number 292116).  So here it goes...
   1604	 */
   1605	apic->init_apic_ldr();
   1606
   1607#ifdef CONFIG_X86_32
   1608	if (apic->dest_mode_logical) {
   1609		int logical_apicid, ldr_apicid;
   1610
   1611		/*
   1612		 * APIC LDR is initialized.  If logical_apicid mapping was
   1613		 * initialized during get_smp_config(), make sure it matches
   1614		 * the actual value.
   1615		 */
   1616		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
   1617		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
   1618		if (logical_apicid != BAD_APICID)
   1619			WARN_ON(logical_apicid != ldr_apicid);
   1620		/* Always use the value from LDR. */
   1621		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
   1622	}
   1623#endif
   1624
   1625	/*
   1626	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
   1627	 * vector in the 16-31 range could be delivered if TPR == 0, but we
   1628	 * would think it's an exception and terrible things will happen.  We
   1629	 * never change this later on.
   1630	 */
   1631	value = apic_read(APIC_TASKPRI);
   1632	value &= ~APIC_TPRI_MASK;
   1633	value |= 0x10;
   1634	apic_write(APIC_TASKPRI, value);
   1635
   1636	/* Clear eventually stale ISR/IRR bits */
   1637	apic_pending_intr_clear();
   1638
   1639	/*
   1640	 * Now that we are all set up, enable the APIC
   1641	 */
   1642	value = apic_read(APIC_SPIV);
   1643	value &= ~APIC_VECTOR_MASK;
   1644	/*
   1645	 * Enable APIC
   1646	 */
   1647	value |= APIC_SPIV_APIC_ENABLED;
   1648
   1649#ifdef CONFIG_X86_32
   1650	/*
   1651	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
   1652	 * certain networking cards. If high frequency interrupts are
   1653	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
   1654	 * entry is masked/unmasked at a high rate as well then sooner or
   1655	 * later IOAPIC line gets 'stuck', no more interrupts are received
   1656	 * from the device. If focus CPU is disabled then the hang goes
   1657	 * away, oh well :-(
   1658	 *
   1659	 * [ This bug can be reproduced easily with a level-triggered
   1660	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
   1661	 *   BX chipset. ]
   1662	 */
   1663	/*
   1664	 * Actually disabling the focus CPU check just makes the hang less
   1665	 * frequent as it makes the interrupt distribution model be more
   1666	 * like LRU than MRU (the short-term load is more even across CPUs).
   1667	 */
   1668
   1669	/*
   1670	 * - enable focus processor (bit==0)
   1671	 * - 64bit mode always use processor focus
   1672	 *   so no need to set it
   1673	 */
   1674	value &= ~APIC_SPIV_FOCUS_DISABLED;
   1675#endif
   1676
   1677	/*
   1678	 * Set spurious IRQ vector
   1679	 */
   1680	value |= SPURIOUS_APIC_VECTOR;
   1681	apic_write(APIC_SPIV, value);
   1682
   1683	perf_events_lapic_init();
   1684
   1685	/*
   1686	 * Set up LVT0, LVT1:
   1687	 *
   1688	 * set up through-local-APIC on the boot CPU's LINT0. This is not
   1689	 * strictly necessary in pure symmetric-IO mode, but sometimes
   1690	 * we delegate interrupts to the 8259A.
   1691	 */
   1692	/*
   1693	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
   1694	 */
   1695	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
   1696	if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
   1697		value = APIC_DM_EXTINT;
   1698		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
   1699	} else {
   1700		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
   1701		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
   1702	}
   1703	apic_write(APIC_LVT0, value);
   1704
   1705	/*
   1706	 * Only the BSP sees the LINT1 NMI signal by default. This can be
   1707	 * modified by apic_extnmi= boot option.
   1708	 */
   1709	if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
   1710	    apic_extnmi == APIC_EXTNMI_ALL)
   1711		value = APIC_DM_NMI;
   1712	else
   1713		value = APIC_DM_NMI | APIC_LVT_MASKED;
   1714
   1715	/* Is 82489DX ? */
   1716	if (!lapic_is_integrated())
   1717		value |= APIC_LVT_LEVEL_TRIGGER;
   1718	apic_write(APIC_LVT1, value);
   1719
   1720#ifdef CONFIG_X86_MCE_INTEL
   1721	/* Recheck CMCI information after local APIC is up on CPU #0 */
   1722	if (!cpu)
   1723		cmci_recheck();
   1724#endif
   1725}
   1726
   1727static void end_local_APIC_setup(void)
   1728{
   1729	lapic_setup_esr();
   1730
   1731#ifdef CONFIG_X86_32
   1732	{
   1733		unsigned int value;
   1734		/* Disable the local apic timer */
   1735		value = apic_read(APIC_LVTT);
   1736		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
   1737		apic_write(APIC_LVTT, value);
   1738	}
   1739#endif
   1740
   1741	apic_pm_activate();
   1742}
   1743
   1744/*
   1745 * APIC setup function for application processors. Called from smpboot.c
   1746 */
   1747void apic_ap_setup(void)
   1748{
   1749	setup_local_APIC();
   1750	end_local_APIC_setup();
   1751}
   1752
   1753#ifdef CONFIG_X86_X2APIC
/*
 * Non-zero while x2apic mode is in use. Set to 1 by x2apic_enable() and
 * check_x2apic(), reset to 0 by x2apic_disable() and setup_nox2apic().
 */
int x2apic_mode;
EXPORT_SYMBOL_GPL(x2apic_mode);

/* Values held in x2apic_state */
enum {
	/* Value 0, hence the state before anything has been decided */
	X2APIC_OFF,
	/* x2apic is in use; x2apic_setup() enables it on the calling CPU */
	X2APIC_ON,
	/* x2apic must not be used; the enable paths bail out in this state */
	X2APIC_DISABLED,
};
static int x2apic_state;
   1763
   1764static void __x2apic_disable(void)
   1765{
   1766	u64 msr;
   1767
   1768	if (!boot_cpu_has(X86_FEATURE_APIC))
   1769		return;
   1770
   1771	rdmsrl(MSR_IA32_APICBASE, msr);
   1772	if (!(msr & X2APIC_ENABLE))
   1773		return;
   1774	/* Disable xapic and x2apic first and then reenable xapic mode */
   1775	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
   1776	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
   1777	printk_once(KERN_INFO "x2apic disabled\n");
   1778}
   1779
   1780static void __x2apic_enable(void)
   1781{
   1782	u64 msr;
   1783
   1784	rdmsrl(MSR_IA32_APICBASE, msr);
   1785	if (msr & X2APIC_ENABLE)
   1786		return;
   1787	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
   1788	printk_once(KERN_INFO "x2apic enabled\n");
   1789}
   1790
   1791static int __init setup_nox2apic(char *str)
   1792{
   1793	if (x2apic_enabled()) {
   1794		int apicid = native_apic_msr_read(APIC_ID);
   1795
   1796		if (apicid >= 255) {
   1797			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
   1798				apicid);
   1799			return 0;
   1800		}
   1801		pr_warn("x2apic already enabled.\n");
   1802		__x2apic_disable();
   1803	}
   1804	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
   1805	x2apic_state = X2APIC_DISABLED;
   1806	x2apic_mode = 0;
   1807	return 0;
   1808}
   1809early_param("nox2apic", setup_nox2apic);
   1810
   1811/* Called from cpu_init() to enable x2apic on (secondary) cpus */
   1812void x2apic_setup(void)
   1813{
   1814	/*
   1815	 * If x2apic is not in ON state, disable it if already enabled
   1816	 * from BIOS.
   1817	 */
   1818	if (x2apic_state != X2APIC_ON) {
   1819		__x2apic_disable();
   1820		return;
   1821	}
   1822	__x2apic_enable();
   1823}
   1824
   1825static __init void x2apic_disable(void)
   1826{
   1827	u32 x2apic_id, state = x2apic_state;
   1828
   1829	x2apic_mode = 0;
   1830	x2apic_state = X2APIC_DISABLED;
   1831
   1832	if (state != X2APIC_ON)
   1833		return;
   1834
   1835	x2apic_id = read_apic_id();
   1836	if (x2apic_id >= 255)
   1837		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
   1838
   1839	__x2apic_disable();
   1840	register_lapic_address(mp_lapic_addr);
   1841}
   1842
   1843static __init void x2apic_enable(void)
   1844{
   1845	if (x2apic_state != X2APIC_OFF)
   1846		return;
   1847
   1848	x2apic_mode = 1;
   1849	x2apic_state = X2APIC_ON;
   1850	__x2apic_enable();
   1851}
   1852
   1853static __init void try_to_enable_x2apic(int remap_mode)
   1854{
   1855	if (x2apic_state == X2APIC_DISABLED)
   1856		return;
   1857
   1858	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
   1859		u32 apic_limit = 255;
   1860
   1861		/*
   1862		 * Using X2APIC without IR is not architecturally supported
   1863		 * on bare metal but may be supported in guests.
   1864		 */
   1865		if (!x86_init.hyper.x2apic_available()) {
   1866			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
   1867			x2apic_disable();
   1868			return;
   1869		}
   1870
   1871		/*
   1872		 * If the hypervisor supports extended destination ID in
   1873		 * MSI, that increases the maximum APIC ID that can be
   1874		 * used for non-remapped IRQ domains.
   1875		 */
   1876		if (x86_init.hyper.msi_ext_dest_id()) {
   1877			virt_ext_dest_id = 1;
   1878			apic_limit = 32767;
   1879		}
   1880
   1881		/*
   1882		 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
   1883		 * in physical mode, and CPUs with an APIC ID that cannot
   1884		 * be addressed must not be brought online.
   1885		 */
   1886		x2apic_set_max_apicid(apic_limit);
   1887		x2apic_phys = 1;
   1888	}
   1889	x2apic_enable();
   1890}
   1891
   1892void __init check_x2apic(void)
   1893{
   1894	if (x2apic_enabled()) {
   1895		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
   1896		x2apic_mode = 1;
   1897		x2apic_state = X2APIC_ON;
   1898	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
   1899		x2apic_state = X2APIC_DISABLED;
   1900	}
   1901}
   1902#else /* CONFIG_X86_X2APIC */
   1903static int __init validate_x2apic(void)
   1904{
   1905	if (!apic_is_x2apic_enabled())
   1906		return 0;
   1907	/*
   1908	 * Checkme: Can we simply turn off x2apic here instead of panic?
   1909	 */
   1910	panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
   1911}
   1912early_initcall(validate_x2apic);
   1913
/* No-op stand-ins for kernels built without CONFIG_X86_X2APIC */
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
   1916#endif /* !CONFIG_X86_X2APIC */
   1917
   1918void __init enable_IR_x2apic(void)
   1919{
   1920	unsigned long flags;
   1921	int ret, ir_stat;
   1922
   1923	if (skip_ioapic_setup) {
   1924		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
   1925		return;
   1926	}
   1927
   1928	ir_stat = irq_remapping_prepare();
   1929	if (ir_stat < 0 && !x2apic_supported())
   1930		return;
   1931
   1932	ret = save_ioapic_entries();
   1933	if (ret) {
   1934		pr_info("Saving IO-APIC state failed: %d\n", ret);
   1935		return;
   1936	}
   1937
   1938	local_irq_save(flags);
   1939	legacy_pic->mask_all();
   1940	mask_ioapic_entries();
   1941
   1942	/* If irq_remapping_prepare() succeeded, try to enable it */
   1943	if (ir_stat >= 0)
   1944		ir_stat = irq_remapping_enable();
   1945	/* ir_stat contains the remap mode or an error code */
   1946	try_to_enable_x2apic(ir_stat);
   1947
   1948	if (ir_stat < 0)
   1949		restore_ioapic_entries();
   1950	legacy_pic->restore_mask();
   1951	local_irq_restore(flags);
   1952}
   1953
   1954#ifdef CONFIG_X86_64
   1955/*
   1956 * Detect and enable local APICs on non-SMP boards.
   1957 * Original code written by Keir Fraser.
   1958 * On AMD64 we trust the BIOS - if it says no APIC it is likely
   1959 * not correctly set up (usually the APIC timer won't work etc.)
   1960 */
   1961static int __init detect_init_APIC(void)
   1962{
   1963	if (!boot_cpu_has(X86_FEATURE_APIC)) {
   1964		pr_info("No local APIC present\n");
   1965		return -1;
   1966	}
   1967
   1968	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
   1969	return 0;
   1970}
   1971#else
   1972
   1973static int __init apic_verify(void)
   1974{
   1975	u32 features, h, l;
   1976
   1977	/*
   1978	 * The APIC feature bit should now be enabled
   1979	 * in `cpuid'
   1980	 */
   1981	features = cpuid_edx(1);
   1982	if (!(features & (1 << X86_FEATURE_APIC))) {
   1983		pr_warn("Could not enable APIC!\n");
   1984		return -1;
   1985	}
   1986	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
   1987	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
   1988
   1989	/* The BIOS may have set up the APIC at some other address */
   1990	if (boot_cpu_data.x86 >= 6) {
   1991		rdmsr(MSR_IA32_APICBASE, l, h);
   1992		if (l & MSR_IA32_APICBASE_ENABLE)
   1993			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
   1994	}
   1995
   1996	pr_info("Found and enabled local APIC!\n");
   1997	return 0;
   1998}
   1999
   2000int __init apic_force_enable(unsigned long addr)
   2001{
   2002	u32 h, l;
   2003
   2004	if (disable_apic)
   2005		return -1;
   2006
   2007	/*
   2008	 * Some BIOSes disable the local APIC in the APIC_BASE
   2009	 * MSR. This can only be done in software for Intel P6 or later
   2010	 * and AMD K7 (Model > 1) or later.
   2011	 */
   2012	if (boot_cpu_data.x86 >= 6) {
   2013		rdmsr(MSR_IA32_APICBASE, l, h);
   2014		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
   2015			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
   2016			l &= ~MSR_IA32_APICBASE_BASE;
   2017			l |= MSR_IA32_APICBASE_ENABLE | addr;
   2018			wrmsr(MSR_IA32_APICBASE, l, h);
   2019			enabled_via_apicbase = 1;
   2020		}
   2021	}
   2022	return apic_verify();
   2023}
   2024
   2025/*
   2026 * Detect and initialize APIC
   2027 */
   2028static int __init detect_init_APIC(void)
   2029{
   2030	/* Disabled by kernel option? */
   2031	if (disable_apic)
   2032		return -1;
   2033
   2034	switch (boot_cpu_data.x86_vendor) {
   2035	case X86_VENDOR_AMD:
   2036		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
   2037		    (boot_cpu_data.x86 >= 15))
   2038			break;
   2039		goto no_apic;
   2040	case X86_VENDOR_HYGON:
   2041		break;
   2042	case X86_VENDOR_INTEL:
   2043		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
   2044		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
   2045			break;
   2046		goto no_apic;
   2047	default:
   2048		goto no_apic;
   2049	}
   2050
   2051	if (!boot_cpu_has(X86_FEATURE_APIC)) {
   2052		/*
   2053		 * Over-ride BIOS and try to enable the local APIC only if
   2054		 * "lapic" specified.
   2055		 */
   2056		if (!force_enable_local_apic) {
   2057			pr_info("Local APIC disabled by BIOS -- "
   2058				"you can enable it with \"lapic\"\n");
   2059			return -1;
   2060		}
   2061		if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
   2062			return -1;
   2063	} else {
   2064		if (apic_verify())
   2065			return -1;
   2066	}
   2067
   2068	apic_pm_activate();
   2069
   2070	return 0;
   2071
   2072no_apic:
   2073	pr_info("No local APIC present or hardware disabled\n");
   2074	return -1;
   2075}
   2076#endif
   2077
   2078/**
   2079 * init_apic_mappings - initialize APIC mappings
   2080 */
   2081void __init init_apic_mappings(void)
   2082{
   2083	unsigned int new_apicid;
   2084
   2085	if (apic_validate_deadline_timer())
   2086		pr_info("TSC deadline timer available\n");
   2087
   2088	if (x2apic_mode) {
   2089		boot_cpu_physical_apicid = read_apic_id();
   2090		return;
   2091	}
   2092
   2093	/* If no local APIC can be found return early */
   2094	if (!smp_found_config && detect_init_APIC()) {
   2095		/* lets NOP'ify apic operations */
   2096		pr_info("APIC: disable apic facility\n");
   2097		apic_disable();
   2098	} else {
   2099		apic_phys = mp_lapic_addr;
   2100
   2101		/*
   2102		 * If the system has ACPI MADT tables or MP info, the LAPIC
   2103		 * address is already registered.
   2104		 */
   2105		if (!acpi_lapic && !smp_found_config)
   2106			register_lapic_address(apic_phys);
   2107	}
   2108
   2109	/*
   2110	 * Fetch the APIC ID of the BSP in case we have a
   2111	 * default configuration (or the MP table is broken).
   2112	 */
   2113	new_apicid = read_apic_id();
   2114	if (boot_cpu_physical_apicid != new_apicid) {
   2115		boot_cpu_physical_apicid = new_apicid;
   2116		/*
   2117		 * yeah -- we lie about apic_version
   2118		 * in case if apic was disabled via boot option
   2119		 * but it's not a problem for SMP compiled kernel
   2120		 * since apic_intr_mode_select is prepared for such
   2121		 * a case and disable smp mode
   2122		 */
   2123		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
   2124	}
   2125}
   2126
   2127void __init register_lapic_address(unsigned long address)
   2128{
   2129	mp_lapic_addr = address;
   2130
   2131	if (!x2apic_mode) {
   2132		set_fixmap_nocache(FIX_APIC_BASE, address);
   2133		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
   2134			    APIC_BASE, address);
   2135	}
   2136	if (boot_cpu_physical_apicid == -1U) {
   2137		boot_cpu_physical_apicid  = read_apic_id();
   2138		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
   2139	}
   2140}
   2141
   2142/*
   2143 * Local APIC interrupts
   2144 */
   2145
   2146/*
   2147 * Common handling code for spurious_interrupt and spurious_vector entry
   2148 * points below. No point in allowing the compiler to inline it twice.
   2149 */
   2150static noinline void handle_spurious_interrupt(u8 vector)
   2151{
   2152	u32 v;
   2153
   2154	trace_spurious_apic_entry(vector);
   2155
   2156	inc_irq_stat(irq_spurious_count);
   2157
   2158	/*
   2159	 * If this is a spurious interrupt then do not acknowledge
   2160	 */
   2161	if (vector == SPURIOUS_APIC_VECTOR) {
   2162		/* See SDM vol 3 */
   2163		pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
   2164			smp_processor_id());
   2165		goto out;
   2166	}
   2167
   2168	/*
   2169	 * If it is a vectored one, verify it's set in the ISR. If set,
   2170	 * acknowledge it.
   2171	 */
   2172	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
   2173	if (v & (1 << (vector & 0x1f))) {
   2174		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
   2175			vector, smp_processor_id());
   2176		ack_APIC_irq();
   2177	} else {
   2178		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
   2179			vector, smp_processor_id());
   2180	}
   2181out:
   2182	trace_spurious_apic_exit(vector);
   2183}
   2184
   2185/**
   2186 * spurious_interrupt - Catch all for interrupts raised on unused vectors
   2187 * @regs:	Pointer to pt_regs on stack
   2188 * @vector:	The vector number
   2189 *
   2190 * This is invoked from ASM entry code to catch all interrupts which
   2191 * trigger on an entry which is routed to the common_spurious idtentry
   2192 * point.
   2193 */
   2194DEFINE_IDTENTRY_IRQ(spurious_interrupt)
   2195{
   2196	handle_spurious_interrupt(vector);
   2197}
   2198
   2199DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
   2200{
   2201	handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
   2202}
   2203
   2204/*
   2205 * This interrupt should never happen with our APIC/SMP architecture
   2206 */
   2207DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
   2208{
   2209	static const char * const error_interrupt_reason[] = {
   2210		"Send CS error",		/* APIC Error Bit 0 */
   2211		"Receive CS error",		/* APIC Error Bit 1 */
   2212		"Send accept error",		/* APIC Error Bit 2 */
   2213		"Receive accept error",		/* APIC Error Bit 3 */
   2214		"Redirectable IPI",		/* APIC Error Bit 4 */
   2215		"Send illegal vector",		/* APIC Error Bit 5 */
   2216		"Received illegal vector",	/* APIC Error Bit 6 */
   2217		"Illegal register address",	/* APIC Error Bit 7 */
   2218	};
   2219	u32 v, i = 0;
   2220
   2221	trace_error_apic_entry(ERROR_APIC_VECTOR);
   2222
   2223	/* First tickle the hardware, only then report what went on. -- REW */
   2224	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
   2225		apic_write(APIC_ESR, 0);
   2226	v = apic_read(APIC_ESR);
   2227	ack_APIC_irq();
   2228	atomic_inc(&irq_err_count);
   2229
   2230	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
   2231		    smp_processor_id(), v);
   2232
   2233	v &= 0xff;
   2234	while (v) {
   2235		if (v & 0x1)
   2236			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
   2237		i++;
   2238		v >>= 1;
   2239	}
   2240
   2241	apic_printk(APIC_DEBUG, KERN_CONT "\n");
   2242
   2243	trace_error_apic_exit(ERROR_APIC_VECTOR);
   2244}
   2245
   2246/**
   2247 * connect_bsp_APIC - attach the APIC to the interrupt system
   2248 */
   2249static void __init connect_bsp_APIC(void)
   2250{
   2251#ifdef CONFIG_X86_32
   2252	if (pic_mode) {
   2253		/*
   2254		 * Do not trust the local APIC being empty at bootup.
   2255		 */
   2256		clear_local_APIC();
   2257		/*
   2258		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
   2259		 * local APIC to INT and NMI lines.
   2260		 */
   2261		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
   2262				"enabling APIC mode.\n");
   2263		imcr_pic_to_apic();
   2264	}
   2265#endif
   2266}
   2267
   2268/**
   2269 * disconnect_bsp_APIC - detach the APIC from the interrupt system
   2270 * @virt_wire_setup:	indicates, whether virtual wire mode is selected
   2271 *
   2272 * Virtual wire mode is necessary to deliver legacy interrupts even when the
   2273 * APIC is disabled.
   2274 */
   2275void disconnect_bsp_APIC(int virt_wire_setup)
   2276{
   2277	unsigned int value;
   2278
   2279#ifdef CONFIG_X86_32
   2280	if (pic_mode) {
   2281		/*
   2282		 * Put the board back into PIC mode (has an effect only on
   2283		 * certain older boards).  Note that APIC interrupts, including
   2284		 * IPIs, won't work beyond this point!  The only exception are
   2285		 * INIT IPIs.
   2286		 */
   2287		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
   2288				"entering PIC mode.\n");
   2289		imcr_apic_to_pic();
   2290		return;
   2291	}
   2292#endif
   2293
   2294	/* Go back to Virtual Wire compatibility mode */
   2295
   2296	/* For the spurious interrupt use vector F, and enable it */
   2297	value = apic_read(APIC_SPIV);
   2298	value &= ~APIC_VECTOR_MASK;
   2299	value |= APIC_SPIV_APIC_ENABLED;
   2300	value |= 0xf;
   2301	apic_write(APIC_SPIV, value);
   2302
   2303	if (!virt_wire_setup) {
   2304		/*
   2305		 * For LVT0 make it edge triggered, active high,
   2306		 * external and enabled
   2307		 */
   2308		value = apic_read(APIC_LVT0);
   2309		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
   2310			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
   2311			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
   2312		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
   2313		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
   2314		apic_write(APIC_LVT0, value);
   2315	} else {
   2316		/* Disable LVT0 */
   2317		apic_write(APIC_LVT0, APIC_LVT_MASKED);
   2318	}
   2319
   2320	/*
   2321	 * For LVT1 make it edge triggered, active high,
   2322	 * nmi and enabled
   2323	 */
   2324	value = apic_read(APIC_LVT1);
   2325	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
   2326			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
   2327			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
   2328	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
   2329	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
   2330	apic_write(APIC_LVT1, value);
   2331}
   2332
   2333/*
   2334 * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
   2335 * contiguously, it equals to current allocated max logical CPU ID plus 1.
   2336 * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range,
   2337 * so the maximum of nr_logical_cpuids is nr_cpu_ids.
   2338 *
   2339 * NOTE: Reserve 0 for BSP.
   2340 */
   2341static int nr_logical_cpuids = 1;
   2342
   2343/*
   2344 * Used to store mapping between logical CPU IDs and APIC IDs.
   2345 */
   2346static int cpuid_to_apicid[] = {
   2347	[0 ... NR_CPUS - 1] = -1,
   2348};
   2349
   2350bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
   2351{
   2352	return phys_id == cpuid_to_apicid[cpu];
   2353}
   2354
   2355#ifdef CONFIG_SMP
   2356/**
   2357 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
   2358 * @apicid: APIC ID to check
   2359 */
   2360bool apic_id_is_primary_thread(unsigned int apicid)
   2361{
   2362	u32 mask;
   2363
   2364	if (smp_num_siblings == 1)
   2365		return true;
   2366	/* Isolate the SMT bit(s) in the APICID and check for 0 */
   2367	mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
   2368	return !(apicid & mask);
   2369}
   2370#endif
   2371
   2372/*
   2373 * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
   2374 * and cpuid_to_apicid[] synchronized.
   2375 */
   2376static int allocate_logical_cpuid(int apicid)
   2377{
   2378	int i;
   2379
   2380	/*
   2381	 * cpuid <-> apicid mapping is persistent, so when a cpu is up,
   2382	 * check if the kernel has allocated a cpuid for it.
   2383	 */
   2384	for (i = 0; i < nr_logical_cpuids; i++) {
   2385		if (cpuid_to_apicid[i] == apicid)
   2386			return i;
   2387	}
   2388
   2389	/* Allocate a new cpuid. */
   2390	if (nr_logical_cpuids >= nr_cpu_ids) {
   2391		WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
   2392			     "Processor %d/0x%x and the rest are ignored.\n",
   2393			     nr_cpu_ids, nr_logical_cpuids, apicid);
   2394		return -EINVAL;
   2395	}
   2396
   2397	cpuid_to_apicid[nr_logical_cpuids] = apicid;
   2398	return nr_logical_cpuids++;
   2399}
   2400
   2401int generic_processor_info(int apicid, int version)
   2402{
   2403	int cpu, max = nr_cpu_ids;
   2404	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
   2405				phys_cpu_present_map);
   2406
   2407	/*
   2408	 * boot_cpu_physical_apicid is designed to have the apicid
   2409	 * returned by read_apic_id(), i.e, the apicid of the
   2410	 * currently booting-up processor. However, on some platforms,
   2411	 * it is temporarily modified by the apicid reported as BSP
   2412	 * through MP table. Concretely:
   2413	 *
   2414	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
   2415	 * - arch/x86/mm/amdtopology.c: amd_numa_init()
   2416	 *
   2417	 * This function is executed with the modified
   2418	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
   2419	 * parameter doesn't work to disable APs on kdump 2nd kernel.
   2420	 *
   2421	 * Since fixing handling of boot_cpu_physical_apicid requires
   2422	 * another discussion and tests on each platform, we leave it
   2423	 * for now and here we use read_apic_id() directly in this
   2424	 * function, generic_processor_info().
   2425	 */
   2426	if (disabled_cpu_apicid != BAD_APICID &&
   2427	    disabled_cpu_apicid != read_apic_id() &&
   2428	    disabled_cpu_apicid == apicid) {
   2429		int thiscpu = num_processors + disabled_cpus;
   2430
   2431		pr_warn("APIC: Disabling requested cpu."
   2432			" Processor %d/0x%x ignored.\n", thiscpu, apicid);
   2433
   2434		disabled_cpus++;
   2435		return -ENODEV;
   2436	}
   2437
   2438	/*
   2439	 * If boot cpu has not been detected yet, then only allow upto
   2440	 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
   2441	 */
   2442	if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
   2443	    apicid != boot_cpu_physical_apicid) {
   2444		int thiscpu = max + disabled_cpus - 1;
   2445
   2446		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost"
   2447			" reached. Keeping one slot for boot cpu."
   2448			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
   2449
   2450		disabled_cpus++;
   2451		return -ENODEV;
   2452	}
   2453
   2454	if (num_processors >= nr_cpu_ids) {
   2455		int thiscpu = max + disabled_cpus;
   2456
   2457		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
   2458			"Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
   2459
   2460		disabled_cpus++;
   2461		return -EINVAL;
   2462	}
   2463
   2464	if (apicid == boot_cpu_physical_apicid) {
   2465		/*
   2466		 * x86_bios_cpu_apicid is required to have processors listed
   2467		 * in same order as logical cpu numbers. Hence the first
   2468		 * entry is BSP, and so on.
   2469		 * boot_cpu_init() already hold bit 0 in cpu_present_mask
   2470		 * for BSP.
   2471		 */
   2472		cpu = 0;
   2473
   2474		/* Logical cpuid 0 is reserved for BSP. */
   2475		cpuid_to_apicid[0] = apicid;
   2476	} else {
   2477		cpu = allocate_logical_cpuid(apicid);
   2478		if (cpu < 0) {
   2479			disabled_cpus++;
   2480			return -EINVAL;
   2481		}
   2482	}
   2483
   2484	/*
   2485	 * Validate version
   2486	 */
   2487	if (version == 0x0) {
   2488		pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
   2489			cpu, apicid);
   2490		version = 0x10;
   2491	}
   2492
   2493	if (version != boot_cpu_apic_version) {
   2494		pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
   2495			boot_cpu_apic_version, cpu, version);
   2496	}
   2497
   2498	if (apicid > max_physical_apicid)
   2499		max_physical_apicid = apicid;
   2500
   2501#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
   2502	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
   2503	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
   2504#endif
   2505#ifdef CONFIG_X86_32
   2506	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
   2507		apic->x86_32_early_logical_apicid(cpu);
   2508#endif
   2509	set_cpu_possible(cpu, true);
   2510	physid_set(apicid, phys_cpu_present_map);
   2511	set_cpu_present(cpu, true);
   2512	num_processors++;
   2513
   2514	return cpu;
   2515}
   2516
   2517int hard_smp_processor_id(void)
   2518{
   2519	return read_apic_id();
   2520}
   2521
   2522void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
   2523			   bool dmar)
   2524{
   2525	memset(msg, 0, sizeof(*msg));
   2526
   2527	msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
   2528	msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical;
   2529	msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF;
   2530
   2531	msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED;
   2532	msg->arch_data.vector = cfg->vector;
   2533
   2534	msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
   2535	/*
   2536	 * Only the IOMMU itself can use the trick of putting destination
   2537	 * APIC ID into the high bits of the address. Anything else would
   2538	 * just be writing to memory if it tried that, and needs IR to
   2539	 * address APICs which can't be addressed in the normal 32-bit
   2540	 * address range at 0xFFExxxxx. That is typically just 8 bits, but
   2541	 * some hypervisors allow the extended destination ID field in bits
   2542	 * 5-11 to be used, giving support for 15 bits of APIC IDs in total.
   2543	 */
   2544	if (dmar)
   2545		msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
   2546	else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000)
   2547		msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
   2548	else
   2549		WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
   2550}
   2551
   2552u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
   2553{
   2554	u32 dest = msg->arch_addr_lo.destid_0_7;
   2555
   2556	if (extid)
   2557		dest |= msg->arch_addr_hi.destid_8_31 << 8;
   2558	return dest;
   2559}
   2560EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
   2561
   2562#ifdef CONFIG_X86_64
   2563void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler)
   2564{
   2565	struct apic **drv;
   2566
   2567	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++)
   2568		(*drv)->wakeup_secondary_cpu_64 = handler;
   2569}
   2570#endif
   2571
   2572/*
   2573 * Override the generic EOI implementation with an optimized version.
   2574 * Only called during early boot when only one CPU is active and with
   2575 * interrupts disabled, so we know this does not race with actual APIC driver
   2576 * use.
   2577 */
   2578void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
   2579{
   2580	struct apic **drv;
   2581
   2582	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
   2583		/* Should happen once for each apic */
   2584		WARN_ON((*drv)->eoi_write == eoi_write);
   2585		(*drv)->native_eoi_write = (*drv)->eoi_write;
   2586		(*drv)->eoi_write = eoi_write;
   2587	}
   2588}
   2589
   2590static void __init apic_bsp_up_setup(void)
   2591{
   2592#ifdef CONFIG_X86_64
   2593	apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
   2594#else
   2595	/*
   2596	 * Hack: In case of kdump, after a crash, kernel might be booting
   2597	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
   2598	 * might be zero if read from MP tables. Get it from LAPIC.
   2599	 */
   2600# ifdef CONFIG_CRASH_DUMP
   2601	boot_cpu_physical_apicid = read_apic_id();
   2602# endif
   2603#endif
   2604	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
   2605}
   2606
   2607/**
   2608 * apic_bsp_setup - Setup function for local apic and io-apic
   2609 * @upmode:		Force UP mode (for APIC_init_uniprocessor)
   2610 */
   2611static void __init apic_bsp_setup(bool upmode)
   2612{
   2613	connect_bsp_APIC();
   2614	if (upmode)
   2615		apic_bsp_up_setup();
   2616	setup_local_APIC();
   2617
   2618	enable_IO_APIC();
   2619	end_local_APIC_setup();
   2620	irq_remap_enable_fault_handling();
   2621	setup_IO_APIC();
   2622	lapic_update_legacy_vectors();
   2623}
   2624
   2625#ifdef CONFIG_UP_LATE_INIT
   2626void __init up_late_init(void)
   2627{
   2628	if (apic_intr_mode == APIC_PIC)
   2629		return;
   2630
   2631	/* Setup local timer */
   2632	x86_init.timers.setup_percpu_clockev();
   2633}
   2634#endif
   2635
   2636/*
   2637 * Power management
   2638 */
   2639#ifdef CONFIG_PM
   2640
   2641static struct {
   2642	/*
   2643	 * 'active' is true if the local APIC was enabled by us and
   2644	 * not the BIOS; this signifies that we are also responsible
   2645	 * for disabling it before entering apm/acpi suspend
   2646	 */
   2647	int active;
   2648	/* r/w apic fields */
   2649	unsigned int apic_id;
   2650	unsigned int apic_taskpri;
   2651	unsigned int apic_ldr;
   2652	unsigned int apic_dfr;
   2653	unsigned int apic_spiv;
   2654	unsigned int apic_lvtt;
   2655	unsigned int apic_lvtpc;
   2656	unsigned int apic_lvt0;
   2657	unsigned int apic_lvt1;
   2658	unsigned int apic_lvterr;
   2659	unsigned int apic_tmict;
   2660	unsigned int apic_tdcr;
   2661	unsigned int apic_thmr;
   2662	unsigned int apic_cmci;
   2663} apic_pm_state;
   2664
   2665static int lapic_suspend(void)
   2666{
   2667	unsigned long flags;
   2668	int maxlvt;
   2669
   2670	if (!apic_pm_state.active)
   2671		return 0;
   2672
   2673	maxlvt = lapic_get_maxlvt();
   2674
   2675	apic_pm_state.apic_id = apic_read(APIC_ID);
   2676	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
   2677	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
   2678	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
   2679	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
   2680	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
   2681	if (maxlvt >= 4)
   2682		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
   2683	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
   2684	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
   2685	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
   2686	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
   2687	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
   2688#ifdef CONFIG_X86_THERMAL_VECTOR
   2689	if (maxlvt >= 5)
   2690		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
   2691#endif
   2692#ifdef CONFIG_X86_MCE_INTEL
   2693	if (maxlvt >= 6)
   2694		apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
   2695#endif
   2696
   2697	local_irq_save(flags);
   2698
   2699	/*
   2700	 * Mask IOAPIC before disabling the local APIC to prevent stale IRR
   2701	 * entries on some implementations.
   2702	 */
   2703	mask_ioapic_entries();
   2704
   2705	disable_local_APIC();
   2706
   2707	irq_remapping_disable();
   2708
   2709	local_irq_restore(flags);
   2710	return 0;
   2711}
   2712
   2713static void lapic_resume(void)
   2714{
   2715	unsigned int l, h;
   2716	unsigned long flags;
   2717	int maxlvt;
   2718
   2719	if (!apic_pm_state.active)
   2720		return;
   2721
   2722	local_irq_save(flags);
   2723
   2724	/*
   2725	 * IO-APIC and PIC have their own resume routines.
   2726	 * We just mask them here to make sure the interrupt
   2727	 * subsystem is completely quiet while we enable x2apic
   2728	 * and interrupt-remapping.
   2729	 */
   2730	mask_ioapic_entries();
   2731	legacy_pic->mask_all();
   2732
   2733	if (x2apic_mode) {
   2734		__x2apic_enable();
   2735	} else {
   2736		/*
   2737		 * Make sure the APICBASE points to the right address
   2738		 *
   2739		 * FIXME! This will be wrong if we ever support suspend on
   2740		 * SMP! We'll need to do this as part of the CPU restore!
   2741		 */
   2742		if (boot_cpu_data.x86 >= 6) {
   2743			rdmsr(MSR_IA32_APICBASE, l, h);
   2744			l &= ~MSR_IA32_APICBASE_BASE;
   2745			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
   2746			wrmsr(MSR_IA32_APICBASE, l, h);
   2747		}
   2748	}
   2749
   2750	maxlvt = lapic_get_maxlvt();
   2751	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
   2752	apic_write(APIC_ID, apic_pm_state.apic_id);
   2753	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
   2754	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
   2755	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
   2756	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
   2757	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
   2758	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
   2759#ifdef CONFIG_X86_THERMAL_VECTOR
   2760	if (maxlvt >= 5)
   2761		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
   2762#endif
   2763#ifdef CONFIG_X86_MCE_INTEL
   2764	if (maxlvt >= 6)
   2765		apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
   2766#endif
   2767	if (maxlvt >= 4)
   2768		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
   2769	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
   2770	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
   2771	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
   2772	apic_write(APIC_ESR, 0);
   2773	apic_read(APIC_ESR);
   2774	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
   2775	apic_write(APIC_ESR, 0);
   2776	apic_read(APIC_ESR);
   2777
   2778	irq_remapping_reenable(x2apic_mode);
   2779
   2780	local_irq_restore(flags);
   2781}
   2782
   2783/*
   2784 * This device has no shutdown method - fully functioning local APICs
   2785 * are needed on every CPU up until machine_halt/restart/poweroff.
   2786 */
   2787
   2788static struct syscore_ops lapic_syscore_ops = {
   2789	.resume		= lapic_resume,
   2790	.suspend	= lapic_suspend,
   2791};
   2792
   2793static void apic_pm_activate(void)
   2794{
   2795	apic_pm_state.active = 1;
   2796}
   2797
   2798static int __init init_lapic_sysfs(void)
   2799{
   2800	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
   2801	if (boot_cpu_has(X86_FEATURE_APIC))
   2802		register_syscore_ops(&lapic_syscore_ops);
   2803
   2804	return 0;
   2805}
   2806
   2807/* local apic needs to resume before other devices access its registers. */
   2808core_initcall(init_lapic_sysfs);
   2809
   2810#else	/* CONFIG_PM */
   2811
   2812static void apic_pm_activate(void) { }
   2813
   2814#endif	/* CONFIG_PM */
   2815
   2816#ifdef CONFIG_X86_64
   2817
   2818static int multi_checked;
   2819static int multi;
   2820
   2821static int set_multi(const struct dmi_system_id *d)
   2822{
   2823	if (multi)
   2824		return 0;
   2825	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
   2826	multi = 1;
   2827	return 0;
   2828}
   2829
   2830static const struct dmi_system_id multi_dmi_table[] = {
   2831	{
   2832		.callback = set_multi,
   2833		.ident = "IBM System Summit2",
   2834		.matches = {
   2835			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
   2836			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
   2837		},
   2838	},
   2839	{}
   2840};
   2841
   2842static void dmi_check_multi(void)
   2843{
   2844	if (multi_checked)
   2845		return;
   2846
   2847	dmi_check_system(multi_dmi_table);
   2848	multi_checked = 1;
   2849}
   2850
   2851/*
   2852 * apic_is_clustered_box() -- Check if we can expect good TSC
   2853 *
   2854 * Thus far, the major user of this is IBM's Summit2 series:
   2855 * Clustered boxes may have unsynced TSC problems if they are
   2856 * multi-chassis.
   2857 * Use DMI to check them
   2858 */
   2859int apic_is_clustered_box(void)
   2860{
   2861	dmi_check_multi();
   2862	return multi;
   2863}
   2864#endif
   2865
   2866/*
   2867 * APIC command line parameters
   2868 */
   2869static int __init setup_disableapic(char *arg)
   2870{
   2871	disable_apic = 1;
   2872	setup_clear_cpu_cap(X86_FEATURE_APIC);
   2873	return 0;
   2874}
   2875early_param("disableapic", setup_disableapic);
   2876
   2877/* same as disableapic, for compatibility */
   2878static int __init setup_nolapic(char *arg)
   2879{
   2880	return setup_disableapic(arg);
   2881}
   2882early_param("nolapic", setup_nolapic);
   2883
   2884static int __init parse_lapic_timer_c2_ok(char *arg)
   2885{
   2886	local_apic_timer_c2_ok = 1;
   2887	return 0;
   2888}
   2889early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
   2890
   2891static int __init parse_disable_apic_timer(char *arg)
   2892{
   2893	disable_apic_timer = 1;
   2894	return 0;
   2895}
   2896early_param("noapictimer", parse_disable_apic_timer);
   2897
   2898static int __init parse_nolapic_timer(char *arg)
   2899{
   2900	disable_apic_timer = 1;
   2901	return 0;
   2902}
   2903early_param("nolapic_timer", parse_nolapic_timer);
   2904
   2905static int __init apic_set_verbosity(char *arg)
   2906{
   2907	if (!arg)  {
   2908#ifdef CONFIG_X86_64
   2909		skip_ioapic_setup = 0;
   2910		return 0;
   2911#endif
   2912		return -EINVAL;
   2913	}
   2914
   2915	if (strcmp("debug", arg) == 0)
   2916		apic_verbosity = APIC_DEBUG;
   2917	else if (strcmp("verbose", arg) == 0)
   2918		apic_verbosity = APIC_VERBOSE;
   2919#ifdef CONFIG_X86_64
   2920	else {
   2921		pr_warn("APIC Verbosity level %s not recognised"
   2922			" use apic=verbose or apic=debug\n", arg);
   2923		return -EINVAL;
   2924	}
   2925#endif
   2926
   2927	return 0;
   2928}
   2929early_param("apic", apic_set_verbosity);
   2930
   2931static int __init lapic_insert_resource(void)
   2932{
   2933	if (!apic_phys)
   2934		return -1;
   2935
   2936	/* Put local APIC into the resource map. */
   2937	lapic_resource.start = apic_phys;
   2938	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
   2939	insert_resource(&iomem_resource, &lapic_resource);
   2940
   2941	return 0;
   2942}
   2943
   2944/*
   2945 * need call insert after e820__reserve_resources()
   2946 * that is using request_resource
   2947 */
   2948late_initcall(lapic_insert_resource);
   2949
   2950static int __init apic_set_disabled_cpu_apicid(char *arg)
   2951{
   2952	if (!arg || !get_option(&arg, &disabled_cpu_apicid))
   2953		return -EINVAL;
   2954
   2955	return 0;
   2956}
   2957early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
   2958
   2959static int __init apic_set_extnmi(char *arg)
   2960{
   2961	if (!arg)
   2962		return -EINVAL;
   2963
   2964	if (!strncmp("all", arg, 3))
   2965		apic_extnmi = APIC_EXTNMI_ALL;
   2966	else if (!strncmp("none", arg, 4))
   2967		apic_extnmi = APIC_EXTNMI_NONE;
   2968	else if (!strncmp("bsp", arg, 3))
   2969		apic_extnmi = APIC_EXTNMI_BSP;
   2970	else {
   2971		pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
   2972		return -EINVAL;
   2973	}
   2974
   2975	return 0;
   2976}
   2977early_param("apic_extnmi", apic_set_extnmi);