intel.c (13189B)
// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple CPUs that share a machine check bank,
 * so we need to designate a single CPU to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a CPU is offlined or brought online since
 * some MCA banks are shared across CPUs. When a CPU is offlined, cmci_clear()
 * disables CMCI on all banks owned by the CPU and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different CPU may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined CPU.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
 * encountered an error. If not, we decrement it by one. We signal the end of
 * the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

/* Value written into the threshold field of MCx_CTL2 by cmci_discover(). */
#define CMCI_THRESHOLD		1
/* Timer interval returned while a storm is winding down (ACTIVE -> SUBSIDED). */
#define CMCI_POLL_INTERVAL	(30 * HZ)
/* Length of the CMCI counting window, and the poll interval during a storm. */
#define CMCI_STORM_INTERVAL	(HZ)
/* More than this many CMCIs within one window puts the CPU into storm mode. */
#define CMCI_STORM_THRESHOLD	15

/* jiffies value at which the current counting window was started. */
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
/* Number of CMCIs seen on this CPU in the current counting window. */
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
/* One of the CMCI_STORM_* values below. */
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	/* No storm: CMCI interrupts are handled normally. */
	CMCI_STORM_NONE,
	/* Storm detected: CMCI was switched off on the owned banks, we poll. */
	CMCI_STORM_ACTIVE,
	/* This CPU calmed down and waits for the storm count to drop to zero. */
	CMCI_STORM_SUBSIDED,
};

/* Number of CPUs currently in CMCI_STORM_ACTIVE. */
static atomic_t cmci_storm_on_cpus;

/*
 * Check whether CMCI can be used on this system.
 *
 * @banks: on success and on a "no CMCI capability" result that got as far
 *         as reading MCG_CAP, receives the bank count (low 8 bits of
 *         MCG_CAP, capped at MAX_NR_BANKS). It is left untouched on the
 *         early-return paths.
 *
 * Returns 1 if MCG_CAP advertises MCG_CMCI_P, 0 otherwise (including when
 * CMCI or corrected-error handling is disabled by configuration, the vendor
 * is neither Intel nor Zhaoxin, or the local APIC is missing or reports a
 * maximum LVT index below 6).
 */
static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initial
	 * initialization is vendor keyed and this
	 * makes sure none of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return 0;

	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

/*
 * Check whether local machine check exceptions (LMCE) can be enabled.
 *
 * Returns true only if LMCE is not disabled by configuration, MCG_CAP has
 * both MCG_SER_P and MCG_LMCE_P set, and IA32_FEAT_CTL is locked with
 * FEAT_CTL_LMCE_ENABLED set.
 */
static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
	 * fault.  The MSR must also be locked for LMCE_ENABLED to take effect.
	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
	 * locks the MSR in the event that it wasn't already locked by BIOS.
	 */
	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
		return false;

	return tmp & FEAT_CTL_LMCE_ENABLED;
}

/*
 * Poll the banks owned by this CPU while it is in a CMCI storm.
 *
 * Returns false without polling when this CPU's storm state is
 * CMCI_STORM_NONE. Otherwise polls the owned banks, updates the backoff
 * counter according to the poll result and returns true.
 */
bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

/*
 * Reset the storm state of @cpu to CMCI_STORM_NONE. If that CPU was counted
 * as storming (CMCI_STORM_ACTIVE), drop it from cmci_storm_on_cpus first so
 * the global count stays balanced.
 * NOTE(review): the name suggests this is called from CPU hotplug handling;
 * confirm against the caller.
 */
void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

/*
 * Set (@on == true) or clear (@on == false) MCI_CTL2_CMCI_EN in the MCx_CTL2
 * MSR of every bank owned by this CPU. Bank ownership itself is not changed.
 * Runs under cmci_discover_lock with interrupts disabled.
 */
static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/*
 * Pick the next poll timer interval based on this CPU's storm state.
 *
 * @interval: interval proposed by the caller; returned unchanged when this
 *            CPU is not involved in a storm.
 *
 * Returns CMCI_STORM_INTERVAL while the storm is active and the backoff
 * counter is still positive, CMCI_POLL_INTERVAL while the storm is winding
 * down, and @interval otherwise.
 */
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	/* Storm still going on: notify and keep polling at the storm rate. */
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the timer
		 * interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		/* Only the last CPU leaving the ACTIVE state prints the notice. */
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		fallthrough;

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/* We have shiny weather. Let the poll do whatever it thinks. */
		return interval;
	}
}

/*
 * Account for one CMCI on this CPU and decide whether a storm is in progress.
 *
 * CMCIs are counted within a window of CMCI_STORM_INTERVAL jiffies. Once more
 * than CMCI_STORM_THRESHOLD arrive in one window, CMCI is switched off on the
 * banks this CPU owns, the CPU enters CMCI_STORM_ACTIVE, the poll timer is
 * kicked at the storm rate and the backoff counter is armed.
 *
 * Returns true if this CPU is (now or already) in a storm, in which case the
 * caller skips its own poll; false otherwise.
 */
static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		/* Window expired: start a new one with this interrupt. */
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	/* Only the first CPU entering the storm prints the notice. */
	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	/* During a storm the poll timer does the logging, not the interrupt. */
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 *
 * @banks: number of banks to examine (banks 0 .. @banks - 1).
 *
 * For every bank that this CPU does not own yet and that is not marked in
 * mce_banks_ce_disabled, try to set MCI_CTL2_CMCI_EN. If the bit reads back
 * as set, the bank is recorded in mce_banks_owned and removed from this
 * CPU's mce_poll_banks. The whole scan runs under cmci_discover_lock.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			/* Neither own nor poll a bank that has CMCI enabled elsewhere. */
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			/* Use our own threshold instead of whatever is programmed. */
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		/* Try to claim the bank, then read back to see what stuck. */
		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			/* No CMCI on this bank: it is expected to still be polled. */
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	/* Report outside the lock; pr_info_once() limits this to one report. */
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization check
 * all the CMCI owned banks.
 *
 * Does nothing if MCE is not available on this CPU or CMCI is not supported.
 * The poll itself runs with local interrupts disabled.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/*
 * Clear MCI_CTL2_CMCI_EN for @bank and give up this CPU's ownership of it.
 * A bank that this CPU does not own is left untouched.
 *
 * Caller must hold the lock on cmci_discover_lock
 */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
371 */ 372void cmci_clear(void) 373{ 374 unsigned long flags; 375 int i; 376 int banks; 377 378 if (!cmci_supported(&banks)) 379 return; 380 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 381 for (i = 0; i < banks; i++) 382 __cmci_disable_bank(i); 383 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 384} 385 386static void cmci_rediscover_work_func(void *arg) 387{ 388 int banks; 389 390 /* Recheck banks in case CPUs don't all have the same */ 391 if (cmci_supported(&banks)) 392 cmci_discover(banks); 393} 394 395/* After a CPU went down cycle through all the others and rediscover */ 396void cmci_rediscover(void) 397{ 398 int banks; 399 400 if (!cmci_supported(&banks)) 401 return; 402 403 on_each_cpu(cmci_rediscover_work_func, NULL, 1); 404} 405 406/* 407 * Reenable CMCI on this CPU in case a CPU down failed. 408 */ 409void cmci_reenable(void) 410{ 411 int banks; 412 if (cmci_supported(&banks)) 413 cmci_discover(banks); 414} 415 416void cmci_disable_bank(int bank) 417{ 418 int banks; 419 unsigned long flags; 420 421 if (!cmci_supported(&banks)) 422 return; 423 424 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 425 __cmci_disable_bank(bank); 426 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 427} 428 429void intel_init_cmci(void) 430{ 431 int banks; 432 433 if (!cmci_supported(&banks)) 434 return; 435 436 mce_threshold_vector = intel_threshold_interrupt; 437 cmci_discover(banks); 438 /* 439 * For CPU #0 this runs with still disabled APIC, but that's 440 * ok because only the vector is set up. We still do another 441 * check for the banks later for CPU #0 just to make sure 442 * to not miss any events. 
443 */ 444 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); 445 cmci_recheck(); 446} 447 448void intel_init_lmce(void) 449{ 450 u64 val; 451 452 if (!lmce_supported()) 453 return; 454 455 rdmsrl(MSR_IA32_MCG_EXT_CTL, val); 456 457 if (!(val & MCG_EXT_CTL_LMCE_EN)) 458 wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); 459} 460 461void intel_clear_lmce(void) 462{ 463 u64 val; 464 465 if (!lmce_supported()) 466 return; 467 468 rdmsrl(MSR_IA32_MCG_EXT_CTL, val); 469 val &= ~MCG_EXT_CTL_LMCE_EN; 470 wrmsrl(MSR_IA32_MCG_EXT_CTL, val); 471} 472 473/* 474 * Enable additional error logs from the integrated 475 * memory controller on processors that support this. 476 */ 477static void intel_imc_init(struct cpuinfo_x86 *c) 478{ 479 u64 error_control; 480 481 switch (c->x86_model) { 482 case INTEL_FAM6_SANDYBRIDGE_X: 483 case INTEL_FAM6_IVYBRIDGE_X: 484 case INTEL_FAM6_HASWELL_X: 485 if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control)) 486 return; 487 error_control |= 2; 488 wrmsrl_safe(MSR_ERROR_CONTROL, error_control); 489 break; 490 } 491} 492 493void mce_intel_feature_init(struct cpuinfo_x86 *c) 494{ 495 intel_init_cmci(); 496 intel_init_lmce(); 497 intel_imc_init(c); 498} 499 500void mce_intel_feature_clear(struct cpuinfo_x86 *c) 501{ 502 intel_clear_lmce(); 503} 504 505bool intel_filter_mce(struct mce *m) 506{ 507 struct cpuinfo_x86 *c = &boot_cpu_data; 508 509 /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */ 510 if ((c->x86 == 6) && 511 ((c->x86_model == INTEL_FAM6_HASWELL) || 512 (c->x86_model == INTEL_FAM6_HASWELL_L) || 513 (c->x86_model == INTEL_FAM6_BROADWELL) || 514 (c->x86_model == INTEL_FAM6_HASWELL_G) || 515 (c->x86_model == INTEL_FAM6_SKYLAKE_X)) && 516 (m->bank == 0) && 517 ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005)) 518 return true; 519 520 return false; 521}