ras.c (24501B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Copyright (C) 2001 Dave Engebretsen IBM Corporation 4 */ 5 6#include <linux/sched.h> 7#include <linux/interrupt.h> 8#include <linux/irq.h> 9#include <linux/of.h> 10#include <linux/fs.h> 11#include <linux/reboot.h> 12#include <linux/irq_work.h> 13 14#include <asm/machdep.h> 15#include <asm/rtas.h> 16#include <asm/firmware.h> 17#include <asm/mce.h> 18 19#include "pseries.h" 20 21static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; 22static DEFINE_SPINLOCK(ras_log_buf_lock); 23 24static int ras_check_exception_token; 25 26#define EPOW_SENSOR_TOKEN 9 27#define EPOW_SENSOR_INDEX 0 28 29/* EPOW events counter variable */ 30static int num_epow_events; 31 32static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); 33static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); 34static irqreturn_t ras_error_interrupt(int irq, void *dev_id); 35 36/* RTAS pseries MCE errorlog section. */ 37struct pseries_mc_errorlog { 38 __be32 fru_id; 39 __be32 proc_id; 40 u8 error_type; 41 /* 42 * sub_err_type (1 byte). Bit fields depends on error_type 43 * 44 * MSB0 45 * | 46 * V 47 * 01234567 48 * XXXXXXXX 49 * 50 * For error_type == MC_ERROR_TYPE_UE 51 * XXXXXXXX 52 * X 1: Permanent or Transient UE. 53 * X 1: Effective address provided. 54 * X 1: Logical address provided. 55 * XX 2: Reserved. 56 * XXX 3: Type of UE error. 57 * 58 * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB 59 * XXXXXXXX 60 * X 1: Effective address provided. 61 * XXXXX 5: Reserved. 62 * XX 2: Type of SLB/ERAT/TLB error. 63 * 64 * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS 65 * XXXXXXXX 66 * X 1: Error causing address provided. 67 * XXX 3: Type of error. 68 * XXXX 4: Reserved. 
69 */ 70 u8 sub_err_type; 71 u8 reserved_1[6]; 72 __be64 effective_address; 73 __be64 logical_address; 74} __packed; 75 76/* RTAS pseries MCE error types */ 77#define MC_ERROR_TYPE_UE 0x00 78#define MC_ERROR_TYPE_SLB 0x01 79#define MC_ERROR_TYPE_ERAT 0x02 80#define MC_ERROR_TYPE_UNKNOWN 0x03 81#define MC_ERROR_TYPE_TLB 0x04 82#define MC_ERROR_TYPE_D_CACHE 0x05 83#define MC_ERROR_TYPE_I_CACHE 0x07 84#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 85 86/* RTAS pseries MCE error sub types */ 87#define MC_ERROR_UE_INDETERMINATE 0 88#define MC_ERROR_UE_IFETCH 1 89#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 90#define MC_ERROR_UE_LOAD_STORE 3 91#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 92 93#define UE_EFFECTIVE_ADDR_PROVIDED 0x40 94#define UE_LOGICAL_ADDR_PROVIDED 0x20 95#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 96 97#define MC_ERROR_SLB_PARITY 0 98#define MC_ERROR_SLB_MULTIHIT 1 99#define MC_ERROR_SLB_INDETERMINATE 2 100 101#define MC_ERROR_ERAT_PARITY 1 102#define MC_ERROR_ERAT_MULTIHIT 2 103#define MC_ERROR_ERAT_INDETERMINATE 3 104 105#define MC_ERROR_TLB_PARITY 1 106#define MC_ERROR_TLB_MULTIHIT 2 107#define MC_ERROR_TLB_INDETERMINATE 3 108 109#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 110#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 111 112static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) 113{ 114 switch (mlog->error_type) { 115 case MC_ERROR_TYPE_UE: 116 return (mlog->sub_err_type & 0x07); 117 case MC_ERROR_TYPE_SLB: 118 case MC_ERROR_TYPE_ERAT: 119 case MC_ERROR_TYPE_TLB: 120 return (mlog->sub_err_type & 0x03); 121 case MC_ERROR_TYPE_CTRL_MEM_ACCESS: 122 return (mlog->sub_err_type & 0x70) >> 4; 123 default: 124 return 0; 125 } 126} 127 128/* 129 * Enable the hotplug interrupt late because processing them may touch other 130 * devices or systems (e.g. hugepages) that have not been initialized at the 131 * subsys stage. 
132 */ 133static int __init init_ras_hotplug_IRQ(void) 134{ 135 struct device_node *np; 136 137 /* Hotplug Events */ 138 np = of_find_node_by_path("/event-sources/hot-plug-events"); 139 if (np != NULL) { 140 if (dlpar_workqueue_init() == 0) 141 request_event_sources_irqs(np, ras_hotplug_interrupt, 142 "RAS_HOTPLUG"); 143 of_node_put(np); 144 } 145 146 return 0; 147} 148machine_late_initcall(pseries, init_ras_hotplug_IRQ); 149 150/* 151 * Initialize handlers for the set of interrupts caused by hardware errors 152 * and power system events. 153 */ 154static int __init init_ras_IRQ(void) 155{ 156 struct device_node *np; 157 158 ras_check_exception_token = rtas_token("check-exception"); 159 160 /* Internal Errors */ 161 np = of_find_node_by_path("/event-sources/internal-errors"); 162 if (np != NULL) { 163 request_event_sources_irqs(np, ras_error_interrupt, 164 "RAS_ERROR"); 165 of_node_put(np); 166 } 167 168 /* EPOW Events */ 169 np = of_find_node_by_path("/event-sources/epow-events"); 170 if (np != NULL) { 171 request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); 172 of_node_put(np); 173 } 174 175 return 0; 176} 177machine_subsys_initcall(pseries, init_ras_IRQ); 178 179#define EPOW_SHUTDOWN_NORMAL 1 180#define EPOW_SHUTDOWN_ON_UPS 2 181#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 182#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 183 184static void handle_system_shutdown(char event_modifier) 185{ 186 switch (event_modifier) { 187 case EPOW_SHUTDOWN_NORMAL: 188 pr_emerg("Power off requested\n"); 189 orderly_poweroff(true); 190 break; 191 192 case EPOW_SHUTDOWN_ON_UPS: 193 pr_emerg("Loss of system power detected. System is running on" 194 " UPS/battery. Check RTAS error log for details\n"); 195 break; 196 197 case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: 198 pr_emerg("Loss of system critical functions detected. 
Check" 199 " RTAS error log for details\n"); 200 orderly_poweroff(true); 201 break; 202 203 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: 204 pr_emerg("High ambient temperature detected. Check RTAS" 205 " error log for details\n"); 206 orderly_poweroff(true); 207 break; 208 209 default: 210 pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", 211 event_modifier); 212 } 213} 214 215struct epow_errorlog { 216 unsigned char sensor_value; 217 unsigned char event_modifier; 218 unsigned char extended_modifier; 219 unsigned char reserved; 220 unsigned char platform_reason; 221}; 222 223#define EPOW_RESET 0 224#define EPOW_WARN_COOLING 1 225#define EPOW_WARN_POWER 2 226#define EPOW_SYSTEM_SHUTDOWN 3 227#define EPOW_SYSTEM_HALT 4 228#define EPOW_MAIN_ENCLOSURE 5 229#define EPOW_POWER_OFF 7 230 231static void rtas_parse_epow_errlog(struct rtas_error_log *log) 232{ 233 struct pseries_errorlog *pseries_log; 234 struct epow_errorlog *epow_log; 235 char action_code; 236 char modifier; 237 238 pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); 239 if (pseries_log == NULL) 240 return; 241 242 epow_log = (struct epow_errorlog *)pseries_log->data; 243 action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ 244 modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ 245 246 switch (action_code) { 247 case EPOW_RESET: 248 if (num_epow_events) { 249 pr_info("Non critical power/cooling issue cleared\n"); 250 num_epow_events--; 251 } 252 break; 253 254 case EPOW_WARN_COOLING: 255 pr_info("Non-critical cooling issue detected. Check RTAS error" 256 " log for details\n"); 257 break; 258 259 case EPOW_WARN_POWER: 260 pr_info("Non-critical power issue detected. Check RTAS error" 261 " log for details\n"); 262 break; 263 264 case EPOW_SYSTEM_SHUTDOWN: 265 handle_system_shutdown(modifier); 266 break; 267 268 case EPOW_SYSTEM_HALT: 269 pr_emerg("Critical power/cooling issue detected. Check RTAS" 270 " error log for details. 
Powering off.\n"); 271 orderly_poweroff(true); 272 break; 273 274 case EPOW_MAIN_ENCLOSURE: 275 case EPOW_POWER_OFF: 276 pr_emerg("System about to lose power. Check RTAS error log " 277 " for details. Powering off immediately.\n"); 278 emergency_sync(); 279 kernel_power_off(); 280 break; 281 282 default: 283 pr_err("Unknown power/cooling event (action code = %d)\n", 284 action_code); 285 } 286 287 /* Increment epow events counter variable */ 288 if (action_code != EPOW_RESET) 289 num_epow_events++; 290} 291 292static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) 293{ 294 struct pseries_errorlog *pseries_log; 295 struct pseries_hp_errorlog *hp_elog; 296 297 spin_lock(&ras_log_buf_lock); 298 299 rtas_call(ras_check_exception_token, 6, 1, NULL, 300 RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), 301 RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), 302 rtas_get_error_log_max()); 303 304 pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, 305 PSERIES_ELOG_SECT_ID_HOTPLUG); 306 hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; 307 308 /* 309 * Since PCI hotplug is not currently supported on pseries, put PCI 310 * hotplug events on the ras_log_buf to be handled by rtas_errd. 311 */ 312 if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || 313 hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || 314 hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) 315 queue_hotplug_event(hp_elog); 316 else 317 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 318 319 spin_unlock(&ras_log_buf_lock); 320 return IRQ_HANDLED; 321} 322 323/* Handle environmental and power warning (EPOW) interrupts. 
*/ 324static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) 325{ 326 int state; 327 int critical; 328 329 rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); 330 331 if (state > 3) 332 critical = 1; /* Time Critical */ 333 else 334 critical = 0; 335 336 spin_lock(&ras_log_buf_lock); 337 338 rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, 339 virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), 340 rtas_get_error_log_max()); 341 342 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 343 344 rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); 345 346 spin_unlock(&ras_log_buf_lock); 347 return IRQ_HANDLED; 348} 349 350/* 351 * Handle hardware error interrupts. 352 * 353 * RTAS check-exception is called to collect data on the exception. If 354 * the error is deemed recoverable, we log a warning and return. 355 * For nonrecoverable errors, an error is logged and we stop all processing 356 * as quickly as possible in order to prevent propagation of the failure. 357 */ 358static irqreturn_t ras_error_interrupt(int irq, void *dev_id) 359{ 360 struct rtas_error_log *rtas_elog; 361 int status; 362 int fatal; 363 364 spin_lock(&ras_log_buf_lock); 365 366 status = rtas_call(ras_check_exception_token, 6, 1, NULL, 367 RTAS_VECTOR_EXTERNAL_INTERRUPT, 368 virq_to_hw(irq), 369 RTAS_INTERNAL_ERROR, 1 /* Time Critical */, 370 __pa(&ras_log_buf), 371 rtas_get_error_log_max()); 372 373 rtas_elog = (struct rtas_error_log *)ras_log_buf; 374 375 if (status == 0 && 376 rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) 377 fatal = 1; 378 else 379 fatal = 0; 380 381 /* format and print the extended information */ 382 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); 383 384 if (fatal) { 385 pr_emerg("Fatal hardware error detected. Check RTAS error" 386 " log for details. 
Powering off immediately\n"); 387 emergency_sync(); 388 kernel_power_off(); 389 } else { 390 pr_err("Recoverable hardware error detected\n"); 391 } 392 393 spin_unlock(&ras_log_buf_lock); 394 return IRQ_HANDLED; 395} 396 397/* 398 * Some versions of FWNMI place the buffer inside the 4kB page starting at 399 * 0x7000. Other versions place it inside the rtas buffer. We check both. 400 * Minimum size of the buffer is 16 bytes. 401 */ 402#define VALID_FWNMI_BUFFER(A) \ 403 ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ 404 (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) 405 406static inline struct rtas_error_log *fwnmi_get_errlog(void) 407{ 408 return (struct rtas_error_log *)local_paca->mce_data_buf; 409} 410 411static __be64 *fwnmi_get_savep(struct pt_regs *regs) 412{ 413 unsigned long savep_ra; 414 415 /* Mask top two bits */ 416 savep_ra = regs->gpr[3] & ~(0x3UL << 62); 417 if (!VALID_FWNMI_BUFFER(savep_ra)) { 418 printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); 419 return NULL; 420 } 421 422 return __va(savep_ra); 423} 424 425/* 426 * Get the error information for errors coming through the 427 * FWNMI vectors. The pt_regs' r3 will be updated to reflect 428 * the actual r3 if possible, and a ptr to the error log entry 429 * will be returned if found. 430 * 431 * Use one buffer mce_data_buf per cpu to store RTAS error. 432 * 433 * The mce_data_buf does not have any locks or protection around it, 434 * if a second machine check comes in, or a system reset is done 435 * before we have logged the error, then we will get corruption in the 436 * error log. This is preferable over holding off on calling 437 * ibm,nmi-interlock which would result in us checkstopping if a 438 * second machine check did come in. 
*/
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
	struct rtas_error_log *h;
	__be64 *savep;

	/* NULL here means r3 did not point at a plausible save area */
	savep = fwnmi_get_savep(regs);
	if (!savep)
		return NULL;

	regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */

	/* The error log header follows the saved r3 in the save area */
	h = (struct rtas_error_log *)&savep[1];
	/* Use the per cpu buffer from paca to store rtas error log */
	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
	if (!rtas_error_extended(h)) {
		/* No extended log: only the fixed 8-byte part is copied */
		memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
	} else {
		int len, error_log_length;

		/* 8-byte fixed part plus extended log, capped to the buffer */
		error_log_length = 8 + rtas_error_extended_log_length(h);
		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
		memcpy(local_paca->mce_data_buf, h, len);
	}

	return (struct rtas_error_log *)local_paca->mce_data_buf;
}

/* Call this when done with the data returned by fwnmi_get_errinfo().
 * It will release the saved data area for other CPUs in the
 * partition to receive FWNMI errors.
 *
 * A non-zero status from ibm,nmi-interlock is only reported, not returned.
 */
static void fwnmi_release_errinfo(void)
{
	struct rtas_args rtas_args;
	int ret;

	/*
	 * On pseries, the machine check stack is limited to under 4GB, so
	 * args can be on-stack.
	 */
	rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
	ret = be32_to_cpu(rtas_args.rets[0]);
	if (ret != 0)
		printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
}

/*
 * System reset exception entry point for pseries.
 *
 * Returns 1 when smp_handle_nmi_ipi() reports that it handled the reset,
 * 0 when the caller still needs to perform the reset.
 */
int pSeries_system_reset_exception(struct pt_regs *regs)
{
#ifdef __LITTLE_ENDIAN__
	/*
	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
	 * so clear it. It will be missing MSR_RI so we won't try to recover.
	 */
	if ((be64_to_cpu(regs->msr) &
			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
		regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
		regs_set_return_msr(regs, 0);
	}
#endif

	if (fwnmi_active) {
		__be64 *savep;

		/*
		 * Firmware (PowerVM and KVM) saves r3 to a save area like
		 * machine check, which is not exactly what PAPR (2.9)
		 * suggests but there is no way to detect otherwise, so this
		 * is the interface now.
		 *
		 * System resets do not save any error log or require an
		 * "ibm,nmi-interlock" rtas call to release.
		 */

		/* If r3 looks corrupt, regs->gpr[3] is simply left as is */
		savep = fwnmi_get_savep(regs);
		if (savep)
			regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
	}

	if (smp_handle_nmi_ipi(regs))
		return 1;

	return 0; /* need to perform reset */
}

/*
 * First stage of machine check handling: try to repair the fault.
 *
 * @disposition: RTAS disposition reported for the error.
 * @error_type:  MC_ERROR_TYPE_* value from the MCE section.
 *
 * Returns the possibly upgraded disposition.  Without CONFIG_PPC_BOOK3S_64
 * the disposition is returned unchanged.
 */
static int mce_handle_err_realmode(int disposition, u8 error_type)
{
#ifdef CONFIG_PPC_BOOK3S_64
	if (disposition == RTAS_DISP_NOT_RECOVERED) {
		switch (error_type) {
		case	MC_ERROR_TYPE_ERAT:
			flush_erat();
			disposition = RTAS_DISP_FULLY_RECOVERED;
			break;
		case	MC_ERROR_TYPE_SLB:
#ifdef CONFIG_PPC_64S_HASH_MMU
			/*
			 * Store the old slb content in paca before flushing.
			 * Print this when we go to virtual mode.
			 * There are chances that we may hit MCE again if there
			 * is a parity error on the SLB entry we trying to read
			 * for saving. Hence limit the slb saving to single
			 * level of recursion.
			 */
			if (local_paca->in_mce == 1)
				slb_save_contents(local_paca->mce_faulty_slbs);
			flush_and_reload_slb();
			disposition = RTAS_DISP_FULLY_RECOVERED;
#endif
			/* Without hash MMU support the SLB case changes nothing */
			break;
		default:
			break;
		}
	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
		/* Platform corrected itself but could be degraded */
		pr_err("MCE: limited recovery, system may be degraded\n");
		disposition = RTAS_DISP_FULLY_RECOVERED;
	}
#endif
	return disposition;
}

/*
 * Second stage of machine check handling: translate the RTAS error log into
 * a struct mce_error_info and record it with save_mce_event().
 *
 * @regs:        registers at the time of the machine check.
 * @errp:        RTAS error log header (initiator and severity are read).
 * @mce_log:     MCE section of the log, or NULL if there is none; with NULL
 *               an all-zero mce_error_info is saved.
 * @disposition: disposition after mce_handle_err_realmode().
 *
 * Returns the disposition, which is raised to RTAS_DISP_FULLY_RECOVERED when
 * UE processing sets mce_err.ignore_event.
 */
static int mce_handle_err_virtmode(struct pt_regs *regs,
				   struct rtas_error_log *errp,
				   struct pseries_mc_errorlog *mce_log,
				   int disposition)
{
	struct mce_error_info mce_err = { 0 };
	int initiator = rtas_error_initiator(errp);
	int severity = rtas_error_severity(errp);
	unsigned long eaddr = 0, paddr = 0;
	u8 error_type, err_sub_type;

	if (!mce_log)
		goto out;

	error_type = mce_log->error_type;
	err_sub_type = rtas_mc_error_sub_type(mce_log);

	/* Map the RTAS initiator onto the generic MCE initiator */
	if (initiator == RTAS_INITIATOR_UNKNOWN)
		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
	else if (initiator == RTAS_INITIATOR_CPU)
		mce_err.initiator = MCE_INITIATOR_CPU;
	else if (initiator == RTAS_INITIATOR_PCI)
		mce_err.initiator = MCE_INITIATOR_PCI;
	else if (initiator == RTAS_INITIATOR_ISA)
		mce_err.initiator = MCE_INITIATOR_ISA;
	else if (initiator == RTAS_INITIATOR_MEMORY)
		mce_err.initiator = MCE_INITIATOR_MEMORY;
	else if (initiator == RTAS_INITIATOR_POWERMGM)
		mce_err.initiator = MCE_INITIATOR_POWERMGM;
	else
		mce_err.initiator = MCE_INITIATOR_UNKNOWN;

	/* Map the RTAS severity; anything unlisted is treated as fatal */
	if (severity == RTAS_SEVERITY_NO_ERROR)
		mce_err.severity = MCE_SEV_NO_ERROR;
	else if (severity == RTAS_SEVERITY_EVENT)
		mce_err.severity = MCE_SEV_WARNING;
	else if (severity == RTAS_SEVERITY_WARNING)
		mce_err.severity = MCE_SEV_WARNING;
	else if (severity == RTAS_SEVERITY_ERROR_SYNC)
		mce_err.severity = MCE_SEV_SEVERE;
	else if (severity == RTAS_SEVERITY_ERROR)
		mce_err.severity = MCE_SEV_SEVERE;
	else
		mce_err.severity = MCE_SEV_FATAL;

	/* Severities up to and including ERROR_SYNC count as synchronous */
	if (severity <= RTAS_SEVERITY_ERROR_SYNC)
		mce_err.sync_error = true;
	else
		mce_err.sync_error = false;

	/* Defaults, overridden below for the error types we recognise */
	mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
	mce_err.error_class = MCE_ECLASS_UNKNOWN;

	switch (error_type) {
	case MC_ERROR_TYPE_UE:
		mce_err.error_type = MCE_ERROR_TYPE_UE;
		mce_common_process_ue(regs, &mce_err);
		if (mce_err.ignore_event)
			disposition = RTAS_DISP_FULLY_RECOVERED;
		switch (err_sub_type) {
		case MC_ERROR_UE_IFETCH:
			mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
			break;
		case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
			break;
		case MC_ERROR_UE_LOAD_STORE:
			mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
			break;
		case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
			break;
		case MC_ERROR_UE_INDETERMINATE:
		default:
			mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);

		/*
		 * Prefer the logical address from the log; otherwise derive a
		 * physical address from the effective address if we have one.
		 */
		if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
			paddr = be64_to_cpu(mce_log->logical_address);
		} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
			unsigned long pfn;

			pfn = addr_to_pfn(regs, eaddr);
			if (pfn != ULONG_MAX)
				paddr = pfn << PAGE_SHIFT;
		}

		break;
	case MC_ERROR_TYPE_SLB:
		mce_err.error_type = MCE_ERROR_TYPE_SLB;
		switch (err_sub_type) {
		case MC_ERROR_SLB_PARITY:
			mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
			break;
		case MC_ERROR_SLB_MULTIHIT:
			mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
			break;
		case MC_ERROR_SLB_INDETERMINATE:
		default:
			mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_ERAT:
		mce_err.error_type = MCE_ERROR_TYPE_ERAT;
		switch (err_sub_type) {
		case MC_ERROR_ERAT_PARITY:
			mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
			break;
		case MC_ERROR_ERAT_MULTIHIT:
			mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
			break;
		case MC_ERROR_ERAT_INDETERMINATE:
		default:
			mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_TLB:
		mce_err.error_type = MCE_ERROR_TYPE_TLB;
		switch (err_sub_type) {
		case MC_ERROR_TLB_PARITY:
			mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
			break;
		case MC_ERROR_TLB_MULTIHIT:
			mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
			break;
		case MC_ERROR_TLB_INDETERMINATE:
		default:
			mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_D_CACHE:
		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
		break;
	case MC_ERROR_TYPE_I_CACHE:
		mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
		break;
	case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
		mce_err.error_type = MCE_ERROR_TYPE_RA;
		/*
		 * No default case: other sub types leave ra_error_type at the
		 * zero it was initialised with.
		 */
		switch (err_sub_type) {
		case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
			mce_err.u.ra_error_type =
				MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
			break;
		case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
			mce_err.u.ra_error_type =
				MCE_RA_ERROR_LOAD_STORE_FOREIGN;
			break;
		}
		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
			eaddr = be64_to_cpu(mce_log->effective_address);
		break;
	case MC_ERROR_TYPE_UNKNOWN:
	default:
		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
		break;
	}
out:
	/* The event is flagged handled only when fully recovered */
	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
		       &mce_err, regs->nip, eaddr, paddr);
	return disposition;
}

/*
 * Run both handling stages for the error log @errp and return the final
 * disposition.
 *
 * The repair stage only runs when the log is extended and has an MCE
 * section; the recording stage always runs (with a NULL mce_log otherwise).
 *
 * NOTE(review): @errp is passed to rtas_error_disposition() before any
 * check, so callers presumably must not pass NULL - confirm, since
 * fwnmi_get_errinfo() can return NULL.
 */
static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
{
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log = NULL;
	int disposition = rtas_error_disposition(errp);
	u8 error_type;

	if (!rtas_error_extended(errp))
		goto out;

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (!pseries_log)
		goto out;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
	error_type = mce_log->error_type;

	disposition = mce_handle_err_realmode(disposition, error_type);
out:
	disposition = mce_handle_err_virtmode(regs, errp, mce_log,
					      disposition);
	return disposition;
}

/*
 * Process MCE rtas errlog event.
 *
 * Passes this CPU's saved machine check log to log_error().
 */
void pSeries_machine_check_log_err(void)
{
	struct rtas_error_log *err;

	err = fwnmi_get_errlog();
	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
}

/*
 * See if we can recover from a machine check exception.
 * This is only called on power4 (or above) and only via
 * the Firmware Non-Maskable Interrupts (fwnmi) handler
 * which provides the error analysis for us.
 *
 * Return 1 if corrected (or delivered a signal).
 * Return 0 if there is nothing we can do.
781 */ 782static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) 783{ 784 int recovered = 0; 785 786 if (regs_is_unrecoverable(regs)) { 787 /* If MSR_RI isn't set, we cannot recover */ 788 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); 789 recovered = 0; 790 } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { 791 /* Platform corrected itself */ 792 recovered = 1; 793 } else if (evt->severity == MCE_SEV_FATAL) { 794 /* Fatal machine check */ 795 pr_err("Machine check interrupt is fatal\n"); 796 recovered = 0; 797 } 798 799 if (!recovered && evt->sync_error) { 800 /* 801 * Try to kill processes if we get a synchronous machine check 802 * (e.g., one caused by execution of this instruction). This 803 * will devolve into a panic if we try to kill init or are in 804 * an interrupt etc. 805 * 806 * TODO: Queue up this address for hwpoisioning later. 807 * TODO: This is not quite right for d-side machine 808 * checks ->nip is not necessarily the important 809 * address. 810 */ 811 if ((user_mode(regs))) { 812 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 813 recovered = 1; 814 } else if (die_will_crash()) { 815 /* 816 * die() would kill the kernel, so better to go via 817 * the platform reboot code that will log the 818 * machine check. 819 */ 820 recovered = 0; 821 } else { 822 die_mce("Machine check", regs, SIGBUS); 823 recovered = 1; 824 } 825 } 826 827 return recovered; 828} 829 830/* 831 * Handle a machine check. 832 * 833 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) 834 * should be present. If so the handler which called us tells us if the 835 * error was recovered (never true if RI=0). 836 * 837 * On hardware prior to Power 4 these exceptions were asynchronous which 838 * means we can't tell exactly where it occurred and so we can't recover. 
839 */ 840int pSeries_machine_check_exception(struct pt_regs *regs) 841{ 842 struct machine_check_event evt; 843 844 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 845 return 0; 846 847 /* Print things out */ 848 if (evt.version != MCE_V1) { 849 pr_err("Machine Check Exception, Unknown event version %d !\n", 850 evt.version); 851 return 0; 852 } 853 machine_check_print_event_info(&evt, user_mode(regs), false); 854 855 if (recover_mce(regs, &evt)) 856 return 1; 857 858 return 0; 859} 860 861long pseries_machine_check_realmode(struct pt_regs *regs) 862{ 863 struct rtas_error_log *errp; 864 int disposition; 865 866 if (fwnmi_active) { 867 errp = fwnmi_get_errinfo(regs); 868 /* 869 * Call to fwnmi_release_errinfo() in real mode causes kernel 870 * to panic. Hence we will call it as soon as we go into 871 * virtual mode. 872 */ 873 disposition = mce_handle_error(regs, errp); 874 875 fwnmi_release_errinfo(); 876 877 if (disposition == RTAS_DISP_FULLY_RECOVERED) 878 return 1; 879 } 880 881 return 0; 882}