mce.c (2708B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * NFIT - Machine Check Handler 4 * 5 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 6 */ 7#include <linux/notifier.h> 8#include <linux/acpi.h> 9#include <linux/nd.h> 10#include <asm/mce.h> 11#include "nfit.h" 12 13static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, 14 void *data) 15{ 16 struct mce *mce = (struct mce *)data; 17 struct acpi_nfit_desc *acpi_desc; 18 struct nfit_spa *nfit_spa; 19 20 /* We only care about uncorrectable memory errors */ 21 if (!mce_is_memory_error(mce) || mce_is_correctable(mce)) 22 return NOTIFY_DONE; 23 24 /* Verify the address reported in the MCE is valid. */ 25 if (!mce_usable_address(mce)) 26 return NOTIFY_DONE; 27 28 /* 29 * mce->addr contains the physical addr accessed that caused the 30 * machine check. We need to walk through the list of NFITs, and see 31 * if any of them matches that address, and only then start a scrub. 32 */ 33 mutex_lock(&acpi_desc_lock); 34 list_for_each_entry(acpi_desc, &acpi_descs, list) { 35 unsigned int align = 1UL << MCI_MISC_ADDR_LSB(mce->misc); 36 struct device *dev = acpi_desc->dev; 37 int found_match = 0; 38 39 mutex_lock(&acpi_desc->init_mutex); 40 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 41 struct acpi_nfit_system_address *spa = nfit_spa->spa; 42 43 if (nfit_spa_type(spa) != NFIT_SPA_PM) 44 continue; 45 /* find the spa that covers the mce addr */ 46 if (spa->address > mce->addr) 47 continue; 48 if ((spa->address + spa->length - 1) < mce->addr) 49 continue; 50 found_match = 1; 51 dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n", 52 spa->range_index, spa->address, spa->length); 53 /* 54 * We can break at the first match because we're going 55 * to rescan all the SPA ranges. There shouldn't be any 56 * aliasing anyway. 57 */ 58 break; 59 } 60 mutex_unlock(&acpi_desc->init_mutex); 61 62 if (!found_match) 63 continue; 64 65 /* If this fails due to an -ENOMEM, there is little we can do */ 66 nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus, 67 ALIGN_DOWN(mce->addr, align), align); 68 nvdimm_region_notify(nfit_spa->nd_region, 69 NVDIMM_REVALIDATE_POISON); 70 71 if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) { 72 /* 73 * We can ignore an -EBUSY here because if an ARS is 74 * already in progress, just let that be the last 75 * authoritative one 76 */ 77 acpi_nfit_ars_rescan(acpi_desc, 0); 78 } 79 mce->kflags |= MCE_HANDLED_NFIT; 80 break; 81 } 82 83 mutex_unlock(&acpi_desc_lock); 84 return NOTIFY_DONE; 85} 86 87static struct notifier_block nfit_mce_dec = { 88 .notifier_call = nfit_handle_mce, 89 .priority = MCE_PRIO_NFIT, 90}; 91 92void nfit_mce_register(void) 93{ 94 mce_register_decode_chain(&nfit_mce_dec); 95} 96 97void nfit_mce_unregister(void) 98{ 99 mce_unregister_decode_chain(&nfit_mce_dec); 100}