pcie-hisi-error.c (9192B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Driver for handling the PCIe controller errors on 4 * HiSilicon HIP SoCs. 5 * 6 * Copyright (c) 2020 HiSilicon Limited. 7 */ 8 9#include <linux/acpi.h> 10#include <acpi/ghes.h> 11#include <linux/bitops.h> 12#include <linux/delay.h> 13#include <linux/pci.h> 14#include <linux/platform_device.h> 15#include <linux/kfifo.h> 16#include <linux/spinlock.h> 17 18/* HISI PCIe controller error definitions */ 19#define HISI_PCIE_ERR_MISC_REGS 33 20 21#define HISI_PCIE_LOCAL_VALID_VERSION BIT(0) 22#define HISI_PCIE_LOCAL_VALID_SOC_ID BIT(1) 23#define HISI_PCIE_LOCAL_VALID_SOCKET_ID BIT(2) 24#define HISI_PCIE_LOCAL_VALID_NIMBUS_ID BIT(3) 25#define HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID BIT(4) 26#define HISI_PCIE_LOCAL_VALID_CORE_ID BIT(5) 27#define HISI_PCIE_LOCAL_VALID_PORT_ID BIT(6) 28#define HISI_PCIE_LOCAL_VALID_ERR_TYPE BIT(7) 29#define HISI_PCIE_LOCAL_VALID_ERR_SEVERITY BIT(8) 30#define HISI_PCIE_LOCAL_VALID_ERR_MISC 9 31 32static guid_t hisi_pcie_sec_guid = 33 GUID_INIT(0xB2889FC9, 0xE7D7, 0x4F9D, 34 0xA8, 0x67, 0xAF, 0x42, 0xE9, 0x8B, 0xE7, 0x72); 35 36/* 37 * Firmware reports the socket port ID where the error occurred. These 38 * macros convert that to the core ID and core port ID required by the 39 * ACPI reset method. 40 */ 41#define HISI_PCIE_PORT_ID(core, v) (((v) >> 1) + ((core) << 3)) 42#define HISI_PCIE_CORE_ID(v) ((v) >> 3) 43#define HISI_PCIE_CORE_PORT_ID(v) (((v) & 7) << 1) 44 45struct hisi_pcie_error_data { 46 u64 val_bits; 47 u8 version; 48 u8 soc_id; 49 u8 socket_id; 50 u8 nimbus_id; 51 u8 sub_module_id; 52 u8 core_id; 53 u8 port_id; 54 u8 err_severity; 55 u16 err_type; 56 u8 reserv[2]; 57 u32 err_misc[HISI_PCIE_ERR_MISC_REGS]; 58}; 59 60struct hisi_pcie_error_private { 61 struct notifier_block nb; 62 struct device *dev; 63}; 64 65enum hisi_pcie_submodule_id { 66 HISI_PCIE_SUB_MODULE_ID_AP, 67 HISI_PCIE_SUB_MODULE_ID_TL, 68 HISI_PCIE_SUB_MODULE_ID_MAC, 69 HISI_PCIE_SUB_MODULE_ID_DL, 70 HISI_PCIE_SUB_MODULE_ID_SDI, 71}; 72 73static const char * const hisi_pcie_sub_module[] = { 74 [HISI_PCIE_SUB_MODULE_ID_AP] = "AP Layer", 75 [HISI_PCIE_SUB_MODULE_ID_TL] = "TL Layer", 76 [HISI_PCIE_SUB_MODULE_ID_MAC] = "MAC Layer", 77 [HISI_PCIE_SUB_MODULE_ID_DL] = "DL Layer", 78 [HISI_PCIE_SUB_MODULE_ID_SDI] = "SDI Layer", 79}; 80 81enum hisi_pcie_err_severity { 82 HISI_PCIE_ERR_SEV_RECOVERABLE, 83 HISI_PCIE_ERR_SEV_FATAL, 84 HISI_PCIE_ERR_SEV_CORRECTED, 85 HISI_PCIE_ERR_SEV_NONE, 86}; 87 88static const char * const hisi_pcie_error_sev[] = { 89 [HISI_PCIE_ERR_SEV_RECOVERABLE] = "recoverable", 90 [HISI_PCIE_ERR_SEV_FATAL] = "fatal", 91 [HISI_PCIE_ERR_SEV_CORRECTED] = "corrected", 92 [HISI_PCIE_ERR_SEV_NONE] = "none", 93}; 94 95static const char *hisi_pcie_get_string(const char * const *array, 96 size_t n, u32 id) 97{ 98 u32 index; 99 100 for (index = 0; index < n; index++) { 101 if (index == id && array[index]) 102 return array[index]; 103 } 104 105 return "unknown"; 106} 107 108static int hisi_pcie_port_reset(struct platform_device *pdev, 109 u32 chip_id, u32 port_id) 110{ 111 struct device *dev = &pdev->dev; 112 acpi_handle handle = ACPI_HANDLE(dev); 113 union acpi_object arg[3]; 114 struct acpi_object_list arg_list; 115 acpi_status s; 116 unsigned long long data = 0; 117 118 arg[0].type = ACPI_TYPE_INTEGER; 119 arg[0].integer.value = chip_id; 120 arg[1].type = ACPI_TYPE_INTEGER; 121 arg[1].integer.value = HISI_PCIE_CORE_ID(port_id); 122 arg[2].type = ACPI_TYPE_INTEGER; 123 arg[2].integer.value = HISI_PCIE_CORE_PORT_ID(port_id); 124 125 arg_list.count = 3; 126 arg_list.pointer = arg; 127 128 s = acpi_evaluate_integer(handle, "RST", &arg_list, &data); 129 if (ACPI_FAILURE(s)) { 130 dev_err(dev, "No RST method\n"); 131 return -EIO; 132 } 133 134 if (data) { 135 dev_err(dev, "Failed to Reset\n"); 136 return -EIO; 137 } 138 139 return 0; 140} 141 142static int hisi_pcie_port_do_recovery(struct platform_device *dev, 143 u32 chip_id, u32 port_id) 144{ 145 acpi_status s; 146 struct device *device = &dev->dev; 147 acpi_handle root_handle = ACPI_HANDLE(device); 148 struct acpi_pci_root *pci_root; 149 struct pci_bus *root_bus; 150 struct pci_dev *pdev; 151 u32 domain, busnr, devfn; 152 153 s = acpi_get_parent(root_handle, &root_handle); 154 if (ACPI_FAILURE(s)) 155 return -ENODEV; 156 pci_root = acpi_pci_find_root(root_handle); 157 if (!pci_root) 158 return -ENODEV; 159 root_bus = pci_root->bus; 160 domain = pci_root->segment; 161 162 busnr = root_bus->number; 163 devfn = PCI_DEVFN(port_id, 0); 164 pdev = pci_get_domain_bus_and_slot(domain, busnr, devfn); 165 if (!pdev) { 166 dev_info(device, "Fail to get root port %04x:%02x:%02x.%d device\n", 167 domain, busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); 168 return -ENODEV; 169 } 170 171 pci_stop_and_remove_bus_device_locked(pdev); 172 pci_dev_put(pdev); 173 174 if (hisi_pcie_port_reset(dev, chip_id, port_id)) 175 return -EIO; 176 177 /* 178 * The initialization time of subordinate devices after 179 * hot reset is no more than 1s, which is required by 180 * the PCI spec v5.0 sec 6.6.1. The time will shorten 181 * if Readiness Notifications mechanisms are used. But 182 * wait 1s here to adapt any conditions. 183 */ 184 ssleep(1UL); 185 186 /* add root port and downstream devices */ 187 pci_lock_rescan_remove(); 188 pci_rescan_bus(root_bus); 189 pci_unlock_rescan_remove(); 190 191 return 0; 192} 193 194static void hisi_pcie_handle_error(struct platform_device *pdev, 195 const struct hisi_pcie_error_data *edata) 196{ 197 struct device *dev = &pdev->dev; 198 int idx, rc; 199 const unsigned long valid_bits[] = {BITMAP_FROM_U64(edata->val_bits)}; 200 201 if (edata->val_bits == 0) { 202 dev_warn(dev, "%s: no valid error information\n", __func__); 203 return; 204 } 205 206 dev_info(dev, "\nHISI : HIP : PCIe controller error\n"); 207 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOC_ID) 208 dev_info(dev, "Table version = %d\n", edata->version); 209 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOCKET_ID) 210 dev_info(dev, "Socket ID = %d\n", edata->socket_id); 211 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_NIMBUS_ID) 212 dev_info(dev, "Nimbus ID = %d\n", edata->nimbus_id); 213 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID) 214 dev_info(dev, "Sub Module = %s\n", 215 hisi_pcie_get_string(hisi_pcie_sub_module, 216 ARRAY_SIZE(hisi_pcie_sub_module), 217 edata->sub_module_id)); 218 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_CORE_ID) 219 dev_info(dev, "Core ID = core%d\n", edata->core_id); 220 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_PORT_ID) 221 dev_info(dev, "Port ID = port%d\n", edata->port_id); 222 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_SEVERITY) 223 dev_info(dev, "Error severity = %s\n", 224 hisi_pcie_get_string(hisi_pcie_error_sev, 225 ARRAY_SIZE(hisi_pcie_error_sev), 226 edata->err_severity)); 227 if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_TYPE) 228 dev_info(dev, "Error type = 0x%x\n", edata->err_type); 229 230 dev_info(dev, "Reg Dump:\n"); 231 idx = HISI_PCIE_LOCAL_VALID_ERR_MISC; 232 for_each_set_bit_from(idx, valid_bits, 233 HISI_PCIE_LOCAL_VALID_ERR_MISC + HISI_PCIE_ERR_MISC_REGS) 234 dev_info(dev, "ERR_MISC_%d = 0x%x\n", idx - HISI_PCIE_LOCAL_VALID_ERR_MISC, 235 edata->err_misc[idx - HISI_PCIE_LOCAL_VALID_ERR_MISC]); 236 237 if (edata->err_severity != HISI_PCIE_ERR_SEV_RECOVERABLE) 238 return; 239 240 /* Recovery for the PCIe controller errors, try reset 241 * PCI port for the error recovery 242 */ 243 rc = hisi_pcie_port_do_recovery(pdev, edata->socket_id, 244 HISI_PCIE_PORT_ID(edata->core_id, edata->port_id)); 245 if (rc) 246 dev_info(dev, "fail to do hisi pcie port reset\n"); 247} 248 249static int hisi_pcie_notify_error(struct notifier_block *nb, 250 unsigned long event, void *data) 251{ 252 struct acpi_hest_generic_data *gdata = data; 253 const struct hisi_pcie_error_data *error_data = acpi_hest_get_payload(gdata); 254 struct hisi_pcie_error_private *priv; 255 struct device *dev; 256 struct platform_device *pdev; 257 guid_t err_sec_guid; 258 u8 socket; 259 260 import_guid(&err_sec_guid, gdata->section_type); 261 if (!guid_equal(&err_sec_guid, &hisi_pcie_sec_guid)) 262 return NOTIFY_DONE; 263 264 priv = container_of(nb, struct hisi_pcie_error_private, nb); 265 dev = priv->dev; 266 267 if (device_property_read_u8(dev, "socket", &socket)) 268 return NOTIFY_DONE; 269 270 if (error_data->socket_id != socket) 271 return NOTIFY_DONE; 272 273 pdev = container_of(dev, struct platform_device, dev); 274 hisi_pcie_handle_error(pdev, error_data); 275 276 return NOTIFY_OK; 277} 278 279static int hisi_pcie_error_handler_probe(struct platform_device *pdev) 280{ 281 struct hisi_pcie_error_private *priv; 282 int ret; 283 284 priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); 285 if (!priv) 286 return -ENOMEM; 287 288 priv->nb.notifier_call = hisi_pcie_notify_error; 289 priv->dev = &pdev->dev; 290 ret = ghes_register_vendor_record_notifier(&priv->nb); 291 if (ret) { 292 dev_err(&pdev->dev, 293 "Failed to register hisi pcie controller error handler with apei\n"); 294 return ret; 295 } 296 297 platform_set_drvdata(pdev, priv); 298 299 return 0; 300} 301 302static int hisi_pcie_error_handler_remove(struct platform_device *pdev) 303{ 304 struct hisi_pcie_error_private *priv = platform_get_drvdata(pdev); 305 306 ghes_unregister_vendor_record_notifier(&priv->nb); 307 308 return 0; 309} 310 311static const struct acpi_device_id hisi_pcie_acpi_match[] = { 312 { "HISI0361", 0 }, 313 { } 314}; 315 316static struct platform_driver hisi_pcie_error_handler_driver = { 317 .driver = { 318 .name = "hisi-pcie-error-handler", 319 .acpi_match_table = hisi_pcie_acpi_match, 320 }, 321 .probe = hisi_pcie_error_handler_probe, 322 .remove = hisi_pcie_error_handler_remove, 323}; 324module_platform_driver(hisi_pcie_error_handler_driver); 325 326MODULE_DESCRIPTION("HiSilicon HIP PCIe controller error handling driver"); 327MODULE_LICENSE("GPL v2");