pci_event.c (10802B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright IBM Corp. 2012 4 * 5 * Author(s): 6 * Jan Glauber <jang@linux.vnet.ibm.com> 7 */ 8 9#define KMSG_COMPONENT "zpci" 10#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 11 12#include <linux/kernel.h> 13#include <linux/pci.h> 14#include <asm/pci_debug.h> 15#include <asm/pci_dma.h> 16#include <asm/sclp.h> 17 18#include "pci_bus.h" 19 20/* Content Code Description for PCI Function Error */ 21struct zpci_ccdf_err { 22 u32 reserved1; 23 u32 fh; /* function handle */ 24 u32 fid; /* function id */ 25 u32 ett : 4; /* expected table type */ 26 u32 mvn : 12; /* MSI vector number */ 27 u32 dmaas : 8; /* DMA address space */ 28 u32 : 6; 29 u32 q : 1; /* event qualifier */ 30 u32 rw : 1; /* read/write */ 31 u64 faddr; /* failing address */ 32 u32 reserved3; 33 u16 reserved4; 34 u16 pec; /* PCI event code */ 35} __packed; 36 37/* Content Code Description for PCI Function Availability */ 38struct zpci_ccdf_avail { 39 u32 reserved1; 40 u32 fh; /* function handle */ 41 u32 fid; /* function id */ 42 u32 reserved2; 43 u32 reserved3; 44 u32 reserved4; 45 u32 reserved5; 46 u16 reserved6; 47 u16 pec; /* PCI event code */ 48} __packed; 49 50static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) 51{ 52 switch (ers_res) { 53 case PCI_ERS_RESULT_CAN_RECOVER: 54 case PCI_ERS_RESULT_RECOVERED: 55 case PCI_ERS_RESULT_NEED_RESET: 56 return false; 57 default: 58 return true; 59 } 60} 61 62static bool is_passed_through(struct zpci_dev *zdev) 63{ 64 return zdev->s390_domain; 65} 66 67static bool is_driver_supported(struct pci_driver *driver) 68{ 69 if (!driver || !driver->err_handler) 70 return false; 71 if (!driver->err_handler->error_detected) 72 return false; 73 if (!driver->err_handler->slot_reset) 74 return false; 75 if (!driver->err_handler->resume) 76 return false; 77 return true; 78} 79 80static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, 81 struct pci_driver *driver) 82{ 83 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 84 85 ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); 86 if (ers_result_indicates_abort(ers_res)) 87 pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); 88 else if (ers_res == PCI_ERS_RESULT_NEED_RESET) 89 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); 90 91 return ers_res; 92} 93 94static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, 95 struct pci_driver *driver) 96{ 97 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 98 struct zpci_dev *zdev = to_zpci(pdev); 99 int rc; 100 101 pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); 102 rc = zpci_reset_load_store_blocked(zdev); 103 if (rc) { 104 pr_err("%s: Unblocking device access failed\n", pci_name(pdev)); 105 /* Let's try a full reset instead */ 106 return PCI_ERS_RESULT_NEED_RESET; 107 } 108 109 if (driver->err_handler->mmio_enabled) { 110 ers_res = driver->err_handler->mmio_enabled(pdev); 111 if (ers_result_indicates_abort(ers_res)) { 112 pr_info("%s: Automatic recovery failed after MMIO re-enable\n", 113 pci_name(pdev)); 114 return ers_res; 115 } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { 116 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); 117 return ers_res; 118 } 119 } 120 121 pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); 122 rc = zpci_clear_error_state(zdev); 123 if (!rc) { 124 pdev->error_state = pci_channel_io_normal; 125 } else { 126 pr_err("%s: Unblocking DMA failed\n", pci_name(pdev)); 127 /* Let's try a full reset instead */ 128 return PCI_ERS_RESULT_NEED_RESET; 129 } 130 131 return ers_res; 132} 133 134static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, 135 struct pci_driver *driver) 136{ 137 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 138 139 pr_info("%s: Initiating reset\n", pci_name(pdev)); 140 if (zpci_hot_reset_device(to_zpci(pdev))) { 141 pr_err("%s: The reset request failed\n", pci_name(pdev)); 142 return ers_res; 143 } 144 pdev->error_state = pci_channel_io_normal; 145 ers_res = driver->err_handler->slot_reset(pdev); 146 if (ers_result_indicates_abort(ers_res)) { 147 pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); 148 return ers_res; 149 } 150 151 return ers_res; 152} 153 154/* zpci_event_attempt_error_recovery - Try to recover the given PCI function 155 * @pdev: PCI function to recover currently in the error state 156 * 157 * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst. 158 * With the simplification that recovery always happens per function 159 * and the platform determines which functions are affected for 160 * multi-function devices. 161 */ 162static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) 163{ 164 pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; 165 struct pci_driver *driver; 166 167 /* 168 * Ensure that the PCI function is not removed concurrently, no driver 169 * is unbound or probed and that userspace can't access its 170 * configuration space while we perform recovery. 171 */ 172 pci_dev_lock(pdev); 173 if (pdev->error_state == pci_channel_io_perm_failure) { 174 ers_res = PCI_ERS_RESULT_DISCONNECT; 175 goto out_unlock; 176 } 177 pdev->error_state = pci_channel_io_frozen; 178 179 if (is_passed_through(to_zpci(pdev))) { 180 pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", 181 pci_name(pdev)); 182 goto out_unlock; 183 } 184 185 driver = to_pci_driver(pdev->dev.driver); 186 if (!is_driver_supported(driver)) { 187 if (!driver) 188 pr_info("%s: Cannot be recovered because no driver is bound to the device\n", 189 pci_name(pdev)); 190 else 191 pr_info("%s: The %s driver bound to the device does not support error recovery\n", 192 pci_name(pdev), 193 driver->name); 194 goto out_unlock; 195 } 196 197 ers_res = zpci_event_notify_error_detected(pdev, driver); 198 if (ers_result_indicates_abort(ers_res)) 199 goto out_unlock; 200 201 if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { 202 ers_res = zpci_event_do_error_state_clear(pdev, driver); 203 if (ers_result_indicates_abort(ers_res)) 204 goto out_unlock; 205 } 206 207 if (ers_res == PCI_ERS_RESULT_NEED_RESET) 208 ers_res = zpci_event_do_reset(pdev, driver); 209 210 if (ers_res != PCI_ERS_RESULT_RECOVERED) { 211 pr_err("%s: Automatic recovery failed; operator intervention is required\n", 212 pci_name(pdev)); 213 goto out_unlock; 214 } 215 216 pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); 217 if (driver->err_handler->resume) 218 driver->err_handler->resume(pdev); 219out_unlock: 220 pci_dev_unlock(pdev); 221 222 return ers_res; 223} 224 225/* zpci_event_io_failure - Report PCI channel failure state to driver 226 * @pdev: PCI function for which to report 227 * @es: PCI channel failure state to report 228 */ 229static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) 230{ 231 struct pci_driver *driver; 232 233 pci_dev_lock(pdev); 234 pdev->error_state = es; 235 /** 236 * While vfio-pci's error_detected callback notifies user-space QEMU 237 * reacts to this by freezing the guest. In an s390 environment PCI 238 * errors are rarely fatal so this is overkill. Instead in the future 239 * we will inject the error event and let the guest recover the device 240 * itself. 241 */ 242 if (is_passed_through(to_zpci(pdev))) 243 goto out; 244 driver = to_pci_driver(pdev->dev.driver); 245 if (driver && driver->err_handler && driver->err_handler->error_detected) 246 driver->err_handler->error_detected(pdev, pdev->error_state); 247out: 248 pci_dev_unlock(pdev); 249} 250 251static void __zpci_event_error(struct zpci_ccdf_err *ccdf) 252{ 253 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); 254 struct pci_dev *pdev = NULL; 255 pci_ers_result_t ers_res; 256 257 zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", 258 ccdf->fid, ccdf->fh, ccdf->pec); 259 zpci_err("error CCDF:\n"); 260 zpci_err_hex(ccdf, sizeof(*ccdf)); 261 262 if (zdev) { 263 zpci_update_fh(zdev, ccdf->fh); 264 if (zdev->zbus->bus) 265 pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); 266 } 267 268 pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", 269 pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); 270 271 if (!pdev) 272 goto no_pdev; 273 274 switch (ccdf->pec) { 275 case 0x003a: /* Service Action or Error Recovery Successful */ 276 ers_res = zpci_event_attempt_error_recovery(pdev); 277 if (ers_res != PCI_ERS_RESULT_RECOVERED) 278 zpci_event_io_failure(pdev, pci_channel_io_perm_failure); 279 break; 280 default: 281 /* 282 * Mark as frozen not permanently failed because the device 283 * could be subsequently recovered by the platform. 284 */ 285 zpci_event_io_failure(pdev, pci_channel_io_frozen); 286 break; 287 } 288 pci_dev_put(pdev); 289no_pdev: 290 zpci_zdev_put(zdev); 291} 292 293void zpci_event_error(void *data) 294{ 295 if (zpci_is_enabled()) 296 __zpci_event_error(data); 297} 298 299static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) 300{ 301 zpci_update_fh(zdev, fh); 302 /* Give the driver a hint that the function is 303 * already unusable. 304 */ 305 zpci_bus_remove_device(zdev, true); 306 /* Even though the device is already gone we still 307 * need to free zPCI resources as part of the disable. 308 */ 309 if (zdev->dma_table) 310 zpci_dma_exit_device(zdev); 311 if (zdev_enabled(zdev)) 312 zpci_disable_device(zdev); 313 zdev->state = ZPCI_FN_STATE_STANDBY; 314} 315 316static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) 317{ 318 struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); 319 bool existing_zdev = !!zdev; 320 enum zpci_state state; 321 322 zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", 323 ccdf->fid, ccdf->fh, ccdf->pec); 324 switch (ccdf->pec) { 325 case 0x0301: /* Reserved|Standby -> Configured */ 326 if (!zdev) { 327 zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); 328 if (IS_ERR(zdev)) 329 break; 330 } else { 331 /* the configuration request may be stale */ 332 if (zdev->state != ZPCI_FN_STATE_STANDBY) 333 break; 334 zdev->state = ZPCI_FN_STATE_CONFIGURED; 335 } 336 zpci_scan_configured_device(zdev, ccdf->fh); 337 break; 338 case 0x0302: /* Reserved -> Standby */ 339 if (!zdev) 340 zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); 341 else 342 zpci_update_fh(zdev, ccdf->fh); 343 break; 344 case 0x0303: /* Deconfiguration requested */ 345 if (zdev) { 346 /* The event may have been queued before we confirgured 347 * the device. 348 */ 349 if (zdev->state != ZPCI_FN_STATE_CONFIGURED) 350 break; 351 zpci_update_fh(zdev, ccdf->fh); 352 zpci_deconfigure_device(zdev); 353 } 354 break; 355 case 0x0304: /* Configured -> Standby|Reserved */ 356 if (zdev) { 357 /* The event may have been queued before we confirgured 358 * the device.: 359 */ 360 if (zdev->state == ZPCI_FN_STATE_CONFIGURED) 361 zpci_event_hard_deconfigured(zdev, ccdf->fh); 362 /* The 0x0304 event may immediately reserve the device */ 363 if (!clp_get_state(zdev->fid, &state) && 364 state == ZPCI_FN_STATE_RESERVED) { 365 zpci_device_reserved(zdev); 366 } 367 } 368 break; 369 case 0x0306: /* 0x308 or 0x302 for multiple devices */ 370 zpci_remove_reserved_devices(); 371 clp_scan_pci_devices(); 372 break; 373 case 0x0308: /* Standby -> Reserved */ 374 if (!zdev) 375 break; 376 zpci_device_reserved(zdev); 377 break; 378 default: 379 break; 380 } 381 if (existing_zdev) 382 zpci_zdev_put(zdev); 383} 384 385void zpci_event_availability(void *data) 386{ 387 if (zpci_is_enabled()) 388 __zpci_event_availability(data); 389}