eeh_driver.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
eeh_driver.c (32923B)
      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
      4 * Copyright IBM Corp. 2004 2005
      5 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
      6 *
      7 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
      8 */
      9#include <linux/delay.h>
     10#include <linux/interrupt.h>
     11#include <linux/irq.h>
     12#include <linux/module.h>
     13#include <linux/pci.h>
     14#include <linux/pci_hotplug.h>
     15#include <asm/eeh.h>
     16#include <asm/eeh_event.h>
     17#include <asm/ppc-pci.h>
     18#include <asm/pci-bridge.h>
     19#include <asm/rtas.h>
     20
/*
 * Bookkeeping filled in by eeh_rmv_device() while devices are being
 * removed ahead of a reset.
 */
struct eeh_rmv_data {
	/* eeh_devs with a physfn that were removed, linked via ->rmv_entry */
	struct list_head removed_vf_list;
	/* number of devices eeh_rmv_device() has removed so far */
	int removed_dev_count;
};
     25
     26static int eeh_result_priority(enum pci_ers_result result)
     27{
     28	switch (result) {
     29	case PCI_ERS_RESULT_NONE:
     30		return 1;
     31	case PCI_ERS_RESULT_NO_AER_DRIVER:
     32		return 2;
     33	case PCI_ERS_RESULT_RECOVERED:
     34		return 3;
     35	case PCI_ERS_RESULT_CAN_RECOVER:
     36		return 4;
     37	case PCI_ERS_RESULT_DISCONNECT:
     38		return 5;
     39	case PCI_ERS_RESULT_NEED_RESET:
     40		return 6;
     41	default:
     42		WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
     43		return 0;
     44	}
     45};
     46
     47static const char *pci_ers_result_name(enum pci_ers_result result)
     48{
     49	switch (result) {
     50	case PCI_ERS_RESULT_NONE:
     51		return "none";
     52	case PCI_ERS_RESULT_CAN_RECOVER:
     53		return "can recover";
     54	case PCI_ERS_RESULT_NEED_RESET:
     55		return "need reset";
     56	case PCI_ERS_RESULT_DISCONNECT:
     57		return "disconnect";
     58	case PCI_ERS_RESULT_RECOVERED:
     59		return "recovered";
     60	case PCI_ERS_RESULT_NO_AER_DRIVER:
     61		return "no AER driver";
     62	default:
     63		WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
     64		return "unknown";
     65	}
     66};
     67
     68static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
     69						enum pci_ers_result new)
     70{
     71	if (eeh_result_priority(new) > eeh_result_priority(old))
     72		return new;
     73	return old;
     74}
     75
     76static bool eeh_dev_removed(struct eeh_dev *edev)
     77{
     78	return !edev || (edev->mode & EEH_DEV_REMOVED);
     79}
     80
     81static bool eeh_edev_actionable(struct eeh_dev *edev)
     82{
     83	if (!edev->pdev)
     84		return false;
     85	if (edev->pdev->error_state == pci_channel_io_perm_failure)
     86		return false;
     87	if (eeh_dev_removed(edev))
     88		return false;
     89	if (eeh_pe_passed(edev->pe))
     90		return false;
     91
     92	return true;
     93}
     94
     95/**
     96 * eeh_pcid_get - Get the PCI device driver
     97 * @pdev: PCI device
     98 *
     99 * The function is used to retrieve the PCI device driver for
    100 * the indicated PCI device. Besides, we will increase the reference
    101 * of the PCI device driver to prevent that being unloaded on
    102 * the fly. Otherwise, kernel crash would be seen.
    103 */
    104static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
    105{
    106	if (!pdev || !pdev->dev.driver)
    107		return NULL;
    108
    109	if (!try_module_get(pdev->dev.driver->owner))
    110		return NULL;
    111
    112	return to_pci_driver(pdev->dev.driver);
    113}
    114
    115/**
    116 * eeh_pcid_put - Dereference on the PCI device driver
    117 * @pdev: PCI device
    118 *
    119 * The function is called to do dereference on the PCI device
    120 * driver of the indicated PCI device.
    121 */
    122static inline void eeh_pcid_put(struct pci_dev *pdev)
    123{
    124	if (!pdev || !pdev->dev.driver)
    125		return;
    126
    127	module_put(pdev->dev.driver->owner);
    128}
    129
    130/**
    131 * eeh_disable_irq - Disable interrupt for the recovering device
    132 * @dev: PCI device
    133 *
    134 * This routine must be called when reporting temporary or permanent
    135 * error to the particular PCI device to disable interrupt of that
    136 * device. If the device has enabled MSI or MSI-X interrupt, we needn't
    137 * do real work because EEH should freeze DMA transfers for those PCI
    138 * devices encountering EEH errors, which includes MSI or MSI-X.
    139 */
    140static void eeh_disable_irq(struct eeh_dev *edev)
    141{
    142	/* Don't disable MSI and MSI-X interrupts. They are
    143	 * effectively disabled by the DMA Stopped state
    144	 * when an EEH error occurs.
    145	 */
    146	if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
    147		return;
    148
    149	if (!irq_has_action(edev->pdev->irq))
    150		return;
    151
    152	edev->mode |= EEH_DEV_IRQ_DISABLED;
    153	disable_irq_nosync(edev->pdev->irq);
    154}
    155
    156/**
    157 * eeh_enable_irq - Enable interrupt for the recovering device
    158 * @dev: PCI device
    159 *
    160 * This routine must be called to enable interrupt while failed
    161 * device could be resumed.
    162 */
    163static void eeh_enable_irq(struct eeh_dev *edev)
    164{
    165	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
    166		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
    167		/*
    168		 * FIXME !!!!!
    169		 *
    170		 * This is just ass backwards. This maze has
    171		 * unbalanced irq_enable/disable calls. So instead of
    172		 * finding the root cause it works around the warning
    173		 * in the irq_enable code by conditionally calling
    174		 * into it.
    175		 *
    176		 * That's just wrong.The warning in the core code is
    177		 * there to tell people to fix their asymmetries in
    178		 * their own code, not by abusing the core information
    179		 * to avoid it.
    180		 *
    181		 * I so wish that the assymetry would be the other way
    182		 * round and a few more irq_disable calls render that
    183		 * shit unusable forever.
    184		 *
    185		 *	tglx
    186		 */
    187		if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
    188			enable_irq(edev->pdev->irq);
    189	}
    190}
    191
    192static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
    193{
    194	struct pci_dev *pdev;
    195
    196	if (!edev)
    197		return;
    198
    199	/*
    200	 * We cannot access the config space on some adapters.
    201	 * Otherwise, it will cause fenced PHB. We don't save
    202	 * the content in their config space and will restore
    203	 * from the initial config space saved when the EEH
    204	 * device is created.
    205	 */
    206	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
    207		return;
    208
    209	pdev = eeh_dev_to_pci_dev(edev);
    210	if (!pdev)
    211		return;
    212
    213	pci_save_state(pdev);
    214}
    215
    216static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s)
    217{
    218	struct eeh_pe *pe;
    219	struct eeh_dev *edev, *tmp;
    220
    221	eeh_for_each_pe(root, pe)
    222		eeh_pe_for_each_dev(pe, edev, tmp)
    223			if (eeh_edev_actionable(edev))
    224				edev->pdev->error_state = s;
    225}
    226
    227static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
    228{
    229	struct eeh_pe *pe;
    230	struct eeh_dev *edev, *tmp;
    231
    232	eeh_for_each_pe(root, pe) {
    233		eeh_pe_for_each_dev(pe, edev, tmp) {
    234			if (!eeh_edev_actionable(edev))
    235				continue;
    236
    237			if (!eeh_pcid_get(edev->pdev))
    238				continue;
    239
    240			if (enable)
    241				eeh_enable_irq(edev);
    242			else
    243				eeh_disable_irq(edev);
    244
    245			eeh_pcid_put(edev->pdev);
    246		}
    247	}
    248}
    249
/*
 * Per-device callback invoked by eeh_pe_report_edev(): it receives the
 * eeh_dev, its pci_dev and the bound pci_driver, and returns the
 * result to be merged into the aggregate recovery state.
 */
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
					     struct pci_dev *,
					     struct pci_driver *);
    253static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
    254			       enum pci_ers_result *result)
    255{
    256	struct pci_dev *pdev;
    257	struct pci_driver *driver;
    258	enum pci_ers_result new_result;
    259
    260	pci_lock_rescan_remove();
    261	pdev = edev->pdev;
    262	if (pdev)
    263		get_device(&pdev->dev);
    264	pci_unlock_rescan_remove();
    265	if (!pdev) {
    266		eeh_edev_info(edev, "no device");
    267		return;
    268	}
    269	device_lock(&pdev->dev);
    270	if (eeh_edev_actionable(edev)) {
    271		driver = eeh_pcid_get(pdev);
    272
    273		if (!driver)
    274			eeh_edev_info(edev, "no driver");
    275		else if (!driver->err_handler)
    276			eeh_edev_info(edev, "driver not EEH aware");
    277		else if (edev->mode & EEH_DEV_NO_HANDLER)
    278			eeh_edev_info(edev, "driver bound too late");
    279		else {
    280			new_result = fn(edev, pdev, driver);
    281			eeh_edev_info(edev, "%s driver reports: '%s'",
    282				      driver->name,
    283				      pci_ers_result_name(new_result));
    284			if (result)
    285				*result = pci_ers_merge_result(*result,
    286							       new_result);
    287		}
    288		if (driver)
    289			eeh_pcid_put(pdev);
    290	} else {
    291		eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
    292			      !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
    293	}
    294	device_unlock(&pdev->dev);
    295	if (edev->pdev != pdev)
    296		eeh_edev_warn(edev, "Device changed during processing!\n");
    297	put_device(&pdev->dev);
    298}
    299
    300static void eeh_pe_report(const char *name, struct eeh_pe *root,
    301			  eeh_report_fn fn, enum pci_ers_result *result)
    302{
    303	struct eeh_pe *pe;
    304	struct eeh_dev *edev, *tmp;
    305
    306	pr_info("EEH: Beginning: '%s'\n", name);
    307	eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
    308		eeh_pe_report_edev(edev, fn, result);
    309	if (result)
    310		pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
    311			name, pci_ers_result_name(*result));
    312	else
    313		pr_info("EEH: Finished:'%s'", name);
    314}
    315
    316/**
    317 * eeh_report_error - Report pci error to each device driver
    318 * @edev: eeh device
    319 * @driver: device's PCI driver
    320 *
    321 * Report an EEH error to each device driver.
    322 */
    323static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
    324					    struct pci_dev *pdev,
    325					    struct pci_driver *driver)
    326{
    327	enum pci_ers_result rc;
    328
    329	if (!driver->err_handler->error_detected)
    330		return PCI_ERS_RESULT_NONE;
    331
    332	eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
    333		      driver->name);
    334	rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
    335
    336	edev->in_error = true;
    337	pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
    338	return rc;
    339}
    340
    341/**
    342 * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
    343 * @edev: eeh device
    344 * @driver: device's PCI driver
    345 *
    346 * Tells each device driver that IO ports, MMIO and config space I/O
    347 * are now enabled.
    348 */
    349static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
    350						   struct pci_dev *pdev,
    351						   struct pci_driver *driver)
    352{
    353	if (!driver->err_handler->mmio_enabled)
    354		return PCI_ERS_RESULT_NONE;
    355	eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
    356	return driver->err_handler->mmio_enabled(pdev);
    357}
    358
    359/**
    360 * eeh_report_reset - Tell device that slot has been reset
    361 * @edev: eeh device
    362 * @driver: device's PCI driver
    363 *
    364 * This routine must be called while EEH tries to reset particular
    365 * PCI device so that the associated PCI device driver could take
    366 * some actions, usually to save data the driver needs so that the
    367 * driver can work again while the device is recovered.
    368 */
    369static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
    370					    struct pci_dev *pdev,
    371					    struct pci_driver *driver)
    372{
    373	if (!driver->err_handler->slot_reset || !edev->in_error)
    374		return PCI_ERS_RESULT_NONE;
    375	eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
    376	return driver->err_handler->slot_reset(pdev);
    377}
    378
    379static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
    380{
    381	struct pci_dev *pdev;
    382
    383	if (!edev)
    384		return;
    385
    386	/*
    387	 * The content in the config space isn't saved because
    388	 * the blocked config space on some adapters. We have
    389	 * to restore the initial saved config space when the
    390	 * EEH device is created.
    391	 */
    392	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
    393		if (list_is_last(&edev->entry, &edev->pe->edevs))
    394			eeh_pe_restore_bars(edev->pe);
    395
    396		return;
    397	}
    398
    399	pdev = eeh_dev_to_pci_dev(edev);
    400	if (!pdev)
    401		return;
    402
    403	pci_restore_state(pdev);
    404}
    405
    406/**
    407 * eeh_report_resume - Tell device to resume normal operations
    408 * @edev: eeh device
    409 * @driver: device's PCI driver
    410 *
    411 * This routine must be called to notify the device driver that it
    412 * could resume so that the device driver can do some initialization
    413 * to make the recovered device work again.
    414 */
    415static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
    416					     struct pci_dev *pdev,
    417					     struct pci_driver *driver)
    418{
    419	if (!driver->err_handler->resume || !edev->in_error)
    420		return PCI_ERS_RESULT_NONE;
    421
    422	eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
    423	driver->err_handler->resume(pdev);
    424
    425	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
    426#ifdef CONFIG_PCI_IOV
    427	if (eeh_ops->notify_resume)
    428		eeh_ops->notify_resume(edev);
    429#endif
    430	return PCI_ERS_RESULT_NONE;
    431}
    432
    433/**
    434 * eeh_report_failure - Tell device driver that device is dead.
    435 * @edev: eeh device
    436 * @driver: device's PCI driver
    437 *
    438 * This informs the device driver that the device is permanently
    439 * dead, and that no further recovery attempts will be made on it.
    440 */
    441static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
    442					      struct pci_dev *pdev,
    443					      struct pci_driver *driver)
    444{
    445	enum pci_ers_result rc;
    446
    447	if (!driver->err_handler->error_detected)
    448		return PCI_ERS_RESULT_NONE;
    449
    450	eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
    451		      driver->name);
    452	rc = driver->err_handler->error_detected(pdev,
    453						 pci_channel_io_perm_failure);
    454
    455	pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
    456	return rc;
    457}
    458
    459static void *eeh_add_virt_device(struct eeh_dev *edev)
    460{
    461	struct pci_driver *driver;
    462	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
    463
    464	if (!(edev->physfn)) {
    465		eeh_edev_warn(edev, "Not for VF\n");
    466		return NULL;
    467	}
    468
    469	driver = eeh_pcid_get(dev);
    470	if (driver) {
    471		if (driver->err_handler) {
    472			eeh_pcid_put(dev);
    473			return NULL;
    474		}
    475		eeh_pcid_put(dev);
    476	}
    477
    478#ifdef CONFIG_PCI_IOV
    479	pci_iov_add_virtfn(edev->physfn, edev->vf_index);
    480#endif
    481	return NULL;
    482}
    483
    484static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
    485{
    486	struct pci_driver *driver;
    487	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
    488	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
    489
    490	/*
    491	 * Actually, we should remove the PCI bridges as well.
    492	 * However, that's lots of complexity to do that,
    493	 * particularly some of devices under the bridge might
    494	 * support EEH. So we just care about PCI devices for
    495	 * simplicity here.
    496	 */
    497	if (!eeh_edev_actionable(edev) ||
    498	    (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
    499		return;
    500
    501	if (rmv_data) {
    502		driver = eeh_pcid_get(dev);
    503		if (driver) {
    504			if (driver->err_handler &&
    505			    driver->err_handler->error_detected &&
    506			    driver->err_handler->slot_reset) {
    507				eeh_pcid_put(dev);
    508				return;
    509			}
    510			eeh_pcid_put(dev);
    511		}
    512	}
    513
    514	/* Remove it from PCI subsystem */
    515	pr_info("EEH: Removing %s without EEH sensitive driver\n",
    516		pci_name(dev));
    517	edev->mode |= EEH_DEV_DISCONNECTED;
    518	if (rmv_data)
    519		rmv_data->removed_dev_count++;
    520
    521	if (edev->physfn) {
    522#ifdef CONFIG_PCI_IOV
    523		pci_iov_remove_virtfn(edev->physfn, edev->vf_index);
    524		edev->pdev = NULL;
    525#endif
    526		if (rmv_data)
    527			list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
    528	} else {
    529		pci_lock_rescan_remove();
    530		pci_stop_and_remove_bus_device(dev);
    531		pci_unlock_rescan_remove();
    532	}
    533}
    534
    535static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
    536{
    537	struct eeh_dev *edev, *tmp;
    538
    539	eeh_pe_for_each_dev(pe, edev, tmp) {
    540		if (!(edev->mode & EEH_DEV_DISCONNECTED))
    541			continue;
    542
    543		edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
    544		eeh_pe_tree_remove(edev);
    545	}
    546
    547	return NULL;
    548}
    549
    550/*
    551 * Explicitly clear PE's frozen state for PowerNV where
    552 * we have frozen PE until BAR restore is completed. It's
    553 * harmless to clear it for pSeries. To be consistent with
    554 * PE reset (for 3 times), we try to clear the frozen state
    555 * for 3 times as well.
    556 */
    557static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
    558{
    559	struct eeh_pe *pe;
    560	int i;
    561
    562	eeh_for_each_pe(root, pe) {
    563		if (include_passed || !eeh_pe_passed(pe)) {
    564			for (i = 0; i < 3; i++)
    565				if (!eeh_unfreeze_pe(pe))
    566					break;
    567			if (i >= 3)
    568				return -EIO;
    569		}
    570	}
    571	eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
    572	return 0;
    573}
    574
    575int eeh_pe_reset_and_recover(struct eeh_pe *pe)
    576{
    577	int ret;
    578
    579	/* Bail if the PE is being recovered */
    580	if (pe->state & EEH_PE_RECOVERING)
    581		return 0;
    582
    583	/* Put the PE into recovery mode */
    584	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
    585
    586	/* Save states */
    587	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
    588
    589	/* Issue reset */
    590	ret = eeh_pe_reset_full(pe, true);
    591	if (ret) {
    592		eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
    593		return ret;
    594	}
    595
    596	/* Unfreeze the PE */
    597	ret = eeh_clear_pe_frozen_state(pe, true);
    598	if (ret) {
    599		eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
    600		return ret;
    601	}
    602
    603	/* Restore device state */
    604	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
    605
    606	/* Clear recovery mode */
    607	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
    608
    609	return 0;
    610}
    611
    612/**
    613 * eeh_reset_device - Perform actual reset of a pci slot
    614 * @driver_eeh_aware: Does the device's driver provide EEH support?
    615 * @pe: EEH PE
    616 * @bus: PCI bus corresponding to the isolcated slot
    617 * @rmv_data: Optional, list to record removed devices
    618 *
    619 * This routine must be called to do reset on the indicated PE.
    620 * During the reset, udev might be invoked because those affected
    621 * PCI devices will be removed and then added.
    622 */
    623static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
    624			    struct eeh_rmv_data *rmv_data,
    625			    bool driver_eeh_aware)
    626{
    627	time64_t tstamp;
    628	int cnt, rc;
    629	struct eeh_dev *edev;
    630	struct eeh_pe *tmp_pe;
    631	bool any_passed = false;
    632
    633	eeh_for_each_pe(pe, tmp_pe)
    634		any_passed |= eeh_pe_passed(tmp_pe);
    635
    636	/* pcibios will clear the counter; save the value */
    637	cnt = pe->freeze_count;
    638	tstamp = pe->tstamp;
    639
    640	/*
    641	 * We don't remove the corresponding PE instances because
    642	 * we need the information afterwords. The attached EEH
    643	 * devices are expected to be attached soon when calling
    644	 * into pci_hp_add_devices().
    645	 */
    646	eeh_pe_state_mark(pe, EEH_PE_KEEP);
    647	if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
    648		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
    649	} else {
    650		pci_lock_rescan_remove();
    651		pci_hp_remove_devices(bus);
    652		pci_unlock_rescan_remove();
    653	}
    654
    655	/*
    656	 * Reset the pci controller. (Asserts RST#; resets config space).
    657	 * Reconfigure bridges and devices. Don't try to bring the system
    658	 * up if the reset failed for some reason.
    659	 *
    660	 * During the reset, it's very dangerous to have uncontrolled PCI
    661	 * config accesses. So we prefer to block them. However, controlled
    662	 * PCI config accesses initiated from EEH itself are allowed.
    663	 */
    664	rc = eeh_pe_reset_full(pe, false);
    665	if (rc)
    666		return rc;
    667
    668	pci_lock_rescan_remove();
    669
    670	/* Restore PE */
    671	eeh_ops->configure_bridge(pe);
    672	eeh_pe_restore_bars(pe);
    673
    674	/* Clear frozen state */
    675	rc = eeh_clear_pe_frozen_state(pe, false);
    676	if (rc) {
    677		pci_unlock_rescan_remove();
    678		return rc;
    679	}
    680
    681	/* Give the system 5 seconds to finish running the user-space
    682	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
    683	 * this is a hack, but if we don't do this, and try to bring
    684	 * the device up before the scripts have taken it down,
    685	 * potentially weird things happen.
    686	 */
    687	if (!driver_eeh_aware || rmv_data->removed_dev_count) {
    688		pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
    689			(driver_eeh_aware ? "partial" : "complete"));
    690		ssleep(5);
    691
    692		/*
    693		 * The EEH device is still connected with its parent
    694		 * PE. We should disconnect it so the binding can be
    695		 * rebuilt when adding PCI devices.
    696		 */
    697		edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
    698		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
    699		if (pe->type & EEH_PE_VF) {
    700			eeh_add_virt_device(edev);
    701		} else {
    702			if (!driver_eeh_aware)
    703				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
    704			pci_hp_add_devices(bus);
    705		}
    706	}
    707	eeh_pe_state_clear(pe, EEH_PE_KEEP, true);
    708
    709	pe->tstamp = tstamp;
    710	pe->freeze_count = cnt;
    711
    712	pci_unlock_rescan_remove();
    713	return 0;
    714}
    715
/* The longest amount of time to wait for a pci device
 * to come back on line, in seconds. eeh_handle_normal_event()
 * multiplies it by 1000 before handing it to eeh_wait_state().
 */
#define MAX_WAIT_FOR_RECOVERY 300
    720
    721
    722/* Walks the PE tree after processing an event to remove any stale PEs.
    723 *
    724 * NB: This needs to be recursive to ensure the leaf PEs get removed
    725 * before their parents do. Although this is possible to do recursively
    726 * we don't since this is easier to read and we need to garantee
    727 * the leaf nodes will be handled first.
    728 */
    729static void eeh_pe_cleanup(struct eeh_pe *pe)
    730{
    731	struct eeh_pe *child_pe, *tmp;
    732
    733	list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
    734		eeh_pe_cleanup(child_pe);
    735
    736	if (pe->state & EEH_PE_KEEP)
    737		return;
    738
    739	if (!(pe->state & EEH_PE_INVALID))
    740		return;
    741
    742	if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
    743		list_del(&pe->child);
    744		kfree(pe);
    745	}
    746}
    747
    748/**
    749 * eeh_check_slot_presence - Check if a device is still present in a slot
    750 * @pdev: pci_dev to check
    751 *
    752 * This function may return a false positive if we can't determine the slot's
    753 * presence state. This might happen for for PCIe slots if the PE containing
    754 * the upstream bridge is also frozen, or the bridge is part of the same PE
    755 * as the device.
    756 *
    757 * This shouldn't happen often, but you might see it if you hotplug a PCIe
    758 * switch.
    759 */
    760static bool eeh_slot_presence_check(struct pci_dev *pdev)
    761{
    762	const struct hotplug_slot_ops *ops;
    763	struct pci_slot *slot;
    764	u8 state;
    765	int rc;
    766
    767	if (!pdev)
    768		return false;
    769
    770	if (pdev->error_state == pci_channel_io_perm_failure)
    771		return false;
    772
    773	slot = pdev->slot;
    774	if (!slot || !slot->hotplug)
    775		return true;
    776
    777	ops = slot->hotplug->ops;
    778	if (!ops || !ops->get_adapter_status)
    779		return true;
    780
    781	/* set the attention indicator while we've got the slot ops */
    782	if (ops->set_attention_status)
    783		ops->set_attention_status(slot->hotplug, 1);
    784
    785	rc = ops->get_adapter_status(slot->hotplug, &state);
    786	if (rc)
    787		return true;
    788
    789	return !!state;
    790}
    791
    792static void eeh_clear_slot_attention(struct pci_dev *pdev)
    793{
    794	const struct hotplug_slot_ops *ops;
    795	struct pci_slot *slot;
    796
    797	if (!pdev)
    798		return;
    799
    800	if (pdev->error_state == pci_channel_io_perm_failure)
    801		return;
    802
    803	slot = pdev->slot;
    804	if (!slot || !slot->hotplug)
    805		return;
    806
    807	ops = slot->hotplug->ops;
    808	if (!ops || !ops->set_attention_status)
    809		return;
    810
    811	ops->set_attention_status(slot->hotplug, 0);
    812}
    813
    814/**
    815 * eeh_handle_normal_event - Handle EEH events on a specific PE
    816 * @pe: EEH PE - which should not be used after we return, as it may
    817 * have been invalidated.
    818 *
    819 * Attempts to recover the given PE.  If recovery fails or the PE has failed
    820 * too many times, remove the PE.
    821 *
    822 * While PHB detects address or data parity errors on particular PCI
    823 * slot, the associated PE will be frozen. Besides, DMA's occurring
    824 * to wild addresses (which usually happen due to bugs in device
    825 * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
    826 * #PERR or other misc PCI-related errors also can trigger EEH errors.
    827 *
    828 * Recovery process consists of unplugging the device driver (which
    829 * generated hotplug events to userspace), then issuing a PCI #RST to
    830 * the device, then reconfiguring the PCI config space for all bridges
    831 * & devices under this slot, and then finally restarting the device
    832 * drivers (which cause a second set of hotplug events to go out to
    833 * userspace).
    834 */
    835void eeh_handle_normal_event(struct eeh_pe *pe)
    836{
    837	struct pci_bus *bus;
    838	struct eeh_dev *edev, *tmp;
    839	struct eeh_pe *tmp_pe;
    840	int rc = 0;
    841	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
    842	struct eeh_rmv_data rmv_data =
    843		{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
    844	int devices = 0;
    845
    846	bus = eeh_pe_bus_get(pe);
    847	if (!bus) {
    848		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
    849			__func__, pe->phb->global_number, pe->addr);
    850		return;
    851	}
    852
    853	/*
    854	 * When devices are hot-removed we might get an EEH due to
    855	 * a driver attempting to touch the MMIO space of a removed
    856	 * device. In this case we don't have a device to recover
    857	 * so suppress the event if we can't find any present devices.
    858	 *
    859	 * The hotplug driver should take care of tearing down the
    860	 * device itself.
    861	 */
    862	eeh_for_each_pe(pe, tmp_pe)
    863		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
    864			if (eeh_slot_presence_check(edev->pdev))
    865				devices++;
    866
    867	if (!devices) {
    868		pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
    869			pe->phb->global_number, pe->addr);
    870		goto out; /* nothing to recover */
    871	}
    872
    873	/* Log the event */
    874	if (pe->type & EEH_PE_PHB) {
    875		pr_err("EEH: Recovering PHB#%x, location: %s\n",
    876			pe->phb->global_number, eeh_pe_loc_get(pe));
    877	} else {
    878		struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
    879
    880		pr_err("EEH: Recovering PHB#%x-PE#%x\n",
    881		       pe->phb->global_number, pe->addr);
    882		pr_err("EEH: PE location: %s, PHB location: %s\n",
    883		       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
    884	}
    885
    886#ifdef CONFIG_STACKTRACE
    887	/*
    888	 * Print the saved stack trace now that we've verified there's
    889	 * something to recover.
    890	 */
    891	if (pe->trace_entries) {
    892		void **ptrs = (void **) pe->stack_trace;
    893		int i;
    894
    895		pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
    896		       pe->phb->global_number, pe->addr);
    897
    898		/* FIXME: Use the same format as dump_stack() */
    899		pr_err("EEH: Call Trace:\n");
    900		for (i = 0; i < pe->trace_entries; i++)
    901			pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
    902
    903		pe->trace_entries = 0;
    904	}
    905#endif /* CONFIG_STACKTRACE */
    906
    907	eeh_for_each_pe(pe, tmp_pe)
    908		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
    909			edev->mode &= ~EEH_DEV_NO_HANDLER;
    910
    911	eeh_pe_update_time_stamp(pe);
    912	pe->freeze_count++;
    913	if (pe->freeze_count > eeh_max_freezes) {
    914		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
    915		       pe->phb->global_number, pe->addr,
    916		       pe->freeze_count);
    917
    918		goto recover_failed;
    919	}
    920
    921	/* Walk the various device drivers attached to this slot through
    922	 * a reset sequence, giving each an opportunity to do what it needs
    923	 * to accomplish the reset.  Each child gets a report of the
    924	 * status ... if any child can't handle the reset, then the entire
    925	 * slot is dlpar removed and added.
    926	 *
    927	 * When the PHB is fenced, we have to issue a reset to recover from
    928	 * the error. Override the result if necessary to have partially
    929	 * hotplug for this case.
    930	 */
    931	pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
    932		pe->freeze_count, eeh_max_freezes);
    933	pr_info("EEH: Notify device drivers to shutdown\n");
    934	eeh_set_channel_state(pe, pci_channel_io_frozen);
    935	eeh_set_irq_state(pe, false);
    936	eeh_pe_report("error_detected(IO frozen)", pe,
    937		      eeh_report_error, &result);
    938	if (result == PCI_ERS_RESULT_DISCONNECT)
    939		goto recover_failed;
    940
    941	/*
    942	 * Error logged on a PHB are always fences which need a full
    943	 * PHB reset to clear so force that to happen.
    944	 */
    945	if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE)
    946		result = PCI_ERS_RESULT_NEED_RESET;
    947
    948	/* Get the current PCI slot state. This can take a long time,
    949	 * sometimes over 300 seconds for certain systems.
    950	 */
    951	rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
    952	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
    953		pr_warn("EEH: Permanent failure\n");
    954		goto recover_failed;
    955	}
    956
    957	/* Since rtas may enable MMIO when posting the error log,
    958	 * don't post the error log until after all dev drivers
    959	 * have been informed.
    960	 */
    961	pr_info("EEH: Collect temporary log\n");
    962	eeh_slot_error_detail(pe, EEH_LOG_TEMP);
    963
    964	/* If all device drivers were EEH-unaware, then shut
    965	 * down all of the device drivers, and hope they
    966	 * go down willingly, without panicing the system.
    967	 */
    968	if (result == PCI_ERS_RESULT_NONE) {
    969		pr_info("EEH: Reset with hotplug activity\n");
    970		rc = eeh_reset_device(pe, bus, NULL, false);
    971		if (rc) {
    972			pr_warn("%s: Unable to reset, err=%d\n", __func__, rc);
    973			goto recover_failed;
    974		}
    975	}
    976
    977	/* If all devices reported they can proceed, then re-enable MMIO */
    978	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
    979		pr_info("EEH: Enable I/O for affected devices\n");
    980		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
    981		if (rc < 0)
    982			goto recover_failed;
    983
    984		if (rc) {
    985			result = PCI_ERS_RESULT_NEED_RESET;
    986		} else {
    987			pr_info("EEH: Notify device drivers to resume I/O\n");
    988			eeh_pe_report("mmio_enabled", pe,
    989				      eeh_report_mmio_enabled, &result);
    990		}
    991	}
    992	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
    993		pr_info("EEH: Enabled DMA for affected devices\n");
    994		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
    995		if (rc < 0)
    996			goto recover_failed;
    997
    998		if (rc) {
    999			result = PCI_ERS_RESULT_NEED_RESET;
   1000		} else {
   1001			/*
   1002			 * We didn't do PE reset for the case. The PE
   1003			 * is still in frozen state. Clear it before
   1004			 * resuming the PE.
   1005			 */
   1006			eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
   1007			result = PCI_ERS_RESULT_RECOVERED;
   1008		}
   1009	}
   1010
   1011	/* If any device called out for a reset, then reset the slot */
   1012	if (result == PCI_ERS_RESULT_NEED_RESET) {
   1013		pr_info("EEH: Reset without hotplug activity\n");
   1014		rc = eeh_reset_device(pe, bus, &rmv_data, true);
   1015		if (rc) {
   1016			pr_warn("%s: Cannot reset, err=%d\n", __func__, rc);
   1017			goto recover_failed;
   1018		}
   1019
   1020		result = PCI_ERS_RESULT_NONE;
   1021		eeh_set_channel_state(pe, pci_channel_io_normal);
   1022		eeh_set_irq_state(pe, true);
   1023		eeh_pe_report("slot_reset", pe, eeh_report_reset,
   1024			      &result);
   1025	}
   1026
   1027	if ((result == PCI_ERS_RESULT_RECOVERED) ||
   1028	    (result == PCI_ERS_RESULT_NONE)) {
   1029		/*
   1030		 * For those hot removed VFs, we should add back them after PF
   1031		 * get recovered properly.
   1032		 */
   1033		list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
   1034					 rmv_entry) {
   1035			eeh_add_virt_device(edev);
   1036			list_del(&edev->rmv_entry);
   1037		}
   1038
   1039		/* Tell all device drivers that they can resume operations */
   1040		pr_info("EEH: Notify device driver to resume\n");
   1041		eeh_set_channel_state(pe, pci_channel_io_normal);
   1042		eeh_set_irq_state(pe, true);
   1043		eeh_pe_report("resume", pe, eeh_report_resume, NULL);
   1044		eeh_for_each_pe(pe, tmp_pe) {
   1045			eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
   1046				edev->mode &= ~EEH_DEV_NO_HANDLER;
   1047				edev->in_error = false;
   1048			}
   1049		}
   1050
   1051		pr_info("EEH: Recovery successful.\n");
   1052		goto out;
   1053	}
   1054
   1055recover_failed:
   1056	/*
   1057	 * About 90% of all real-life EEH failures in the field
   1058	 * are due to poorly seated PCI cards. Only 10% or so are
   1059	 * due to actual, failed cards.
   1060	 */
   1061	pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
   1062		"Please try reseating or replacing it\n",
   1063		pe->phb->global_number, pe->addr);
   1064
   1065	eeh_slot_error_detail(pe, EEH_LOG_PERM);
   1066
   1067	/* Notify all devices that they're about to go down. */
   1068	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
   1069	eeh_set_irq_state(pe, false);
   1070	eeh_pe_report("error_detected(permanent failure)", pe,
   1071		      eeh_report_failure, NULL);
   1072
   1073	/* Mark the PE to be removed permanently */
   1074	eeh_pe_state_mark(pe, EEH_PE_REMOVED);
   1075
   1076	/*
   1077	 * Shut down the device drivers for good. We mark
   1078	 * all removed devices correctly to avoid access
   1079	 * the their PCI config any more.
   1080	 */
   1081	if (pe->type & EEH_PE_VF) {
   1082		eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
   1083		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
   1084	} else {
   1085		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
   1086		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
   1087
   1088		pci_lock_rescan_remove();
   1089		pci_hp_remove_devices(bus);
   1090		pci_unlock_rescan_remove();
   1091		/* The passed PE should no longer be used */
   1092		return;
   1093	}
   1094
   1095out:
   1096	/*
   1097	 * Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
   1098	 * we don't want to modify the PE tree structure so we do it here.
   1099	 */
   1100	eeh_pe_cleanup(pe);
   1101
   1102	/* clear the slot attention LED for all recovered devices */
   1103	eeh_for_each_pe(pe, tmp_pe)
   1104		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
   1105			eeh_clear_slot_attention(edev->pdev);
   1106
   1107	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
   1108}
   1109
   1110/**
   1111 * eeh_handle_special_event - Handle EEH events without a specific failing PE
   1112 *
   1113 * Called when an EEH event is detected but can't be narrowed down to a
   1114 * specific PE.  Iterates through possible failures and handles them as
   1115 * necessary.
   1116 */
   1117void eeh_handle_special_event(void)
   1118{
   1119	struct eeh_pe *pe, *phb_pe, *tmp_pe;
   1120	struct eeh_dev *edev, *tmp_edev;
   1121	struct pci_bus *bus;
   1122	struct pci_controller *hose;
   1123	unsigned long flags;
   1124	int rc;
   1125
   1126
   1127	do {
   1128		rc = eeh_ops->next_error(&pe);
   1129
   1130		switch (rc) {
   1131		case EEH_NEXT_ERR_DEAD_IOC:
   1132			/* Mark all PHBs in dead state */
   1133			eeh_serialize_lock(&flags);
   1134
   1135			/* Purge all events */
   1136			eeh_remove_event(NULL, true);
   1137
   1138			list_for_each_entry(hose, &hose_list, list_node) {
   1139				phb_pe = eeh_phb_pe_get(hose);
   1140				if (!phb_pe) continue;
   1141
   1142				eeh_pe_mark_isolated(phb_pe);
   1143			}
   1144
   1145			eeh_serialize_unlock(flags);
   1146
   1147			break;
   1148		case EEH_NEXT_ERR_FROZEN_PE:
   1149		case EEH_NEXT_ERR_FENCED_PHB:
   1150		case EEH_NEXT_ERR_DEAD_PHB:
   1151			/* Mark the PE in fenced state */
   1152			eeh_serialize_lock(&flags);
   1153
   1154			/* Purge all events of the PHB */
   1155			eeh_remove_event(pe, true);
   1156
   1157			if (rc != EEH_NEXT_ERR_DEAD_PHB)
   1158				eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
   1159			eeh_pe_mark_isolated(pe);
   1160
   1161			eeh_serialize_unlock(flags);
   1162
   1163			break;
   1164		case EEH_NEXT_ERR_NONE:
   1165			return;
   1166		default:
   1167			pr_warn("%s: Invalid value %d from next_error()\n",
   1168				__func__, rc);
   1169			return;
   1170		}
   1171
   1172		/*
   1173		 * For fenced PHB and frozen PE, it's handled as normal
   1174		 * event. We have to remove the affected PHBs for dead
   1175		 * PHB and IOC
   1176		 */
   1177		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
   1178		    rc == EEH_NEXT_ERR_FENCED_PHB) {
   1179			eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
   1180			eeh_handle_normal_event(pe);
   1181		} else {
   1182			eeh_for_each_pe(pe, tmp_pe)
   1183				eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
   1184					edev->mode &= ~EEH_DEV_NO_HANDLER;
   1185
   1186			/* Notify all devices to be down */
   1187			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
   1188			eeh_set_channel_state(pe, pci_channel_io_perm_failure);
   1189			eeh_pe_report(
   1190				"error_detected(permanent failure)", pe,
   1191				eeh_report_failure, NULL);
   1192
   1193			pci_lock_rescan_remove();
   1194			list_for_each_entry(hose, &hose_list, list_node) {
   1195				phb_pe = eeh_phb_pe_get(hose);
   1196				if (!phb_pe ||
   1197				    !(phb_pe->state & EEH_PE_ISOLATED) ||
   1198				    (phb_pe->state & EEH_PE_RECOVERING))
   1199					continue;
   1200
   1201				bus = eeh_pe_bus_get(phb_pe);
   1202				if (!bus) {
   1203					pr_err("%s: Cannot find PCI bus for "
   1204					       "PHB#%x-PE#%x\n",
   1205					       __func__,
   1206					       pe->phb->global_number,
   1207					       pe->addr);
   1208					break;
   1209				}
   1210				pci_hp_remove_devices(bus);
   1211			}
   1212			pci_unlock_rescan_remove();
   1213		}
   1214
   1215		/*
   1216		 * If we have detected dead IOC, we needn't proceed
   1217		 * any more since all PHBs would have been removed
   1218		 */
   1219		if (rc == EEH_NEXT_ERR_DEAD_IOC)
   1220			break;
   1221	} while (rc != EEH_NEXT_ERR_NONE);
   1222}