eeh-powernv.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
eeh-powernv.c (44576B)
      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * PowerNV Platform dependent EEH operations
      4 *
      5 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
      6 */
      7
      8#include <linux/atomic.h>
      9#include <linux/debugfs.h>
     10#include <linux/delay.h>
     11#include <linux/export.h>
     12#include <linux/init.h>
     13#include <linux/interrupt.h>
     14#include <linux/irqdomain.h>
     15#include <linux/list.h>
     16#include <linux/msi.h>
     17#include <linux/of.h>
     18#include <linux/pci.h>
     19#include <linux/proc_fs.h>
     20#include <linux/rbtree.h>
     21#include <linux/sched.h>
     22#include <linux/seq_file.h>
     23#include <linux/spinlock.h>
     24
     25#include <asm/eeh.h>
     26#include <asm/eeh_event.h>
     27#include <asm/firmware.h>
     28#include <asm/io.h>
     29#include <asm/iommu.h>
     30#include <asm/machdep.h>
     31#include <asm/msi_bitmap.h>
     32#include <asm/opal.h>
     33#include <asm/ppc-pci.h>
     34#include <asm/pnv-pci.h>
     35
     36#include "powernv.h"
     37#include "pci.h"
     38#include "../../../../drivers/pci/pci.h"
     39
/*
 * Linux interrupt number for the OPAL PCI error event. Holds -EINVAL
 * until pnv_eeh_post_init() stores the result of opal_event_request().
 */
static int eeh_event_irq = -EINVAL;
     41
     42static void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
     43{
     44	dev_dbg(&pdev->dev, "EEH: Setting up device\n");
     45	eeh_probe_device(pdev);
     46}
     47
     48static irqreturn_t pnv_eeh_event(int irq, void *data)
     49{
     50	/*
     51	 * We simply send a special EEH event if EEH has been
     52	 * enabled. We don't care about EEH events until we've
     53	 * finished processing the outstanding ones. Event processing
     54	 * gets unmasked in next_error() if EEH is enabled.
     55	 */
     56	disable_irq_nosync(irq);
     57
     58	if (eeh_enabled())
     59		eeh_send_failure_event(NULL);
     60
     61	return IRQ_HANDLED;
     62}
     63
     64#ifdef CONFIG_DEBUG_FS
     65static ssize_t pnv_eeh_ei_write(struct file *filp,
     66				const char __user *user_buf,
     67				size_t count, loff_t *ppos)
     68{
     69	struct pci_controller *hose = filp->private_data;
     70	struct eeh_pe *pe;
     71	int pe_no, type, func;
     72	unsigned long addr, mask;
     73	char buf[50];
     74	int ret;
     75
     76	if (!eeh_ops || !eeh_ops->err_inject)
     77		return -ENXIO;
     78
     79	/* Copy over argument buffer */
     80	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
     81	if (!ret)
     82		return -EFAULT;
     83
     84	/* Retrieve parameters */
     85	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
     86		     &pe_no, &type, &func, &addr, &mask);
     87	if (ret != 5)
     88		return -EINVAL;
     89
     90	/* Retrieve PE */
     91	pe = eeh_pe_get(hose, pe_no);
     92	if (!pe)
     93		return -ENODEV;
     94
     95	/* Do error injection */
     96	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
     97	return ret < 0 ? ret : count;
     98}
     99
    100static const struct file_operations pnv_eeh_ei_fops = {
    101	.open	= simple_open,
    102	.llseek	= no_llseek,
    103	.write	= pnv_eeh_ei_write,
    104};
    105
    106static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
    107{
    108	struct pci_controller *hose = data;
    109	struct pnv_phb *phb = hose->private_data;
    110
    111	out_be64(phb->regs + offset, val);
    112	return 0;
    113}
    114
    115static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
    116{
    117	struct pci_controller *hose = data;
    118	struct pnv_phb *phb = hose->private_data;
    119
    120	*val = in_be64(phb->regs + offset);
    121	return 0;
    122}
    123
    124#define PNV_EEH_DBGFS_ENTRY(name, reg)				\
    125static int pnv_eeh_dbgfs_set_##name(void *data, u64 val)	\
    126{								\
    127	return pnv_eeh_dbgfs_set(data, reg, val);		\
    128}								\
    129								\
    130static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val)	\
    131{								\
    132	return pnv_eeh_dbgfs_get(data, reg, val);		\
    133}								\
    134								\
    135DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name,		\
    136			pnv_eeh_dbgfs_get_##name,		\
    137                        pnv_eeh_dbgfs_set_##name,		\
    138			"0x%llx\n")
    139
    140PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
    141PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
    142PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
    143
    144#endif /* CONFIG_DEBUG_FS */
    145
    146static void pnv_eeh_enable_phbs(void)
    147{
    148	struct pci_controller *hose;
    149	struct pnv_phb *phb;
    150
    151	list_for_each_entry(hose, &hose_list, list_node) {
    152		phb = hose->private_data;
    153		/*
    154		 * If EEH is enabled, we're going to rely on that.
    155		 * Otherwise, we restore to conventional mechanism
    156		 * to clear frozen PE during PCI config access.
    157		 */
    158		if (eeh_enabled())
    159			phb->flags |= PNV_PHB_FLAG_EEH;
    160		else
    161			phb->flags &= ~PNV_PHB_FLAG_EEH;
    162	}
    163}
    164
    165/**
    166 * pnv_eeh_post_init - EEH platform dependent post initialization
    167 *
    168 * EEH platform dependent post initialization on powernv. When
    169 * the function is called, the EEH PEs and devices should have
    170 * been built. If the I/O cache staff has been built, EEH is
    171 * ready to supply service.
    172 */
    173int pnv_eeh_post_init(void)
    174{
    175	struct pci_controller *hose;
    176	struct pnv_phb *phb;
    177	int ret = 0;
    178
    179	eeh_show_enabled();
    180
    181	/* Register OPAL event notifier */
    182	eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
    183	if (eeh_event_irq < 0) {
    184		pr_err("%s: Can't register OPAL event interrupt (%d)\n",
    185		       __func__, eeh_event_irq);
    186		return eeh_event_irq;
    187	}
    188
    189	ret = request_irq(eeh_event_irq, pnv_eeh_event,
    190			  IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
    191	if (ret < 0) {
    192		irq_dispose_mapping(eeh_event_irq);
    193		pr_err("%s: Can't request OPAL event interrupt (%d)\n",
    194		       __func__, eeh_event_irq);
    195		return ret;
    196	}
    197
    198	if (!eeh_enabled())
    199		disable_irq(eeh_event_irq);
    200
    201	pnv_eeh_enable_phbs();
    202
    203	list_for_each_entry(hose, &hose_list, list_node) {
    204		phb = hose->private_data;
    205
    206		/* Create debugfs entries */
    207#ifdef CONFIG_DEBUG_FS
    208		if (phb->has_dbgfs || !phb->dbgfs)
    209			continue;
    210
    211		phb->has_dbgfs = 1;
    212		debugfs_create_file("err_injct", 0200,
    213				    phb->dbgfs, hose,
    214				    &pnv_eeh_ei_fops);
    215
    216		debugfs_create_file("err_injct_outbound", 0600,
    217				    phb->dbgfs, hose,
    218				    &pnv_eeh_dbgfs_ops_outb);
    219		debugfs_create_file("err_injct_inboundA", 0600,
    220				    phb->dbgfs, hose,
    221				    &pnv_eeh_dbgfs_ops_inbA);
    222		debugfs_create_file("err_injct_inboundB", 0600,
    223				    phb->dbgfs, hose,
    224				    &pnv_eeh_dbgfs_ops_inbB);
    225#endif /* CONFIG_DEBUG_FS */
    226	}
    227
    228	return ret;
    229}
    230
    231static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
    232{
    233	int pos = PCI_CAPABILITY_LIST;
    234	int cnt = 48;   /* Maximal number of capabilities */
    235	u32 status, id;
    236
    237	if (!pdn)
    238		return 0;
    239
    240	/* Check if the device supports capabilities */
    241	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
    242	if (!(status & PCI_STATUS_CAP_LIST))
    243		return 0;
    244
    245	while (cnt--) {
    246		pnv_pci_cfg_read(pdn, pos, 1, &pos);
    247		if (pos < 0x40)
    248			break;
    249
    250		pos &= ~3;
    251		pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
    252		if (id == 0xff)
    253			break;
    254
    255		/* Found */
    256		if (id == cap)
    257			return pos;
    258
    259		/* Next one */
    260		pos += PCI_CAP_LIST_NEXT;
    261	}
    262
    263	return 0;
    264}
    265
    266static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
    267{
    268	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
    269	u32 header;
    270	int pos = 256, ttl = (4096 - 256) / 8;
    271
    272	if (!edev || !edev->pcie_cap)
    273		return 0;
    274	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
    275		return 0;
    276	else if (!header)
    277		return 0;
    278
    279	while (ttl-- > 0) {
    280		if (PCI_EXT_CAP_ID(header) == cap && pos)
    281			return pos;
    282
    283		pos = PCI_EXT_CAP_NEXT(header);
    284		if (pos < 256)
    285			break;
    286
    287		if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
    288			break;
    289	}
    290
    291	return 0;
    292}
    293
    294static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
    295{
    296	struct pci_controller *hose = pdev->bus->sysdata;
    297	struct pnv_phb *phb = hose->private_data;
    298	struct pci_dev *parent = pdev->bus->self;
    299
    300#ifdef CONFIG_PCI_IOV
    301	/* for VFs we use the PF's PE as the upstream PE */
    302	if (pdev->is_virtfn)
    303		parent = pdev->physfn;
    304#endif
    305
    306	/* otherwise use the PE of our parent bridge */
    307	if (parent) {
    308		struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
    309
    310		return eeh_pe_get(phb->hose, ioda_pe->pe_number);
    311	}
    312
    313	return NULL;
    314}
    315
    316/**
    317 * pnv_eeh_probe - Do probe on PCI device
    318 * @pdev: pci_dev to probe
    319 *
    320 * Create, or find the existing, eeh_dev for this pci_dev.
    321 */
    322static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
    323{
    324	struct pci_dn *pdn = pci_get_pdn(pdev);
    325	struct pci_controller *hose = pdn->phb;
    326	struct pnv_phb *phb = hose->private_data;
    327	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
    328	struct eeh_pe *upstream_pe;
    329	uint32_t pcie_flags;
    330	int ret;
    331	int config_addr = (pdn->busno << 8) | (pdn->devfn);
    332
    333	/*
    334	 * When probing the root bridge, which doesn't have any
    335	 * subordinate PCI devices. We don't have OF node for
    336	 * the root bridge. So it's not reasonable to continue
    337	 * the probing.
    338	 */
    339	if (!edev || edev->pe)
    340		return NULL;
    341
    342	/* already configured? */
    343	if (edev->pdev) {
    344		pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n",
    345			__func__, hose->global_number, config_addr >> 8,
    346			PCI_SLOT(config_addr), PCI_FUNC(config_addr));
    347		return edev;
    348	}
    349
    350	/* Skip for PCI-ISA bridge */
    351	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
    352		return NULL;
    353
    354	eeh_edev_dbg(edev, "Probing device\n");
    355
    356	/* Initialize eeh device */
    357	edev->mode	&= 0xFFFFFF00;
    358	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
    359	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
    360	edev->af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
    361	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
    362	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
    363		edev->mode |= EEH_DEV_BRIDGE;
    364		if (edev->pcie_cap) {
    365			pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
    366					 2, &pcie_flags);
    367			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
    368			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
    369				edev->mode |= EEH_DEV_ROOT_PORT;
    370			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
    371				edev->mode |= EEH_DEV_DS_PORT;
    372		}
    373	}
    374
    375	edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
    376
    377	upstream_pe = pnv_eeh_get_upstream_pe(pdev);
    378
    379	/* Create PE */
    380	ret = eeh_pe_tree_insert(edev, upstream_pe);
    381	if (ret) {
    382		eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
    383		return NULL;
    384	}
    385
    386	/*
    387	 * If the PE contains any one of following adapters, the
    388	 * PCI config space can't be accessed when dumping EEH log.
    389	 * Otherwise, we will run into fenced PHB caused by shortage
    390	 * of outbound credits in the adapter. The PCI config access
    391	 * should be blocked until PE reset. MMIO access is dropped
    392	 * by hardware certainly. In order to drop PCI config requests,
    393	 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
    394	 * will be checked in the backend for PE state retrieval. If
    395	 * the PE becomes frozen for the first time and the flag has
    396	 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
    397	 * that PE to block its config space.
    398	 *
    399	 * Broadcom BCM5718 2-ports NICs (14e4:1656)
    400	 * Broadcom Austin 4-ports NICs (14e4:1657)
    401	 * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
    402	 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
    403	 */
    404	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
    405	     pdn->device_id == 0x1656) ||
    406	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
    407	     pdn->device_id == 0x1657) ||
    408	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
    409	     pdn->device_id == 0x168a) ||
    410	    (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
    411	     pdn->device_id == 0x168e))
    412		edev->pe->state |= EEH_PE_CFG_RESTRICTED;
    413
    414	/*
    415	 * Cache the PE primary bus, which can't be fetched when
    416	 * full hotplug is in progress. In that case, all child
    417	 * PCI devices of the PE are expected to be removed prior
    418	 * to PE reset.
    419	 */
    420	if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
    421		edev->pe->bus = pci_find_bus(hose->global_number,
    422					     pdn->busno);
    423		if (edev->pe->bus)
    424			edev->pe->state |= EEH_PE_PRI_BUS;
    425	}
    426
    427	/*
    428	 * Enable EEH explicitly so that we will do EEH check
    429	 * while accessing I/O stuff
    430	 */
    431	if (!eeh_has_flag(EEH_ENABLED)) {
    432		enable_irq(eeh_event_irq);
    433		pnv_eeh_enable_phbs();
    434		eeh_add_flag(EEH_ENABLED);
    435	}
    436
    437	/* Save memory bars */
    438	eeh_save_bars(edev);
    439
    440	eeh_edev_dbg(edev, "EEH enabled on device\n");
    441
    442	return edev;
    443}
    444
    445/**
    446 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
    447 * @pe: EEH PE
    448 * @option: operation to be issued
    449 *
    450 * The function is used to control the EEH functionality globally.
    451 * Currently, following options are support according to PAPR:
    452 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
    453 */
    454static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
    455{
    456	struct pci_controller *hose = pe->phb;
    457	struct pnv_phb *phb = hose->private_data;
    458	bool freeze_pe = false;
    459	int opt;
    460	s64 rc;
    461
    462	switch (option) {
    463	case EEH_OPT_DISABLE:
    464		return -EPERM;
    465	case EEH_OPT_ENABLE:
    466		return 0;
    467	case EEH_OPT_THAW_MMIO:
    468		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
    469		break;
    470	case EEH_OPT_THAW_DMA:
    471		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
    472		break;
    473	case EEH_OPT_FREEZE_PE:
    474		freeze_pe = true;
    475		opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
    476		break;
    477	default:
    478		pr_warn("%s: Invalid option %d\n", __func__, option);
    479		return -EINVAL;
    480	}
    481
    482	/* Freeze master and slave PEs if PHB supports compound PEs */
    483	if (freeze_pe) {
    484		if (phb->freeze_pe) {
    485			phb->freeze_pe(phb, pe->addr);
    486			return 0;
    487		}
    488
    489		rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt);
    490		if (rc != OPAL_SUCCESS) {
    491			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
    492				__func__, rc, phb->hose->global_number,
    493				pe->addr);
    494			return -EIO;
    495		}
    496
    497		return 0;
    498	}
    499
    500	/* Unfreeze master and slave PEs if PHB supports */
    501	if (phb->unfreeze_pe)
    502		return phb->unfreeze_pe(phb, pe->addr, opt);
    503
    504	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt);
    505	if (rc != OPAL_SUCCESS) {
    506		pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
    507			__func__, rc, option, phb->hose->global_number,
    508			pe->addr);
    509		return -EIO;
    510	}
    511
    512	return 0;
    513}
    514
    515static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
    516{
    517	struct pnv_phb *phb = pe->phb->private_data;
    518	s64 rc;
    519
    520	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
    521					 phb->diag_data_size);
    522	if (rc != OPAL_SUCCESS)
    523		pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
    524			__func__, rc, pe->phb->global_number);
    525}
    526
    527static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
    528{
    529	struct pnv_phb *phb = pe->phb->private_data;
    530	u8 fstate = 0;
    531	__be16 pcierr = 0;
    532	s64 rc;
    533	int result = 0;
    534
    535	rc = opal_pci_eeh_freeze_status(phb->opal_id,
    536					pe->addr,
    537					&fstate,
    538					&pcierr,
    539					NULL);
    540	if (rc != OPAL_SUCCESS) {
    541		pr_warn("%s: Failure %lld getting PHB#%x state\n",
    542			__func__, rc, phb->hose->global_number);
    543		return EEH_STATE_NOT_SUPPORT;
    544	}
    545
    546	/*
    547	 * Check PHB state. If the PHB is frozen for the
    548	 * first time, to dump the PHB diag-data.
    549	 */
    550	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
    551		result = (EEH_STATE_MMIO_ACTIVE  |
    552			  EEH_STATE_DMA_ACTIVE   |
    553			  EEH_STATE_MMIO_ENABLED |
    554			  EEH_STATE_DMA_ENABLED);
    555	} else if (!(pe->state & EEH_PE_ISOLATED)) {
    556		eeh_pe_mark_isolated(pe);
    557		pnv_eeh_get_phb_diag(pe);
    558
    559		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
    560			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
    561	}
    562
    563	return result;
    564}
    565
    566static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
    567{
    568	struct pnv_phb *phb = pe->phb->private_data;
    569	u8 fstate = 0;
    570	__be16 pcierr = 0;
    571	s64 rc;
    572	int result;
    573
    574	/*
    575	 * We don't clobber hardware frozen state until PE
    576	 * reset is completed. In order to keep EEH core
    577	 * moving forward, we have to return operational
    578	 * state during PE reset.
    579	 */
    580	if (pe->state & EEH_PE_RESET) {
    581		result = (EEH_STATE_MMIO_ACTIVE  |
    582			  EEH_STATE_DMA_ACTIVE   |
    583			  EEH_STATE_MMIO_ENABLED |
    584			  EEH_STATE_DMA_ENABLED);
    585		return result;
    586	}
    587
    588	/*
    589	 * Fetch PE state from hardware. If the PHB
    590	 * supports compound PE, let it handle that.
    591	 */
    592	if (phb->get_pe_state) {
    593		fstate = phb->get_pe_state(phb, pe->addr);
    594	} else {
    595		rc = opal_pci_eeh_freeze_status(phb->opal_id,
    596						pe->addr,
    597						&fstate,
    598						&pcierr,
    599						NULL);
    600		if (rc != OPAL_SUCCESS) {
    601			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
    602				__func__, rc, phb->hose->global_number,
    603				pe->addr);
    604			return EEH_STATE_NOT_SUPPORT;
    605		}
    606	}
    607
    608	/* Figure out state */
    609	switch (fstate) {
    610	case OPAL_EEH_STOPPED_NOT_FROZEN:
    611		result = (EEH_STATE_MMIO_ACTIVE  |
    612			  EEH_STATE_DMA_ACTIVE   |
    613			  EEH_STATE_MMIO_ENABLED |
    614			  EEH_STATE_DMA_ENABLED);
    615		break;
    616	case OPAL_EEH_STOPPED_MMIO_FREEZE:
    617		result = (EEH_STATE_DMA_ACTIVE |
    618			  EEH_STATE_DMA_ENABLED);
    619		break;
    620	case OPAL_EEH_STOPPED_DMA_FREEZE:
    621		result = (EEH_STATE_MMIO_ACTIVE |
    622			  EEH_STATE_MMIO_ENABLED);
    623		break;
    624	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
    625		result = 0;
    626		break;
    627	case OPAL_EEH_STOPPED_RESET:
    628		result = EEH_STATE_RESET_ACTIVE;
    629		break;
    630	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
    631		result = EEH_STATE_UNAVAILABLE;
    632		break;
    633	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
    634		result = EEH_STATE_NOT_SUPPORT;
    635		break;
    636	default:
    637		result = EEH_STATE_NOT_SUPPORT;
    638		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
    639			__func__, phb->hose->global_number,
    640			pe->addr, fstate);
    641	}
    642
    643	/*
    644	 * If PHB supports compound PE, to freeze all
    645	 * slave PEs for consistency.
    646	 *
    647	 * If the PE is switching to frozen state for the
    648	 * first time, to dump the PHB diag-data.
    649	 */
    650	if (!(result & EEH_STATE_NOT_SUPPORT) &&
    651	    !(result & EEH_STATE_UNAVAILABLE) &&
    652	    !(result & EEH_STATE_MMIO_ACTIVE) &&
    653	    !(result & EEH_STATE_DMA_ACTIVE)  &&
    654	    !(pe->state & EEH_PE_ISOLATED)) {
    655		if (phb->freeze_pe)
    656			phb->freeze_pe(phb, pe->addr);
    657
    658		eeh_pe_mark_isolated(pe);
    659		pnv_eeh_get_phb_diag(pe);
    660
    661		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
    662			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
    663	}
    664
    665	return result;
    666}
    667
    668/**
    669 * pnv_eeh_get_state - Retrieve PE state
    670 * @pe: EEH PE
    671 * @delay: delay while PE state is temporarily unavailable
    672 *
    673 * Retrieve the state of the specified PE. For IODA-compitable
    674 * platform, it should be retrieved from IODA table. Therefore,
    675 * we prefer passing down to hardware implementation to handle
    676 * it.
    677 */
    678static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
    679{
    680	int ret;
    681
    682	if (pe->type & EEH_PE_PHB)
    683		ret = pnv_eeh_get_phb_state(pe);
    684	else
    685		ret = pnv_eeh_get_pe_state(pe);
    686
    687	if (!delay)
    688		return ret;
    689
    690	/*
    691	 * If the PE state is temporarily unavailable,
    692	 * to inform the EEH core delay for default
    693	 * period (1 second)
    694	 */
    695	*delay = 0;
    696	if (ret & EEH_STATE_UNAVAILABLE)
    697		*delay = 1000;
    698
    699	return ret;
    700}
    701
    702static s64 pnv_eeh_poll(unsigned long id)
    703{
    704	s64 rc = OPAL_HARDWARE;
    705
    706	while (1) {
    707		rc = opal_pci_poll(id);
    708		if (rc <= 0)
    709			break;
    710
    711		if (system_state < SYSTEM_RUNNING)
    712			udelay(1000 * rc);
    713		else
    714			msleep(rc);
    715	}
    716
    717	return rc;
    718}
    719
    720int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
    721{
    722	struct pnv_phb *phb = hose->private_data;
    723	s64 rc = OPAL_HARDWARE;
    724
    725	pr_debug("%s: Reset PHB#%x, option=%d\n",
    726		 __func__, hose->global_number, option);
    727
    728	/* Issue PHB complete reset request */
    729	if (option == EEH_RESET_FUNDAMENTAL ||
    730	    option == EEH_RESET_HOT)
    731		rc = opal_pci_reset(phb->opal_id,
    732				    OPAL_RESET_PHB_COMPLETE,
    733				    OPAL_ASSERT_RESET);
    734	else if (option == EEH_RESET_DEACTIVATE)
    735		rc = opal_pci_reset(phb->opal_id,
    736				    OPAL_RESET_PHB_COMPLETE,
    737				    OPAL_DEASSERT_RESET);
    738	if (rc < 0)
    739		goto out;
    740
    741	/*
    742	 * Poll state of the PHB until the request is done
    743	 * successfully. The PHB reset is usually PHB complete
    744	 * reset followed by hot reset on root bus. So we also
    745	 * need the PCI bus settlement delay.
    746	 */
    747	if (rc > 0)
    748		rc = pnv_eeh_poll(phb->opal_id);
    749	if (option == EEH_RESET_DEACTIVATE) {
    750		if (system_state < SYSTEM_RUNNING)
    751			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
    752		else
    753			msleep(EEH_PE_RST_SETTLE_TIME);
    754	}
    755out:
    756	if (rc != OPAL_SUCCESS)
    757		return -EIO;
    758
    759	return 0;
    760}
    761
    762static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
    763{
    764	struct pnv_phb *phb = hose->private_data;
    765	s64 rc = OPAL_HARDWARE;
    766
    767	pr_debug("%s: Reset PHB#%x, option=%d\n",
    768		 __func__, hose->global_number, option);
    769
    770	/*
    771	 * During the reset deassert time, we needn't care
    772	 * the reset scope because the firmware does nothing
    773	 * for fundamental or hot reset during deassert phase.
    774	 */
    775	if (option == EEH_RESET_FUNDAMENTAL)
    776		rc = opal_pci_reset(phb->opal_id,
    777				    OPAL_RESET_PCI_FUNDAMENTAL,
    778				    OPAL_ASSERT_RESET);
    779	else if (option == EEH_RESET_HOT)
    780		rc = opal_pci_reset(phb->opal_id,
    781				    OPAL_RESET_PCI_HOT,
    782				    OPAL_ASSERT_RESET);
    783	else if (option == EEH_RESET_DEACTIVATE)
    784		rc = opal_pci_reset(phb->opal_id,
    785				    OPAL_RESET_PCI_HOT,
    786				    OPAL_DEASSERT_RESET);
    787	if (rc < 0)
    788		goto out;
    789
    790	/* Poll state of the PHB until the request is done */
    791	if (rc > 0)
    792		rc = pnv_eeh_poll(phb->opal_id);
    793	if (option == EEH_RESET_DEACTIVATE)
    794		msleep(EEH_PE_RST_SETTLE_TIME);
    795out:
    796	if (rc != OPAL_SUCCESS)
    797		return -EIO;
    798
    799	return 0;
    800}
    801
    802static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
    803{
    804	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
    805	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
    806	int aer = edev ? edev->aer_cap : 0;
    807	u32 ctrl;
    808
    809	pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",
    810		 __func__, pci_domain_nr(dev->bus),
    811		 dev->bus->number, option);
    812
    813	switch (option) {
    814	case EEH_RESET_FUNDAMENTAL:
    815	case EEH_RESET_HOT:
    816		/* Don't report linkDown event */
    817		if (aer) {
    818			eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,
    819					     4, &ctrl);
    820			ctrl |= PCI_ERR_UNC_SURPDN;
    821			eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,
    822					      4, ctrl);
    823		}
    824
    825		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
    826		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
    827		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);
    828
    829		msleep(EEH_PE_RST_HOLD_TIME);
    830		break;
    831	case EEH_RESET_DEACTIVATE:
    832		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
    833		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
    834		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);
    835
    836		msleep(EEH_PE_RST_SETTLE_TIME);
    837
    838		/* Continue reporting linkDown event */
    839		if (aer) {
    840			eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,
    841					     4, &ctrl);
    842			ctrl &= ~PCI_ERR_UNC_SURPDN;
    843			eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,
    844					      4, ctrl);
    845		}
    846
    847		break;
    848	}
    849
    850	return 0;
    851}
    852
    853static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
    854{
    855	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
    856	struct pnv_phb *phb = hose->private_data;
    857	struct device_node *dn = pci_device_to_OF_node(pdev);
    858	uint64_t id = PCI_SLOT_ID(phb->opal_id,
    859				  (pdev->bus->number << 8) | pdev->devfn);
    860	uint8_t scope;
    861	int64_t rc;
    862
    863	/* Hot reset to the bus if firmware cannot handle */
    864	if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
    865		return __pnv_eeh_bridge_reset(pdev, option);
    866
    867	pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",
    868		 __func__, pci_domain_nr(pdev->bus),
    869		 pdev->bus->number, option);
    870
    871	switch (option) {
    872	case EEH_RESET_FUNDAMENTAL:
    873		scope = OPAL_RESET_PCI_FUNDAMENTAL;
    874		break;
    875	case EEH_RESET_HOT:
    876		scope = OPAL_RESET_PCI_HOT;
    877		break;
    878	case EEH_RESET_DEACTIVATE:
    879		return 0;
    880	default:
    881		dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n",
    882			__func__, option);
    883		return -EINVAL;
    884	}
    885
    886	rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
    887	if (rc <= OPAL_SUCCESS)
    888		goto out;
    889
    890	rc = pnv_eeh_poll(id);
    891out:
    892	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
    893}
    894
    895void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
    896{
    897	struct pci_controller *hose;
    898
    899	if (pci_is_root_bus(dev->bus)) {
    900		hose = pci_bus_to_host(dev->bus);
    901		pnv_eeh_root_reset(hose, EEH_RESET_HOT);
    902		pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
    903	} else {
    904		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
    905		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
    906	}
    907}
    908
    909static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
    910				     int pos, u16 mask)
    911{
    912	struct eeh_dev *edev = pdn->edev;
    913	int i, status = 0;
    914
    915	/* Wait for Transaction Pending bit to be cleared */
    916	for (i = 0; i < 4; i++) {
    917		eeh_ops->read_config(edev, pos, 2, &status);
    918		if (!(status & mask))
    919			return;
    920
    921		msleep((1 << i) * 100);
    922	}
    923
    924	pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
    925		__func__, type,
    926		pdn->phb->global_number, pdn->busno,
    927		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
    928}
    929
    930static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
    931{
    932	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
    933	u32 reg = 0;
    934
    935	if (WARN_ON(!edev->pcie_cap))
    936		return -ENOTTY;
    937
    938	eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
    939	if (!(reg & PCI_EXP_DEVCAP_FLR))
    940		return -ENOTTY;
    941
    942	switch (option) {
    943	case EEH_RESET_HOT:
    944	case EEH_RESET_FUNDAMENTAL:
    945		pnv_eeh_wait_for_pending(pdn, "",
    946					 edev->pcie_cap + PCI_EXP_DEVSTA,
    947					 PCI_EXP_DEVSTA_TRPND);
    948		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
    949				     4, &reg);
    950		reg |= PCI_EXP_DEVCTL_BCR_FLR;
    951		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
    952				      4, reg);
    953		msleep(EEH_PE_RST_HOLD_TIME);
    954		break;
    955	case EEH_RESET_DEACTIVATE:
    956		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
    957				     4, &reg);
    958		reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
    959		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
    960				      4, reg);
    961		msleep(EEH_PE_RST_SETTLE_TIME);
    962		break;
    963	}
    964
    965	return 0;
    966}
    967
    968static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
    969{
    970	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
    971	u32 cap = 0;
    972
    973	if (WARN_ON(!edev->af_cap))
    974		return -ENOTTY;
    975
    976	eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap);
    977	if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
    978		return -ENOTTY;
    979
    980	switch (option) {
    981	case EEH_RESET_HOT:
    982	case EEH_RESET_FUNDAMENTAL:
    983		/*
    984		 * Wait for Transaction Pending bit to clear. A word-aligned
    985		 * test is used, so we use the control offset rather than status
    986		 * and shift the test bit to match.
    987		 */
    988		pnv_eeh_wait_for_pending(pdn, "AF",
    989					 edev->af_cap + PCI_AF_CTRL,
    990					 PCI_AF_STATUS_TP << 8);
    991		eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL,
    992				      1, PCI_AF_CTRL_FLR);
    993		msleep(EEH_PE_RST_HOLD_TIME);
    994		break;
    995	case EEH_RESET_DEACTIVATE:
    996		eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0);
    997		msleep(EEH_PE_RST_SETTLE_TIME);
    998		break;
    999	}
   1000
   1001	return 0;
   1002}
   1003
   1004static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
   1005{
   1006	struct eeh_dev *edev;
   1007	struct pci_dn *pdn;
   1008	int ret;
   1009
   1010	/* The VF PE should have only one child device */
   1011	edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
   1012	pdn = eeh_dev_to_pdn(edev);
   1013	if (!pdn)
   1014		return -ENXIO;
   1015
   1016	ret = pnv_eeh_do_flr(pdn, option);
   1017	if (!ret)
   1018		return ret;
   1019
   1020	return pnv_eeh_do_af_flr(pdn, option);
   1021}
   1022
   1023/**
   1024 * pnv_eeh_reset - Reset the specified PE
   1025 * @pe: EEH PE
   1026 * @option: reset option
   1027 *
   1028 * Do reset on the indicated PE. For PCI bus sensitive PE,
   1029 * we need to reset the parent p2p bridge. The PHB has to
   1030 * be reinitialized if the p2p bridge is root bridge. For
   1031 * PCI device sensitive PE, we will try to reset the device
   1032 * through FLR. For now, we don't have OPAL APIs to do HARD
   1033 * reset yet, so all reset would be SOFT (HOT) reset.
   1034 */
   1035static int pnv_eeh_reset(struct eeh_pe *pe, int option)
   1036{
   1037	struct pci_controller *hose = pe->phb;
   1038	struct pnv_phb *phb;
   1039	struct pci_bus *bus;
   1040	int64_t rc;
   1041
   1042	/*
   1043	 * For PHB reset, we always have complete reset. For those PEs whose
   1044	 * primary bus derived from root complex (root bus) or root port
   1045	 * (usually bus#1), we apply hot or fundamental reset on the root port.
   1046	 * For other PEs, we always have hot reset on the PE primary bus.
   1047	 *
   1048	 * Here, we have different design to pHyp, which always clear the
   1049	 * frozen state during PE reset. However, the good idea here from
   1050	 * benh is to keep frozen state before we get PE reset done completely
   1051	 * (until BAR restore). With the frozen state, HW drops illegal IO
   1052	 * or MMIO access, which can incur recursive frozen PE during PE
   1053	 * reset. The side effect is that EEH core has to clear the frozen
   1054	 * state explicitly after BAR restore.
   1055	 */
   1056	if (pe->type & EEH_PE_PHB)
   1057		return pnv_eeh_phb_reset(hose, option);
   1058
   1059	/*
   1060	 * The frozen PE might be caused by PAPR error injection
   1061	 * registers, which are expected to be cleared after hitting
   1062	 * frozen PE as stated in the hardware spec. Unfortunately,
   1063	 * that's not true on P7IOC. So we have to clear it manually
   1064	 * to avoid recursive EEH errors during recovery.
   1065	 */
   1066	phb = hose->private_data;
   1067	if (phb->model == PNV_PHB_MODEL_P7IOC &&
   1068	    (option == EEH_RESET_HOT ||
   1069	     option == EEH_RESET_FUNDAMENTAL)) {
   1070		rc = opal_pci_reset(phb->opal_id,
   1071				    OPAL_RESET_PHB_ERROR,
   1072				    OPAL_ASSERT_RESET);
   1073		if (rc != OPAL_SUCCESS) {
   1074			pr_warn("%s: Failure %lld clearing error injection registers\n",
   1075				__func__, rc);
   1076			return -EIO;
   1077		}
   1078	}
   1079
   1080	if (pe->type & EEH_PE_VF)
   1081		return pnv_eeh_reset_vf_pe(pe, option);
   1082
   1083	bus = eeh_pe_bus_get(pe);
   1084	if (!bus) {
   1085		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
   1086			__func__, pe->phb->global_number, pe->addr);
   1087		return -EIO;
   1088	}
   1089
   1090	if (pci_is_root_bus(bus))
   1091		return pnv_eeh_root_reset(hose, option);
   1092
   1093	/*
   1094	 * For hot resets try use the generic PCI error recovery reset
   1095	 * functions. These correctly handles the case where the secondary
   1096	 * bus is behind a hotplug slot and it will use the slot provided
   1097	 * reset methods to prevent spurious hotplug events during the reset.
   1098	 *
   1099	 * Fundamental resets need to be handled internally to EEH since the
   1100	 * PCI core doesn't really have a concept of a fundamental reset,
   1101	 * mainly because there's no standard way to generate one. Only a
   1102	 * few devices require an FRESET so it should be fine.
   1103	 */
   1104	if (option != EEH_RESET_FUNDAMENTAL) {
   1105		/*
   1106		 * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the
   1107		 *     de-assert step. It's like the OPAL reset API was
   1108		 *     poorly designed or something...
   1109		 */
   1110		if (option == EEH_RESET_DEACTIVATE)
   1111			return 0;
   1112
   1113		rc = pci_bus_error_reset(bus->self);
   1114		if (!rc)
   1115			return 0;
   1116	}
   1117
   1118	/* otherwise, use the generic bridge reset. this might call into FW */
   1119	if (pci_is_root_bus(bus->parent))
   1120		return pnv_eeh_root_reset(hose, option);
   1121	return pnv_eeh_bridge_reset(bus->self, option);
   1122}
   1123
   1124/**
   1125 * pnv_eeh_get_log - Retrieve error log
   1126 * @pe: EEH PE
   1127 * @severity: temporary or permanent error log
   1128 * @drv_log: driver log to be combined with retrieved error log
   1129 * @len: length of driver log
   1130 *
   1131 * Retrieve the temporary or permanent error from the PE.
   1132 */
   1133static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
   1134			   char *drv_log, unsigned long len)
   1135{
   1136	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
   1137		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
   1138
   1139	return 0;
   1140}
   1141
   1142/**
   1143 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
   1144 * @pe: EEH PE
   1145 *
   1146 * The function will be called to reconfigure the bridges included
   1147 * in the specified PE so that the mulfunctional PE would be recovered
   1148 * again.
   1149 */
   1150static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
   1151{
   1152	return 0;
   1153}
   1154
   1155/**
   1156 * pnv_pe_err_inject - Inject specified error to the indicated PE
   1157 * @pe: the indicated PE
   1158 * @type: error type
   1159 * @func: specific error type
   1160 * @addr: address
   1161 * @mask: address mask
   1162 *
   1163 * The routine is called to inject specified error, which is
   1164 * determined by @type and @func, to the indicated PE for
   1165 * testing purpose.
   1166 */
   1167static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
   1168			      unsigned long addr, unsigned long mask)
   1169{
   1170	struct pci_controller *hose = pe->phb;
   1171	struct pnv_phb *phb = hose->private_data;
   1172	s64 rc;
   1173
   1174	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
   1175	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
   1176		pr_warn("%s: Invalid error type %d\n",
   1177			__func__, type);
   1178		return -ERANGE;
   1179	}
   1180
   1181	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
   1182	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
   1183		pr_warn("%s: Invalid error function %d\n",
   1184			__func__, func);
   1185		return -ERANGE;
   1186	}
   1187
   1188	/* Firmware supports error injection ? */
   1189	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
   1190		pr_warn("%s: Firmware doesn't support error injection\n",
   1191			__func__);
   1192		return -ENXIO;
   1193	}
   1194
   1195	/* Do error injection */
   1196	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
   1197				 type, func, addr, mask);
   1198	if (rc != OPAL_SUCCESS) {
   1199		pr_warn("%s: Failure %lld injecting error "
   1200			"%d-%d to PHB#%x-PE#%x\n",
   1201			__func__, rc, type, func,
   1202			hose->global_number, pe->addr);
   1203		return -EIO;
   1204	}
   1205
   1206	return 0;
   1207}
   1208
   1209static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
   1210{
   1211	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
   1212
   1213	if (!edev || !edev->pe)
   1214		return false;
   1215
   1216	/*
   1217	 * We will issue FLR or AF FLR to all VFs, which are contained
   1218	 * in VF PE. It relies on the EEH PCI config accessors. So we
   1219	 * can't block them during the window.
   1220	 */
   1221	if (edev->physfn && (edev->pe->state & EEH_PE_RESET))
   1222		return false;
   1223
   1224	if (edev->pe->state & EEH_PE_CFG_BLOCKED)
   1225		return true;
   1226
   1227	return false;
   1228}
   1229
   1230static int pnv_eeh_read_config(struct eeh_dev *edev,
   1231			       int where, int size, u32 *val)
   1232{
   1233	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
   1234
   1235	if (!pdn)
   1236		return PCIBIOS_DEVICE_NOT_FOUND;
   1237
   1238	if (pnv_eeh_cfg_blocked(pdn)) {
   1239		*val = 0xFFFFFFFF;
   1240		return PCIBIOS_SET_FAILED;
   1241	}
   1242
   1243	return pnv_pci_cfg_read(pdn, where, size, val);
   1244}
   1245
   1246static int pnv_eeh_write_config(struct eeh_dev *edev,
   1247				int where, int size, u32 val)
   1248{
   1249	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
   1250
   1251	if (!pdn)
   1252		return PCIBIOS_DEVICE_NOT_FOUND;
   1253
   1254	if (pnv_eeh_cfg_blocked(pdn))
   1255		return PCIBIOS_SET_FAILED;
   1256
   1257	return pnv_pci_cfg_write(pdn, where, size, val);
   1258}
   1259
   1260static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
   1261{
   1262	/* GEM */
   1263	if (data->gemXfir || data->gemRfir ||
   1264	    data->gemRirqfir || data->gemMask || data->gemRwof)
   1265		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
   1266			be64_to_cpu(data->gemXfir),
   1267			be64_to_cpu(data->gemRfir),
   1268			be64_to_cpu(data->gemRirqfir),
   1269			be64_to_cpu(data->gemMask),
   1270			be64_to_cpu(data->gemRwof));
   1271
   1272	/* LEM */
   1273	if (data->lemFir || data->lemErrMask ||
   1274	    data->lemAction0 || data->lemAction1 || data->lemWof)
   1275		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
   1276			be64_to_cpu(data->lemFir),
   1277			be64_to_cpu(data->lemErrMask),
   1278			be64_to_cpu(data->lemAction0),
   1279			be64_to_cpu(data->lemAction1),
   1280			be64_to_cpu(data->lemWof));
   1281}
   1282
   1283static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
   1284{
   1285	struct pnv_phb *phb = hose->private_data;
   1286	struct OpalIoP7IOCErrorData *data =
   1287		(struct OpalIoP7IOCErrorData*)phb->diag_data;
   1288	long rc;
   1289
   1290	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
   1291	if (rc != OPAL_SUCCESS) {
   1292		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
   1293			__func__, phb->hub_id, rc);
   1294		return;
   1295	}
   1296
   1297	switch (be16_to_cpu(data->type)) {
   1298	case OPAL_P7IOC_DIAG_TYPE_RGC:
   1299		pr_info("P7IOC diag-data for RGC\n\n");
   1300		pnv_eeh_dump_hub_diag_common(data);
   1301		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
   1302			pr_info("  RGC: %016llx %016llx\n",
   1303				be64_to_cpu(data->rgc.rgcStatus),
   1304				be64_to_cpu(data->rgc.rgcLdcp));
   1305		break;
   1306	case OPAL_P7IOC_DIAG_TYPE_BI:
   1307		pr_info("P7IOC diag-data for BI %s\n\n",
   1308			data->bi.biDownbound ? "Downbound" : "Upbound");
   1309		pnv_eeh_dump_hub_diag_common(data);
   1310		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
   1311		    data->bi.biLdcp2 || data->bi.biFenceStatus)
   1312			pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
   1313				be64_to_cpu(data->bi.biLdcp0),
   1314				be64_to_cpu(data->bi.biLdcp1),
   1315				be64_to_cpu(data->bi.biLdcp2),
   1316				be64_to_cpu(data->bi.biFenceStatus));
   1317		break;
   1318	case OPAL_P7IOC_DIAG_TYPE_CI:
   1319		pr_info("P7IOC diag-data for CI Port %d\n\n",
   1320			data->ci.ciPort);
   1321		pnv_eeh_dump_hub_diag_common(data);
   1322		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
   1323			pr_info("  CI:  %016llx %016llx\n",
   1324				be64_to_cpu(data->ci.ciPortStatus),
   1325				be64_to_cpu(data->ci.ciPortLdcp));
   1326		break;
   1327	case OPAL_P7IOC_DIAG_TYPE_MISC:
   1328		pr_info("P7IOC diag-data for MISC\n\n");
   1329		pnv_eeh_dump_hub_diag_common(data);
   1330		break;
   1331	case OPAL_P7IOC_DIAG_TYPE_I2C:
   1332		pr_info("P7IOC diag-data for I2C\n\n");
   1333		pnv_eeh_dump_hub_diag_common(data);
   1334		break;
   1335	default:
   1336		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
   1337			__func__, phb->hub_id, data->type);
   1338	}
   1339}
   1340
   1341static int pnv_eeh_get_pe(struct pci_controller *hose,
   1342			  u16 pe_no, struct eeh_pe **pe)
   1343{
   1344	struct pnv_phb *phb = hose->private_data;
   1345	struct pnv_ioda_pe *pnv_pe;
   1346	struct eeh_pe *dev_pe;
   1347
   1348	/*
   1349	 * If PHB supports compound PE, to fetch
   1350	 * the master PE because slave PE is invisible
   1351	 * to EEH core.
   1352	 */
   1353	pnv_pe = &phb->ioda.pe_array[pe_no];
   1354	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
   1355		pnv_pe = pnv_pe->master;
   1356		WARN_ON(!pnv_pe ||
   1357			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
   1358		pe_no = pnv_pe->pe_number;
   1359	}
   1360
   1361	/* Find the PE according to PE# */
   1362	dev_pe = eeh_pe_get(hose, pe_no);
   1363	if (!dev_pe)
   1364		return -EEXIST;
   1365
   1366	/* Freeze the (compound) PE */
   1367	*pe = dev_pe;
   1368	if (!(dev_pe->state & EEH_PE_ISOLATED))
   1369		phb->freeze_pe(phb, pe_no);
   1370
   1371	/*
   1372	 * At this point, we're sure the (compound) PE should
   1373	 * have been frozen. However, we still need poke until
   1374	 * hitting the frozen PE on top level.
   1375	 */
   1376	dev_pe = dev_pe->parent;
   1377	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
   1378		int ret;
   1379		ret = eeh_ops->get_state(dev_pe, NULL);
   1380		if (ret <= 0 || eeh_state_active(ret)) {
   1381			dev_pe = dev_pe->parent;
   1382			continue;
   1383		}
   1384
   1385		/* Frozen parent PE */
   1386		*pe = dev_pe;
   1387		if (!(dev_pe->state & EEH_PE_ISOLATED))
   1388			phb->freeze_pe(phb, dev_pe->addr);
   1389
   1390		/* Next one */
   1391		dev_pe = dev_pe->parent;
   1392	}
   1393
   1394	return 0;
   1395}
   1396
   1397/**
   1398 * pnv_eeh_next_error - Retrieve next EEH error to handle
   1399 * @pe: Affected PE
   1400 *
   1401 * The function is expected to be called by EEH core while it gets
   1402 * special EEH event (without binding PE). The function calls to
   1403 * OPAL APIs for next error to handle. The informational error is
   1404 * handled internally by platform. However, the dead IOC, dead PHB,
   1405 * fenced PHB and frozen PE should be handled by EEH core eventually.
   1406 */
   1407static int pnv_eeh_next_error(struct eeh_pe **pe)
   1408{
   1409	struct pci_controller *hose;
   1410	struct pnv_phb *phb;
   1411	struct eeh_pe *phb_pe, *parent_pe;
   1412	__be64 frozen_pe_no;
   1413	__be16 err_type, severity;
   1414	long rc;
   1415	int state, ret = EEH_NEXT_ERR_NONE;
   1416
   1417	/*
   1418	 * While running here, it's safe to purge the event queue. The
   1419	 * event should still be masked.
   1420	 */
   1421	eeh_remove_event(NULL, false);
   1422
   1423	list_for_each_entry(hose, &hose_list, list_node) {
   1424		/*
   1425		 * If the subordinate PCI buses of the PHB has been
   1426		 * removed or is exactly under error recovery, we
   1427		 * needn't take care of it any more.
   1428		 */
   1429		phb = hose->private_data;
   1430		phb_pe = eeh_phb_pe_get(hose);
   1431		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
   1432			continue;
   1433
   1434		rc = opal_pci_next_error(phb->opal_id,
   1435					 &frozen_pe_no, &err_type, &severity);
   1436		if (rc != OPAL_SUCCESS) {
   1437			pr_devel("%s: Invalid return value on "
   1438				 "PHB#%x (0x%lx) from opal_pci_next_error",
   1439				 __func__, hose->global_number, rc);
   1440			continue;
   1441		}
   1442
   1443		/* If the PHB doesn't have error, stop processing */
   1444		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
   1445		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
   1446			pr_devel("%s: No error found on PHB#%x\n",
   1447				 __func__, hose->global_number);
   1448			continue;
   1449		}
   1450
   1451		/*
   1452		 * Processing the error. We're expecting the error with
   1453		 * highest priority reported upon multiple errors on the
   1454		 * specific PHB.
   1455		 */
   1456		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
   1457			__func__, be16_to_cpu(err_type),
   1458			be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
   1459			hose->global_number);
   1460		switch (be16_to_cpu(err_type)) {
   1461		case OPAL_EEH_IOC_ERROR:
   1462			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
   1463				pr_err("EEH: dead IOC detected\n");
   1464				ret = EEH_NEXT_ERR_DEAD_IOC;
   1465			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
   1466				pr_info("EEH: IOC informative error "
   1467					"detected\n");
   1468				pnv_eeh_get_and_dump_hub_diag(hose);
   1469				ret = EEH_NEXT_ERR_NONE;
   1470			}
   1471
   1472			break;
   1473		case OPAL_EEH_PHB_ERROR:
   1474			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
   1475				*pe = phb_pe;
   1476				pr_err("EEH: dead PHB#%x detected, "
   1477				       "location: %s\n",
   1478					hose->global_number,
   1479					eeh_pe_loc_get(phb_pe));
   1480				ret = EEH_NEXT_ERR_DEAD_PHB;
   1481			} else if (be16_to_cpu(severity) ==
   1482				   OPAL_EEH_SEV_PHB_FENCED) {
   1483				*pe = phb_pe;
   1484				pr_err("EEH: Fenced PHB#%x detected, "
   1485				       "location: %s\n",
   1486					hose->global_number,
   1487					eeh_pe_loc_get(phb_pe));
   1488				ret = EEH_NEXT_ERR_FENCED_PHB;
   1489			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
   1490				pr_info("EEH: PHB#%x informative error "
   1491					"detected, location: %s\n",
   1492					hose->global_number,
   1493					eeh_pe_loc_get(phb_pe));
   1494				pnv_eeh_get_phb_diag(phb_pe);
   1495				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
   1496				ret = EEH_NEXT_ERR_NONE;
   1497			}
   1498
   1499			break;
   1500		case OPAL_EEH_PE_ERROR:
   1501			/*
   1502			 * If we can't find the corresponding PE, we
   1503			 * just try to unfreeze.
   1504			 */
   1505			if (pnv_eeh_get_pe(hose,
   1506				be64_to_cpu(frozen_pe_no), pe)) {
   1507				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
   1508					hose->global_number, be64_to_cpu(frozen_pe_no));
   1509				pr_info("EEH: PHB location: %s\n",
   1510					eeh_pe_loc_get(phb_pe));
   1511
   1512				/* Dump PHB diag-data */
   1513				rc = opal_pci_get_phb_diag_data2(phb->opal_id,
   1514					phb->diag_data, phb->diag_data_size);
   1515				if (rc == OPAL_SUCCESS)
   1516					pnv_pci_dump_phb_diag_data(hose,
   1517							phb->diag_data);
   1518
   1519				/* Try best to clear it */
   1520				opal_pci_eeh_freeze_clear(phb->opal_id,
   1521					be64_to_cpu(frozen_pe_no),
   1522					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
   1523				ret = EEH_NEXT_ERR_NONE;
   1524			} else if ((*pe)->state & EEH_PE_ISOLATED ||
   1525				   eeh_pe_passed(*pe)) {
   1526				ret = EEH_NEXT_ERR_NONE;
   1527			} else {
   1528				pr_err("EEH: Frozen PE#%x "
   1529				       "on PHB#%x detected\n",
   1530				       (*pe)->addr,
   1531					(*pe)->phb->global_number);
   1532				pr_err("EEH: PE location: %s, "
   1533				       "PHB location: %s\n",
   1534				       eeh_pe_loc_get(*pe),
   1535				       eeh_pe_loc_get(phb_pe));
   1536				ret = EEH_NEXT_ERR_FROZEN_PE;
   1537			}
   1538
   1539			break;
   1540		default:
   1541			pr_warn("%s: Unexpected error type %d\n",
   1542				__func__, be16_to_cpu(err_type));
   1543		}
   1544
   1545		/*
   1546		 * EEH core will try recover from fenced PHB or
   1547		 * frozen PE. In the time for frozen PE, EEH core
   1548		 * enable IO path for that before collecting logs,
   1549		 * but it ruins the site. So we have to dump the
   1550		 * log in advance here.
   1551		 */
   1552		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
   1553		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
   1554		    !((*pe)->state & EEH_PE_ISOLATED)) {
   1555			eeh_pe_mark_isolated(*pe);
   1556			pnv_eeh_get_phb_diag(*pe);
   1557
   1558			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
   1559				pnv_pci_dump_phb_diag_data((*pe)->phb,
   1560							   (*pe)->data);
   1561		}
   1562
   1563		/*
   1564		 * We probably have the frozen parent PE out there and
   1565		 * we need have to handle frozen parent PE firstly.
   1566		 */
   1567		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
   1568			parent_pe = (*pe)->parent;
   1569			while (parent_pe) {
   1570				/* Hit the ceiling ? */
   1571				if (parent_pe->type & EEH_PE_PHB)
   1572					break;
   1573
   1574				/* Frozen parent PE ? */
   1575				state = eeh_ops->get_state(parent_pe, NULL);
   1576				if (state > 0 && !eeh_state_active(state))
   1577					*pe = parent_pe;
   1578
   1579				/* Next parent level */
   1580				parent_pe = parent_pe->parent;
   1581			}
   1582
   1583			/* We possibly migrate to another PE */
   1584			eeh_pe_mark_isolated(*pe);
   1585		}
   1586
   1587		/*
   1588		 * If we have no errors on the specific PHB or only
   1589		 * informative error there, we continue poking it.
   1590		 * Otherwise, we need actions to be taken by upper
   1591		 * layer.
   1592		 */
   1593		if (ret > EEH_NEXT_ERR_INF)
   1594			break;
   1595	}
   1596
   1597	/* Unmask the event */
   1598	if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())
   1599		enable_irq(eeh_event_irq);
   1600
   1601	return ret;
   1602}
   1603
   1604static int pnv_eeh_restore_config(struct eeh_dev *edev)
   1605{
   1606	struct pnv_phb *phb;
   1607	s64 ret = 0;
   1608
   1609	if (!edev)
   1610		return -EEXIST;
   1611
   1612	if (edev->physfn)
   1613		return 0;
   1614
   1615	phb = edev->controller->private_data;
   1616	ret = opal_pci_reinit(phb->opal_id,
   1617			      OPAL_REINIT_PCI_DEV, edev->bdfn);
   1618
   1619	if (ret) {
   1620		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
   1621			__func__, edev->bdfn, ret);
   1622		return -EIO;
   1623	}
   1624
   1625	return ret;
   1626}
   1627
   1628static struct eeh_ops pnv_eeh_ops = {
   1629	.name                   = "powernv",
   1630	.probe			= pnv_eeh_probe,
   1631	.set_option             = pnv_eeh_set_option,
   1632	.get_state              = pnv_eeh_get_state,
   1633	.reset                  = pnv_eeh_reset,
   1634	.get_log                = pnv_eeh_get_log,
   1635	.configure_bridge       = pnv_eeh_configure_bridge,
   1636	.err_inject		= pnv_eeh_err_inject,
   1637	.read_config            = pnv_eeh_read_config,
   1638	.write_config           = pnv_eeh_write_config,
   1639	.next_error		= pnv_eeh_next_error,
   1640	.restore_config		= pnv_eeh_restore_config,
   1641	.notify_resume		= NULL
   1642};
   1643
   1644/**
   1645 * eeh_powernv_init - Register platform dependent EEH operations
   1646 *
   1647 * EEH initialization on powernv platform. This function should be
   1648 * called before any EEH related functions.
   1649 */
   1650static int __init eeh_powernv_init(void)
   1651{
   1652	int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
   1653	struct pci_controller *hose;
   1654	struct pnv_phb *phb;
   1655	int ret = -EINVAL;
   1656
   1657	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
   1658		pr_warn("%s: OPAL is required !\n", __func__);
   1659		return -EINVAL;
   1660	}
   1661
   1662	/* Set probe mode */
   1663	eeh_add_flag(EEH_PROBE_MODE_DEV);
   1664
   1665	/*
   1666	 * P7IOC blocks PCI config access to frozen PE, but PHB3
   1667	 * doesn't do that. So we have to selectively enable I/O
   1668	 * prior to collecting error log.
   1669	 */
   1670	list_for_each_entry(hose, &hose_list, list_node) {
   1671		phb = hose->private_data;
   1672
   1673		if (phb->model == PNV_PHB_MODEL_P7IOC)
   1674			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
   1675
   1676		if (phb->diag_data_size > max_diag_size)
   1677			max_diag_size = phb->diag_data_size;
   1678
   1679		break;
   1680	}
   1681
   1682	/*
   1683	 * eeh_init() allocates the eeh_pe and its aux data buf so the
   1684	 * size needs to be set before calling eeh_init().
   1685	 */
   1686	eeh_set_pe_aux_size(max_diag_size);
   1687	ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
   1688
   1689	ret = eeh_init(&pnv_eeh_ops);
   1690	if (!ret)
   1691		pr_info("EEH: PowerNV platform initialized\n");
   1692	else
   1693		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
   1694
   1695	return ret;
   1696}
   1697machine_arch_initcall(powernv, eeh_powernv_init);