cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

pcie.c (39587B)


      1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
      2/*
      3 * Copyright(c) 2015 - 2019 Intel Corporation.
      4 */
      5
      6#include <linux/pci.h>
      7#include <linux/io.h>
      8#include <linux/delay.h>
      9#include <linux/vmalloc.h>
     10#include <linux/aer.h>
     11#include <linux/module.h>
     12
     13#include "hfi.h"
     14#include "chip_registers.h"
     15#include "aspm.h"
     16
     17/*
     18 * This file contains PCIe utility routines.
     19 */
     20
     21/*
     22 * Do all the common PCIe setup and initialization.
     23 */
int hfi1_pcie_init(struct hfi1_devdata *dd)
{
	int ret;
	struct pci_dev *pdev = dd->pcidev;

	ret = pci_enable_device(pdev);
	if (ret) {
		/*
		 * This can happen (in theory) iff:
		 * We did a chip reset, and then failed to reprogram the
		 * BAR, or the chip reset due to an internal error.  We then
		 * unloaded the driver and reloaded it.
		 *
		 * Both reset cases set the BAR back to initial state.  For
		 * the latter case, the AER sticky error bit at offset 0x718
		 * should be set, but the Linux kernel doesn't yet know
		 * about that, it appears.  If the original BAR was retained
		 * in the kernel data structures, this may be OK.
		 */
		dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
		return ret;
	}

	ret = pci_request_regions(pdev, DRIVER_NAME);
	if (ret) {
		dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
		goto bail;
	}

	/* prefer a 64-bit DMA mask; fall back to 32-bit on failure */
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		/*
		 * If the 64 bit setup fails, try 32 bit.  Some systems
		 * do not setup 64 bit maps on systems with 2GB or less
		 * memory installed.
		 */
		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (ret) {
			dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
			goto bail;
		}
	}

	pci_set_master(pdev);
	/* AER enable is best-effort: the result is deliberately ignored */
	(void)pci_enable_pcie_error_reporting(pdev);
	return 0;

bail:
	/* undoes pci_enable_device() and any region request */
	hfi1_pcie_cleanup(pdev);
	return ret;
}
     75
     76/*
     77 * Clean what was done in hfi1_pcie_init()
     78 */
void hfi1_pcie_cleanup(struct pci_dev *pdev)
{
	pci_disable_device(pdev);
	/*
	 * Release regions should be called after the disable. OK to
	 * call if request regions has not been called or failed.
	 */
	pci_release_regions(pdev);
}
     88
     89/*
     90 * Do remaining PCIe setup, once dd is allocated, and save away
     91 * fields required to re-initialize after a chip reset, or for
     92 * various other purposes
     93 */
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
{
	unsigned long len;
	resource_size_t addr;
	int ret = 0;
	u32 rcv_array_count;

	/* BAR 0 covers the whole chip CSR space */
	addr = pci_resource_start(pdev, 0);
	len = pci_resource_len(pdev, 0);

	/*
	 * The TXE PIO buffers are at the tail end of the chip space.
	 * Cut them off and map them separately.
	 */

	/* sanity check vs expectations */
	if (len != TXE_PIO_SEND + TXE_PIO_SIZE) {
		dd_dev_err(dd, "chip PIO range does not match\n");
		return -EINVAL;
	}

	/* uncached mapping of CSRs up to (not including) the RcvArray */
	dd->kregbase1 = ioremap(addr, RCV_ARRAY);
	if (!dd->kregbase1) {
		dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
		return -ENOMEM;
	}
	dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);

	/* verify that reads actually work, save revision for reset check */
	dd->revision = readq(dd->kregbase1 + CCE_REVISION);
	if (dd->revision == ~(u64)0) {
		/* all-ones read means the device is not responding */
		dd_dev_err(dd, "Cannot read chip CSRs\n");
		goto nomem;
	}

	/* RcvArray entries are 8 bytes each; base2 starts right after them */
	rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
	dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
	dd->base2_start  = RCV_ARRAY + rcv_array_count * 8;

	/* uncached mapping of CSRs after the RcvArray, up to the PIO space */
	dd->kregbase2 = ioremap(
		addr + dd->base2_start,
		TXE_PIO_SEND - dd->base2_start);
	if (!dd->kregbase2) {
		dd_dev_err(dd, "UC mapping of kregbase2 failed\n");
		goto nomem;
	}
	dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2,
		    TXE_PIO_SEND - dd->base2_start);

	/* write-combining mapping of the PIO send buffers */
	dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE);
	if (!dd->piobase) {
		dd_dev_err(dd, "WC mapping of send buffers failed\n");
		goto nomem;
	}
	dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);

	dd->physaddr = addr;        /* used for io_remap, etc. */

	/*
	 * Map the chip's RcvArray as write-combining to allow us
	 * to write an entire cacheline worth of entries in one shot.
	 */
	dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
				     rcv_array_count * 8);
	if (!dd->rcvarray_wc) {
		dd_dev_err(dd, "WC mapping of receive array failed\n");
		goto nomem;
	}
	dd_dev_info(dd, "WC RcvArray: %p for %x\n",
		    dd->rcvarray_wc, rcv_array_count * 8);

	dd->flags |= HFI1_PRESENT;	/* chip.c CSR routines now work */
	return 0;
nomem:
	/* ddcleanup unmaps whichever of the regions above succeeded */
	ret = -ENOMEM;
	hfi1_pcie_ddcleanup(dd);
	return ret;
}
    172
    173/*
    174 * Do PCIe cleanup related to dd, after chip-specific cleanup, etc.  Just prior
    175 * to releasing the dd memory.
    176 * Void because all of the core pcie cleanup functions are void.
    177 */
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
{
	/* clear first so CSR accessors stop before mappings go away */
	dd->flags &= ~HFI1_PRESENT;
	/* each pointer may be NULL if ddinit failed part-way through */
	if (dd->kregbase1)
		iounmap(dd->kregbase1);
	dd->kregbase1 = NULL;
	if (dd->kregbase2)
		iounmap(dd->kregbase2);
	dd->kregbase2 = NULL;
	if (dd->rcvarray_wc)
		iounmap(dd->rcvarray_wc);
	dd->rcvarray_wc = NULL;
	if (dd->piobase)
		iounmap(dd->piobase);
	dd->piobase = NULL;
}
    194
    195/* return the PCIe link speed from the given link status */
    196static u32 extract_speed(u16 linkstat)
    197{
    198	u32 speed;
    199
    200	switch (linkstat & PCI_EXP_LNKSTA_CLS) {
    201	default: /* not defined, assume Gen1 */
    202	case PCI_EXP_LNKSTA_CLS_2_5GB:
    203		speed = 2500; /* Gen 1, 2.5GHz */
    204		break;
    205	case PCI_EXP_LNKSTA_CLS_5_0GB:
    206		speed = 5000; /* Gen 2, 5GHz */
    207		break;
    208	case PCI_EXP_LNKSTA_CLS_8_0GB:
    209		speed = 8000; /* Gen 3, 8GHz */
    210		break;
    211	}
    212	return speed;
    213}
    214
     215/* return the PCIe link width from the given link status */
    216static u32 extract_width(u16 linkstat)
    217{
    218	return (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
    219}
    220
    221/* read the link status and set dd->{lbus_width,lbus_speed,lbus_info} */
static void update_lbus_info(struct hfi1_devdata *dd)
{
	u16 linkstat;
	int ret;

	ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
	if (ret) {
		/* leave lbus_* fields untouched if the read fails */
		dd_dev_err(dd, "Unable to read from PCI config\n");
		return;
	}

	dd->lbus_width = extract_width(linkstat);
	dd->lbus_speed = extract_speed(linkstat);
	/* human-readable summary used by log messages and sysfs */
	snprintf(dd->lbus_info, sizeof(dd->lbus_info),
		 "PCIe,%uMHz,x%u", dd->lbus_speed, dd->lbus_width);
}
    238
    239/*
    240 * Read in the current PCIe link width and speed.  Find if the link is
    241 * Gen3 capable.
    242 */
int pcie_speeds(struct hfi1_devdata *dd)
{
	u32 linkcap;
	struct pci_dev *parent = dd->pcidev->bus->self;
	int ret;

	if (!pci_is_pcie(dd->pcidev)) {
		dd_dev_err(dd, "Can't find PCI Express capability!\n");
		return -EINVAL;
	}

	/* find if our max speed is Gen3 and parent supports Gen3 speeds */
	dd->link_gen3_capable = 1;

	ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
	if (ret) {
		dd_dev_err(dd, "Unable to read from PCI config\n");
		return pcibios_err_to_errno(ret);
	}

	/* SLS value 0x3 is the Gen3 (8 GT/s) encoding */
	if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
		dd_dev_info(dd,
			    "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
			    linkcap & PCI_EXP_LNKCAP_SLS);
		dd->link_gen3_capable = 0;
	}

	/*
	 * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
	 */
	if (parent &&
	    (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
	     dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
		dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
		dd->link_gen3_capable = 0;
	}

	/* obtain the link width and current speed */
	update_lbus_info(dd);

	dd_dev_info(dd, "%s\n", dd->lbus_info);

	return 0;
}
    287
    288/*
    289 * Restore command and BARs after a reset has wiped them out
    290 *
    291 * Returns 0 on success, otherwise a negative error value
    292 */
int restore_pci_variables(struct hfi1_devdata *dd)
{
	int ret;

	/* rewrite the config-space state captured by save_pci_variables() */
	ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
	if (ret)
		goto error;

	ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
				     dd->pcibar0);
	if (ret)
		goto error;

	ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
				     dd->pcibar1);
	if (ret)
		goto error;

	ret = pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
	if (ret)
		goto error;

	ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL,
					 dd->pcie_devctl);
	if (ret)
		goto error;

	ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL,
					 dd->pcie_lnkctl);
	if (ret)
		goto error;

	ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
					 dd->pcie_devctl2);
	if (ret)
		goto error;

	ret = pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
	if (ret)
		goto error;

	/* TPH register only exists if the capability is present */
	if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
		ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
					     dd->pci_tph2);
		if (ret)
			goto error;
	}
	return 0;

error:
	dd_dev_err(dd, "Unable to write to PCI config\n");
	return pcibios_err_to_errno(ret);
}
    346
    347/*
    348 * Save BARs and command to rewrite after device reset
    349 *
    350 * Returns 0 on success, otherwise a negative error value
    351 */
int save_pci_variables(struct hfi1_devdata *dd)
{
	int ret;

	/* capture config-space state for restore_pci_variables() */
	ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
				    &dd->pcibar0);
	if (ret)
		goto error;

	ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
				    &dd->pcibar1);
	if (ret)
		goto error;

	ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
	if (ret)
		goto error;

	ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
	if (ret)
		goto error;

	ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL,
					&dd->pcie_devctl);
	if (ret)
		goto error;

	ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL,
					&dd->pcie_lnkctl);
	if (ret)
		goto error;

	ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
					&dd->pcie_devctl2);
	if (ret)
		goto error;

	ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
	if (ret)
		goto error;

	/* TPH register only exists if the capability is present */
	if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
		ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
					    &dd->pci_tph2);
		if (ret)
			goto error;
	}
	return 0;

error:
	dd_dev_err(dd, "Unable to read from PCI config\n");
	return pcibios_err_to_errno(ret);
}
    405
    406/*
    407 * BIOS may not set PCIe bus-utilization parameters for best performance.
    408 * Check and optionally adjust them to maximize our throughput.
    409 */
/*
 * Module-parameter limit used by tune_pcie_caps(): bits 0..2 cap the
 * Max Payload Size code, bits 4..6 cap the Max Read Request Size code.
 */
static int hfi1_pcie_caps;
module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
    413
    414/**
    415 * tune_pcie_caps() - Code to adjust PCIe capabilities.
    416 * @dd: Valid device data structure
    417 *
    418 */
void tune_pcie_caps(struct hfi1_devdata *dd)
{
	struct pci_dev *parent;
	u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
	u16 rc_mrrs, ep_mrrs, max_mrrs, ectl;
	int ret;

	/*
	 * Turn on extended tags in DevCtl in case the BIOS has turned it off
	 * to improve WFR SDMA bandwidth
	 */
	ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
	if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
		dd_dev_info(dd, "Enabling PCIe extended tags\n");
		ectl |= PCI_EXP_DEVCTL_EXT_TAG;
		ret = pcie_capability_write_word(dd->pcidev,
						 PCI_EXP_DEVCTL, ectl);
		if (ret)
			dd_dev_info(dd, "Unable to write to PCI config\n");
	}
	/* Find out supported and configured values for parent (root) */
	parent = dd->pcidev->bus->self;
	/*
	 * The driver cannot perform the tuning if it does not have
	 * access to the upstream component.
	 */
	if (!parent) {
		dd_dev_info(dd, "Parent not found\n");
		return;
	}
	if (!pci_is_root_bus(parent->bus)) {
		dd_dev_info(dd, "Parent not root\n");
		return;
	}
	if (!pci_is_pcie(parent)) {
		dd_dev_info(dd, "Parent is not PCI Express capable\n");
		return;
	}
	if (!pci_is_pcie(dd->pcidev)) {
		dd_dev_info(dd, "PCI device is not PCI Express capable\n");
		return;
	}
	/*
	 * MPS values below are the spec encoding: code c means 128 << c
	 * bytes; ffs(bytes) - 8 converts bytes back to the code.
	 */
	rc_mpss = parent->pcie_mpss;
	rc_mps = ffs(pcie_get_mps(parent)) - 8;
	/* Find out supported and configured values for endpoint (us) */
	ep_mpss = dd->pcidev->pcie_mpss;
	ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8;

	/* Find max payload supported by root, endpoint */
	if (rc_mpss > ep_mpss)
		rc_mpss = ep_mpss;

	/* If Supported greater than limit in module param, limit it */
	if (rc_mpss > (hfi1_pcie_caps & 7))
		rc_mpss = hfi1_pcie_caps & 7;
	/* If less than (allowed, supported), bump root payload */
	if (rc_mpss > rc_mps) {
		rc_mps = rc_mpss;
		pcie_set_mps(parent, 128 << rc_mps);
	}
	/* If less than (allowed, supported), bump endpoint payload */
	if (rc_mpss > ep_mps) {
		ep_mps = rc_mpss;
		pcie_set_mps(dd->pcidev, 128 << ep_mps);
	}

	/*
	 * Now the Read Request size.
	 * No field for max supported, but PCIe spec limits it to 4096,
	 * which is code '5' (log2(4096) - 7)
	 */
	max_mrrs = 5;
	if (max_mrrs > ((hfi1_pcie_caps >> 4) & 7))
		max_mrrs = (hfi1_pcie_caps >> 4) & 7;

	/* convert code to bytes; pcie_{get,set}_readrq work in bytes */
	max_mrrs = 128 << max_mrrs;
	rc_mrrs = pcie_get_readrq(parent);
	ep_mrrs = pcie_get_readrq(dd->pcidev);

	if (max_mrrs > rc_mrrs) {
		rc_mrrs = max_mrrs;
		pcie_set_readrq(parent, rc_mrrs);
	}
	if (max_mrrs > ep_mrrs) {
		ep_mrrs = max_mrrs;
		pcie_set_readrq(dd->pcidev, ep_mrrs);
	}
}
    507
    508/* End of PCIe capability tuning */
    509
    510/*
    511 * From here through hfi1_pci_err_handler definition is invoked via
    512 * PCI error infrastructure, registered via pci
    513 */
/*
 * AER callback: decide how to recover based on the channel state.
 * NOTE(review): dd is NULL-checked only in the perm_failure branch;
 * the other branches appear to rely on drvdata being set before the
 * handler can fire — confirm against driver probe ordering.
 */
static pci_ers_result_t
pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
	pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED;

	switch (state) {
	case pci_channel_io_normal:
		dd_dev_info(dd, "State Normal, ignoring\n");
		break;

	case pci_channel_io_frozen:
		dd_dev_info(dd, "State Frozen, requesting reset\n");
		pci_disable_device(pdev);
		ret = PCI_ERS_RESULT_NEED_RESET;
		break;

	case pci_channel_io_perm_failure:
		if (dd) {
			dd_dev_info(dd, "State Permanent Failure, disabling\n");
			/* no more register accesses! */
			dd->flags &= ~HFI1_PRESENT;
			hfi1_disable_after_error(dd);
		}
		 /* else early, or other problem */
		ret =  PCI_ERS_RESULT_DISCONNECT;
		break;

	default: /* shouldn't happen */
		dd_dev_info(dd, "HFI1 PCI errors detected (state %d)\n",
			    state);
		break;
	}
	return ret;
}
    549
    550static pci_ers_result_t
    551pci_mmio_enabled(struct pci_dev *pdev)
    552{
    553	u64 words = 0U;
    554	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
    555	pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED;
    556
    557	if (dd && dd->pport) {
    558		words = read_port_cntr(dd->pport, C_RX_WORDS, CNTR_INVALID_VL);
    559		if (words == ~0ULL)
    560			ret = PCI_ERS_RESULT_NEED_RESET;
    561		dd_dev_info(dd,
    562			    "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n",
    563			    words, ret);
    564	}
    565	return  ret;
    566}
    567
/* AER callback: slot reset is a no-op for this driver; just log it */
static pci_ers_result_t
pci_slot_reset(struct pci_dev *pdev)
{
	struct hfi1_devdata *dd = pci_get_drvdata(pdev);

	dd_dev_info(dd, "HFI1 slot_reset function called, ignored\n");
	return PCI_ERS_RESULT_CAN_RECOVER;
}
    576
/* AER callback: re-initialize the device after error recovery */
static void
pci_resume(struct pci_dev *pdev)
{
	struct hfi1_devdata *dd = pci_get_drvdata(pdev);

	dd_dev_info(dd, "HFI1 resume function called\n");
	/*
	 * Running jobs will fail, since it's asynchronous
	 * unlike sysfs-requested reset.   Better than
	 * doing nothing.
	 */
	hfi1_init(dd, 1); /* same as re-init after reset */
}
    590
/* PCI error-recovery callbacks registered with the PCI core */
const struct pci_error_handlers hfi1_pci_err_handler = {
	.error_detected = pci_error_detected,
	.mmio_enabled = pci_mmio_enabled,
	.slot_reset = pci_slot_reset,
	.resume = pci_resume,
};
    597
    598/*============================================================================*/
    599/* PCIe Gen3 support */
    600
    601/*
    602 * This code is separated out because it is expected to be removed in the
    603 * final shipping product.  If not, then it will be revisited and items
    604 * will be moved to more standard locations.
    605 */
    606
/* ASIC_PCI_SD_HOST_STATUS.FW_DNLD_STS field values */
#define DL_STATUS_HFI0 0x1	/* hfi0 firmware download complete */
#define DL_STATUS_HFI1 0x2	/* hfi1 firmware download complete */
#define DL_STATUS_BOTH 0x3	/* hfi0 and hfi1 firmware download complete */

/* ASIC_PCI_SD_HOST_STATUS.FW_DNLD_ERR field values */
#define DL_ERR_NONE		0x0	/* no error */
#define DL_ERR_SWAP_PARITY	0x1	/* parity error in SerDes interrupt */
					/*   or response data */
#define DL_ERR_DISABLED	0x2	/* hfi disabled */
#define DL_ERR_SECURITY	0x3	/* security check failed */
#define DL_ERR_SBUS		0x4	/* SBus status error */
#define DL_ERR_XFR_PARITY	0x5	/* parity error during ROM transfer*/

/* gasket block secondary bus reset delay */
#define SBR_DELAY_US 200000	/* 200ms */

/* module parameters controlling the Gen3 transition */
static uint pcie_target = 3;
module_param(pcie_target, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_target, "PCIe target speed (0 skip, 1-3 Gen1-3)");

static uint pcie_force;
module_param(pcie_force, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_force, "Force driver to do a PCIe firmware download even if already at target speed");

static uint pcie_retry = 5;
module_param(pcie_retry, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_retry, "Driver will try this many times to reach requested speed");

#define UNSET_PSET 255
#define DEFAULT_DISCRETE_PSET 2	/* discrete HFI */
#define DEFAULT_MCP_PSET 6	/* MCP HFI */
static uint pcie_pset = UNSET_PSET;
module_param(pcie_pset, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10")&#x3B;

static uint pcie_ctle = 3; /* discrete on, integrated on */
module_param(pcie_ctle, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off");

/* equalization columns */
#define PREC 0
#define ATTN 1
#define POST 2
    651
/* discrete silicon preliminary equalization values, indexed by pset 0-10 */
static const u8 discrete_preliminary_eq[11][3] = {
	/* prec   attn   post */
	{  0x00,  0x00,  0x12 },	/* p0 */
	{  0x00,  0x00,  0x0c },	/* p1 */
	{  0x00,  0x00,  0x0f },	/* p2 */
	{  0x00,  0x00,  0x09 },	/* p3 */
	{  0x00,  0x00,  0x00 },	/* p4 */
	{  0x06,  0x00,  0x00 },	/* p5 */
	{  0x09,  0x00,  0x00 },	/* p6 */
	{  0x06,  0x00,  0x0f },	/* p7 */
	{  0x09,  0x00,  0x09 },	/* p8 */
	{  0x0c,  0x00,  0x00 },	/* p9 */
	{  0x00,  0x00,  0x18 },	/* p10 */
};
    667
/* integrated silicon preliminary equalization values, indexed by pset 0-10 */
static const u8 integrated_preliminary_eq[11][3] = {
	/* prec   attn   post */
	{  0x00,  0x1e,  0x07 },	/* p0 */
	{  0x00,  0x1e,  0x05 },	/* p1 */
	{  0x00,  0x1e,  0x06 },	/* p2 */
	{  0x00,  0x1e,  0x04 },	/* p3 */
	{  0x00,  0x1e,  0x00 },	/* p4 */
	{  0x03,  0x1e,  0x00 },	/* p5 */
	{  0x04,  0x1e,  0x00 },	/* p6 */
	{  0x03,  0x1e,  0x06 },	/* p7 */
	{  0x03,  0x1e,  0x04 },	/* p8 */
	{  0x05,  0x1e,  0x00 },	/* p9 */
	{  0x00,  0x1e,  0x0a },	/* p10 */
};
    683
/* discrete silicon static CTLE tunings, indexed by pset 0-10 */
static const u8 discrete_ctle_tunings[11][4] = {
	/* DC     LF     HF     BW */
	{  0x48,  0x0b,  0x04,  0x04 },	/* p0 */
	{  0x60,  0x05,  0x0f,  0x0a },	/* p1 */
	{  0x50,  0x09,  0x06,  0x06 },	/* p2 */
	{  0x68,  0x05,  0x0f,  0x0a },	/* p3 */
	{  0x80,  0x05,  0x0f,  0x0a },	/* p4 */
	{  0x70,  0x05,  0x0f,  0x0a },	/* p5 */
	{  0x68,  0x05,  0x0f,  0x0a },	/* p6 */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p7 */
	{  0x48,  0x09,  0x06,  0x06 },	/* p8 */
	{  0x60,  0x05,  0x0f,  0x0a },	/* p9 */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p10 */
};
    698
/* integrated silicon static CTLE tunings, indexed by pset 0-10 */
static const u8 integrated_ctle_tunings[11][4] = {
	/* DC     LF     HF     BW */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p0 */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p1 */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p2 */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p3 */
	{  0x58,  0x0a,  0x05,  0x05 },	/* p4 */
	{  0x48,  0x0a,  0x05,  0x05 },	/* p5 */
	{  0x40,  0x0a,  0x05,  0x05 },	/* p6 */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p7 */
	{  0x38,  0x0f,  0x00,  0x00 },	/* p8 */
	{  0x38,  0x09,  0x06,  0x06 },	/* p9 */
	{  0x38,  0x0e,  0x01,  0x01 },	/* p10 */
};
    713
/*
 * helper to format the value to write to hardware: packs the pre/cursor/
 * post coefficients into their PL102 register fields
 */
#define eq_value(pre, curr, post) \
	((((u32)(pre)) << \
			PCIE_CFG_REG_PL102_GEN3_EQ_PRE_CURSOR_PSET_SHIFT) \
	| (((u32)(curr)) << PCIE_CFG_REG_PL102_GEN3_EQ_CURSOR_PSET_SHIFT) \
	| (((u32)(post)) << \
		PCIE_CFG_REG_PL102_GEN3_EQ_POST_CURSOR_PSET_SHIFT))
    721
    722/*
    723 * Load the given EQ preset table into the PCIe hardware.
    724 */
/*
 * Load the given EQ preset table into the PCIe hardware.
 * @eq:  11 presets of {prec, attn, post} values
 * @fs:  full-swing value; c0 is derived so the three coefficients sum to fs
 * @div: divisor applied to the table values before programming
 *
 * Returns 0 on success, -EINVAL if any preset violates the hardware's
 * coefficient rules (all violating presets are logged before returning).
 */
static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
			 u8 div)
{
	struct pci_dev *pdev = dd->pcidev;
	u32 hit_error = 0;
	u32 violation;
	u32 i;
	u8 c_minus1, c0, c_plus1;
	int ret;

	for (i = 0; i < 11; i++) {
		/* set index */
		pci_write_config_dword(pdev, PCIE_CFG_REG_PL103, i);
		/* write the value */
		c_minus1 = eq[i][PREC] / div;
		c0 = fs - (eq[i][PREC] / div) - (eq[i][POST] / div);
		c_plus1 = eq[i][POST] / div;
		pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
				       eq_value(c_minus1, c0, c_plus1));
		/* check if these coefficients violate EQ rules */
		ret = pci_read_config_dword(dd->pcidev,
					    PCIE_CFG_REG_PL105, &violation);
		if (ret) {
			dd_dev_err(dd, "Unable to read from PCI config\n");
			hit_error = 1;
			break;
		}

		if (violation
		    & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
			/* print the table header only once */
			if (hit_error == 0) {
				dd_dev_err(dd,
					   "Gen3 EQ Table Coefficient rule violations\n");
				dd_dev_err(dd, "         prec   attn   post\n");
			}
			dd_dev_err(dd, "   p%02d:   %02x     %02x     %02x\n",
				   i, (u32)eq[i][0], (u32)eq[i][1],
				   (u32)eq[i][2]);
			dd_dev_err(dd, "            %02x     %02x     %02x\n",
				   (u32)c_minus1, (u32)c0, (u32)c_plus1);
			hit_error = 1;
		}
	}
	if (hit_error)
		return -EINVAL;
	return 0;
}
    772
    773/*
    774 * Steps to be done after the PCIe firmware is downloaded and
    775 * before the SBR for the Pcie Gen3.
    776 * The SBus resource is already being held.
    777 */
static void pcie_post_steps(struct hfi1_devdata *dd)
{
	int i;

	set_sbus_fast_mode(dd);
	/*
	 * Write to the PCIe PCSes to set the G3_LOCKED_NEXT bits to 1.
	 * This avoids a spurious framing error that can otherwise be
	 * generated by the MAC layer.
	 *
	 * Use individual addresses since no broadcast is set up.
	 */
	for (i = 0; i < NUM_PCIE_SERDES; i++) {
		sbus_request(dd, pcie_pcs_addrs[dd->hfi1_id][i],
			     0x03, WRITE_SBUS_RECEIVER, 0x00022132);
	}

	clear_sbus_fast_mode(dd);
}
    797
    798/*
    799 * Trigger a secondary bus reset (SBR) on ourselves using our parent.
    800 *
    801 * Based on pci_parent_bus_reset() which is not exported by the
    802 * kernel core.
    803 */
static int trigger_sbr(struct hfi1_devdata *dd)
{
	struct pci_dev *dev = dd->pcidev;
	struct pci_dev *pdev;

	/* need a parent */
	if (!dev->bus->self) {
		dd_dev_err(dd, "%s: no parent device\n", __func__);
		return -ENOTTY;
	}

	/* should not be anyone else on the bus: the SBR resets them all */
	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
		if (pdev != dev) {
			dd_dev_err(dd,
				   "%s: another device is on the same bus\n",
				   __func__);
			return -ENOTTY;
		}

	/*
	 * This is an end around to do an SBR during probe time. A new API needs
	 * to be implemented to have cleaner interface but this fixes the
	 * current brokenness
	 */
	return pci_bridge_secondary_bus_reset(dev->bus->self);
}
    831
    832/*
    833 * Write the given gasket interrupt register.
    834 */
    835static void write_gasket_interrupt(struct hfi1_devdata *dd, int index,
    836				   u16 code, u16 data)
    837{
    838	write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8),
    839		  (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) |
    840		   ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
    841}
    842
    843/*
    844 * Tell the gasket logic how to react to the reset.
    845 */
static void arm_gasket_logic(struct hfi1_devdata *dd)
{
	u64 reg;

	/*
	 * Pack interrupt command, SBus receiver address, SBR mode and
	 * delay timer into HOST_CMD in one write.
	 */
	reg = (((u64)1 << dd->hfi1_id) <<
	       ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) |
	      ((u64)pcie_serdes_broadcast[dd->hfi1_id] <<
	       ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT |
	       ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK |
	       ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) <<
	       ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT);
	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, reg);
	/* read back to push the write */
	read_csr(dd, ASIC_PCIE_SD_HOST_CMD);
}
    861
    862/*
    863 * CCE_PCIE_CTRL long name helpers
    864 * We redefine these shorter macros to use in the code while leaving
    865 * chip_registers.h to be autogenerated from the hardware spec.
    866 */
/* short aliases for the autogenerated CCE_PCIE_CTRL field names */
#define LANE_BUNDLE_MASK              CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_MASK
#define LANE_BUNDLE_SHIFT             CCE_PCIE_CTRL_PCIE_LANE_BUNDLE_SHIFT
#define LANE_DELAY_MASK               CCE_PCIE_CTRL_PCIE_LANE_DELAY_MASK
#define LANE_DELAY_SHIFT              CCE_PCIE_CTRL_PCIE_LANE_DELAY_SHIFT
#define MARGIN_OVERWRITE_ENABLE_SHIFT CCE_PCIE_CTRL_XMT_MARGIN_OVERWRITE_ENABLE_SHIFT
#define MARGIN_SHIFT                  CCE_PCIE_CTRL_XMT_MARGIN_SHIFT
#define MARGIN_G1_G2_OVERWRITE_MASK   CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_MASK
#define MARGIN_G1_G2_OVERWRITE_SHIFT  CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_OVERWRITE_ENABLE_SHIFT
#define MARGIN_GEN1_GEN2_MASK         CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_MASK
#define MARGIN_GEN1_GEN2_SHIFT        CCE_PCIE_CTRL_XMT_MARGIN_GEN1_GEN2_SHIFT
    877
    878 /*
    879  * Write xmt_margin for full-swing (WFR-B) or half-swing (WFR-C).
    880  */
static void write_xmt_margin(struct hfi1_devdata *dd, const char *fname)
{
	u64 pcie_ctrl;
	u64 xmt_margin;
	u64 xmt_margin_oe;
	u64 lane_delay;
	u64 lane_bundle;

	pcie_ctrl = read_csr(dd, CCE_PCIE_CTRL);

	/*
	 * For Discrete, use full-swing.
	 *  - PCIe TX defaults to full-swing.
	 *    Leave this register as default.
	 * For Integrated, use half-swing
	 *  - Copy xmt_margin and xmt_margin_oe
	 *    from Gen1/Gen2 to Gen3.
	 */
	if (dd->pcidev->device == PCI_DEVICE_ID_INTEL1) { /* integrated */
		/* extract initial fields */
		xmt_margin = (pcie_ctrl >> MARGIN_GEN1_GEN2_SHIFT)
			      & MARGIN_GEN1_GEN2_MASK;
		xmt_margin_oe = (pcie_ctrl >> MARGIN_G1_G2_OVERWRITE_SHIFT)
				 & MARGIN_G1_G2_OVERWRITE_MASK;
		lane_delay = (pcie_ctrl >> LANE_DELAY_SHIFT) & LANE_DELAY_MASK;
		lane_bundle = (pcie_ctrl >> LANE_BUNDLE_SHIFT)
			       & LANE_BUNDLE_MASK;

		/*
		 * For A0, EFUSE values are not set.  Override with the
		 * correct values.
		 */
		if (is_ax(dd)) {
			/*
			 * xmt_margin and OverwriteEnable should be the
			 * same for Gen1/Gen2 and Gen3
			 */
			xmt_margin = 0x5;
			xmt_margin_oe = 0x1;
			lane_delay = 0xF; /* Delay 240ns. */
			lane_bundle = 0x0; /* Set to 1 lane. */
		}

		/* overwrite existing values: Gen1/Gen2 fields copied to Gen3 */
		pcie_ctrl = (xmt_margin << MARGIN_GEN1_GEN2_SHIFT)
			| (xmt_margin_oe << MARGIN_G1_G2_OVERWRITE_SHIFT)
			| (xmt_margin << MARGIN_SHIFT)
			| (xmt_margin_oe << MARGIN_OVERWRITE_ENABLE_SHIFT)
			| (lane_delay << LANE_DELAY_SHIFT)
			| (lane_bundle << LANE_BUNDLE_SHIFT);

		write_csr(dd, CCE_PCIE_CTRL, pcie_ctrl);
	}

	dd_dev_dbg(dd, "%s: program XMT margin, CcePcieCtrl 0x%llx\n",
		   fname, pcie_ctrl);
}
    938
    939/*
    940 * Do all the steps needed to transition the PCIe link to Gen3 speed.
    941 */
    942int do_pcie_gen3_transition(struct hfi1_devdata *dd)
    943{
    944	struct pci_dev *parent = dd->pcidev->bus->self;
    945	u64 fw_ctrl;
    946	u64 reg, therm;
    947	u32 reg32, fs, lf;
    948	u32 status, err;
    949	int ret;
    950	int do_retry, retry_count = 0;
    951	int intnum = 0;
    952	uint default_pset;
    953	uint pset = pcie_pset;
    954	u16 target_vector, target_speed;
    955	u16 lnkctl2, vendor;
    956	u8 div;
    957	const u8 (*eq)[3];
    958	const u8 (*ctle_tunings)[4];
    959	uint static_ctle_mode;
    960	int return_error = 0;
    961	u32 target_width;
    962
    963	/* PCIe Gen3 is for the ASIC only */
    964	if (dd->icode != ICODE_RTL_SILICON)
    965		return 0;
    966
    967	if (pcie_target == 1) {			/* target Gen1 */
    968		target_vector = PCI_EXP_LNKCTL2_TLS_2_5GT;
    969		target_speed = 2500;
    970	} else if (pcie_target == 2) {		/* target Gen2 */
    971		target_vector = PCI_EXP_LNKCTL2_TLS_5_0GT;
    972		target_speed = 5000;
    973	} else if (pcie_target == 3) {		/* target Gen3 */
    974		target_vector = PCI_EXP_LNKCTL2_TLS_8_0GT;
    975		target_speed = 8000;
    976	} else {
    977		/* off or invalid target - skip */
    978		dd_dev_info(dd, "%s: Skipping PCIe transition\n", __func__);
    979		return 0;
    980	}
    981
    982	/* if already at target speed, done (unless forced) */
    983	if (dd->lbus_speed == target_speed) {
    984		dd_dev_info(dd, "%s: PCIe already at gen%d, %s\n", __func__,
    985			    pcie_target,
    986			    pcie_force ? "re-doing anyway" : "skipping");
    987		if (!pcie_force)
    988			return 0;
    989	}
    990
    991	/*
    992	 * The driver cannot do the transition if it has no access to the
    993	 * upstream component
    994	 */
    995	if (!parent) {
    996		dd_dev_info(dd, "%s: No upstream, Can't do gen3 transition\n",
    997			    __func__);
    998		return 0;
    999	}
   1000
   1001	/* Previous Gen1/Gen2 bus width */
   1002	target_width = dd->lbus_width;
   1003
   1004	/*
   1005	 * Do the Gen3 transition.  Steps are those of the PCIe Gen3
   1006	 * recipe.
   1007	 */
   1008
   1009	/* step 1: pcie link working in gen1/gen2 */
   1010
   1011	/* step 2: if either side is not capable of Gen3, done */
   1012	if (pcie_target == 3 && !dd->link_gen3_capable) {
   1013		dd_dev_err(dd, "The PCIe link is not Gen3 capable\n");
   1014		ret = -ENOSYS;
   1015		goto done_no_mutex;
   1016	}
   1017
   1018	/* hold the SBus resource across the firmware download and SBR */
   1019	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
   1020	if (ret) {
   1021		dd_dev_err(dd, "%s: unable to acquire SBus resource\n",
   1022			   __func__);
   1023		return ret;
   1024	}
   1025
   1026	/* make sure thermal polling is not causing interrupts */
   1027	therm = read_csr(dd, ASIC_CFG_THERM_POLL_EN);
   1028	if (therm) {
   1029		write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0);
   1030		msleep(100);
   1031		dd_dev_info(dd, "%s: Disabled therm polling\n",
   1032			    __func__);
   1033	}
   1034
   1035retry:
   1036	/* the SBus download will reset the spico for thermal */
   1037
   1038	/* step 3: download SBus Master firmware */
   1039	/* step 4: download PCIe Gen3 SerDes firmware */
   1040	dd_dev_info(dd, "%s: downloading firmware\n", __func__);
   1041	ret = load_pcie_firmware(dd);
   1042	if (ret) {
   1043		/* do not proceed if the firmware cannot be downloaded */
   1044		return_error = 1;
   1045		goto done;
   1046	}
   1047
   1048	/* step 5: set up device parameter settings */
   1049	dd_dev_info(dd, "%s: setting PCIe registers\n", __func__);
   1050
   1051	/*
   1052	 * PcieCfgSpcie1 - Link Control 3
   1053	 * Leave at reset value.  No need to set PerfEq - link equalization
   1054	 * will be performed automatically after the SBR when the target
   1055	 * speed is 8GT/s.
   1056	 */
   1057
   1058	/* clear all 16 per-lane error bits (PCIe: Lane Error Status) */
   1059	pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, 0xffff);
   1060
   1061	/* step 5a: Set Synopsys Port Logic registers */
   1062
   1063	/*
   1064	 * PcieCfgRegPl2 - Port Force Link
   1065	 *
   1066	 * Set the low power field to 0x10 to avoid unnecessary power
   1067	 * management messages.  All other fields are zero.
   1068	 */
   1069	reg32 = 0x10ul << PCIE_CFG_REG_PL2_LOW_PWR_ENT_CNT_SHIFT;
   1070	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL2, reg32);
   1071
   1072	/*
   1073	 * PcieCfgRegPl100 - Gen3 Control
   1074	 *
   1075	 * turn off PcieCfgRegPl100.Gen3ZRxDcNonCompl
   1076	 * turn on PcieCfgRegPl100.EqEieosCnt
   1077	 * Everything else zero.
   1078	 */
   1079	reg32 = PCIE_CFG_REG_PL100_EQ_EIEOS_CNT_SMASK;
   1080	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL100, reg32);
   1081
   1082	/*
   1083	 * PcieCfgRegPl101 - Gen3 EQ FS and LF
   1084	 * PcieCfgRegPl102 - Gen3 EQ Presets to Coefficients Mapping
   1085	 * PcieCfgRegPl103 - Gen3 EQ Preset Index
   1086	 * PcieCfgRegPl105 - Gen3 EQ Status
   1087	 *
   1088	 * Give initial EQ settings.
   1089	 */
   1090	if (dd->pcidev->device == PCI_DEVICE_ID_INTEL0) { /* discrete */
   1091		/* 1000mV, FS=24, LF = 8 */
   1092		fs = 24;
   1093		lf = 8;
   1094		div = 3;
   1095		eq = discrete_preliminary_eq;
   1096		default_pset = DEFAULT_DISCRETE_PSET;
   1097		ctle_tunings = discrete_ctle_tunings;
   1098		/* bit 0 - discrete on/off */
   1099		static_ctle_mode = pcie_ctle & 0x1;
   1100	} else {
   1101		/* 400mV, FS=29, LF = 9 */
   1102		fs = 29;
   1103		lf = 9;
   1104		div = 1;
   1105		eq = integrated_preliminary_eq;
   1106		default_pset = DEFAULT_MCP_PSET;
   1107		ctle_tunings = integrated_ctle_tunings;
   1108		/* bit 1 - integrated on/off */
   1109		static_ctle_mode = (pcie_ctle >> 1) & 0x1;
   1110	}
   1111	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
   1112			       (fs <<
   1113				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) |
   1114			       (lf <<
   1115				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
   1116	ret = load_eq_table(dd, eq, fs, div);
   1117	if (ret)
   1118		goto done;
   1119
   1120	/*
   1121	 * PcieCfgRegPl106 - Gen3 EQ Control
   1122	 *
   1123	 * Set Gen3EqPsetReqVec, leave other fields 0.
   1124	 */
   1125	if (pset == UNSET_PSET)
   1126		pset = default_pset;
   1127	if (pset > 10) {	/* valid range is 0-10, inclusive */
   1128		dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n",
   1129			   __func__, pset, default_pset);
   1130		pset = default_pset;
   1131	}
   1132	dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pset);
   1133	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106,
   1134			       ((1 << pset) <<
   1135			PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) |
   1136			PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK |
   1137			PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
   1138
   1139	/*
   1140	 * step 5b: Do post firmware download steps via SBus
   1141	 */
   1142	dd_dev_info(dd, "%s: doing pcie post steps\n", __func__);
   1143	pcie_post_steps(dd);
   1144
   1145	/*
   1146	 * step 5c: Program gasket interrupts
   1147	 */
   1148	/* set the Rx Bit Rate to REFCLK ratio */
   1149	write_gasket_interrupt(dd, intnum++, 0x0006, 0x0050);
   1150	/* disable pCal for PCIe Gen3 RX equalization */
   1151	/* select adaptive or static CTLE */
   1152	write_gasket_interrupt(dd, intnum++, 0x0026,
   1153			       0x5b01 | (static_ctle_mode << 3));
   1154	/*
   1155	 * Enable iCal for PCIe Gen3 RX equalization, and set which
   1156	 * evaluation of RX_EQ_EVAL will launch the iCal procedure.
   1157	 */
   1158	write_gasket_interrupt(dd, intnum++, 0x0026, 0x5202);
   1159
   1160	if (static_ctle_mode) {
   1161		/* apply static CTLE tunings */
   1162		u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw;
   1163
   1164		pcie_dc = ctle_tunings[pset][0];
   1165		pcie_lf = ctle_tunings[pset][1];
   1166		pcie_hf = ctle_tunings[pset][2];
   1167		pcie_bw = ctle_tunings[pset][3];
   1168		write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc);
   1169		write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf);
   1170		write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf);
   1171		write_gasket_interrupt(dd, intnum++, 0x0026, 0x5500 | pcie_bw);
   1172	}
   1173
   1174	/* terminate list */
   1175	write_gasket_interrupt(dd, intnum++, 0x0000, 0x0000);
   1176
   1177	/*
   1178	 * step 5d: program XMT margin
   1179	 */
   1180	write_xmt_margin(dd, __func__);
   1181
   1182	/*
   1183	 * step 5e: disable active state power management (ASPM). It
   1184	 * will be enabled if required later
   1185	 */
   1186	dd_dev_info(dd, "%s: clearing ASPM\n", __func__);
   1187	aspm_hw_disable_l1(dd);
   1188
   1189	/*
   1190	 * step 5f: clear DirectSpeedChange
   1191	 * PcieCfgRegPl67.DirectSpeedChange must be zero to prevent the
   1192	 * change in the speed target from starting before we are ready.
   1193	 * This field defaults to 0 and we are not changing it, so nothing
   1194	 * needs to be done.
   1195	 */
   1196
   1197	/* step 5g: Set target link speed */
   1198	/*
   1199	 * Set target link speed to be target on both device and parent.
   1200	 * On setting the parent: Some system BIOSs "helpfully" set the
   1201	 * parent target speed to Gen2 to match the ASIC's initial speed.
   1202	 * We can set the target Gen3 because we have already checked
   1203	 * that it is Gen3 capable earlier.
   1204	 */
   1205	dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
   1206	ret = pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
   1207	if (ret) {
   1208		dd_dev_err(dd, "Unable to read from PCI config\n");
   1209		return_error = 1;
   1210		goto done;
   1211	}
   1212
   1213	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
   1214		    (u32)lnkctl2);
   1215	/* only write to parent if target is not as high as ours */
   1216	if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target_vector) {
   1217		lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
   1218		lnkctl2 |= target_vector;
   1219		dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
   1220			    (u32)lnkctl2);
   1221		ret = pcie_capability_write_word(parent,
   1222						 PCI_EXP_LNKCTL2, lnkctl2);
   1223		if (ret) {
   1224			dd_dev_err(dd, "Unable to write to PCI config\n");
   1225			return_error = 1;
   1226			goto done;
   1227		}
   1228	} else {
   1229		dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
   1230	}
   1231
   1232	dd_dev_info(dd, "%s: setting target link speed\n", __func__);
   1233	ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
   1234	if (ret) {
   1235		dd_dev_err(dd, "Unable to read from PCI config\n");
   1236		return_error = 1;
   1237		goto done;
   1238	}
   1239
   1240	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
   1241		    (u32)lnkctl2);
   1242	lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
   1243	lnkctl2 |= target_vector;
   1244	dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
   1245		    (u32)lnkctl2);
   1246	ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
   1247	if (ret) {
   1248		dd_dev_err(dd, "Unable to write to PCI config\n");
   1249		return_error = 1;
   1250		goto done;
   1251	}
   1252
   1253	/* step 5h: arm gasket logic */
   1254	/* hold DC in reset across the SBR */
   1255	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
   1256	(void)read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
   1257	/* save firmware control across the SBR */
   1258	fw_ctrl = read_csr(dd, MISC_CFG_FW_CTRL);
   1259
   1260	dd_dev_info(dd, "%s: arming gasket logic\n", __func__);
   1261	arm_gasket_logic(dd);
   1262
   1263	/*
   1264	 * step 6: quiesce PCIe link
   1265	 * The chip has already been reset, so there will be no traffic
   1266	 * from the chip.  Linux has no easy way to enforce that it will
   1267	 * not try to access the device, so we just need to hope it doesn't
   1268	 * do it while we are doing the reset.
   1269	 */
   1270
   1271	/*
   1272	 * step 7: initiate the secondary bus reset (SBR)
   1273	 * step 8: hardware brings the links back up
   1274	 * step 9: wait for link speed transition to be complete
   1275	 */
   1276	dd_dev_info(dd, "%s: calling trigger_sbr\n", __func__);
   1277	ret = trigger_sbr(dd);
   1278	if (ret)
   1279		goto done;
   1280
   1281	/* step 10: decide what to do next */
   1282
   1283	/* check if we can read PCI space */
   1284	ret = pci_read_config_word(dd->pcidev, PCI_VENDOR_ID, &vendor);
   1285	if (ret) {
   1286		dd_dev_info(dd,
   1287			    "%s: read of VendorID failed after SBR, err %d\n",
   1288			    __func__, ret);
   1289		return_error = 1;
   1290		goto done;
   1291	}
   1292	if (vendor == 0xffff) {
   1293		dd_dev_info(dd, "%s: VendorID is all 1s after SBR\n", __func__);
   1294		return_error = 1;
   1295		ret = -EIO;
   1296		goto done;
   1297	}
   1298
   1299	/* restore PCI space registers we know were reset */
   1300	dd_dev_info(dd, "%s: calling restore_pci_variables\n", __func__);
   1301	ret = restore_pci_variables(dd);
   1302	if (ret) {
   1303		dd_dev_err(dd, "%s: Could not restore PCI variables\n",
   1304			   __func__);
   1305		return_error = 1;
   1306		goto done;
   1307	}
   1308
   1309	/* restore firmware control */
   1310	write_csr(dd, MISC_CFG_FW_CTRL, fw_ctrl);
   1311
   1312	/*
   1313	 * Check the gasket block status.
   1314	 *
   1315	 * This is the first CSR read after the SBR.  If the read returns
   1316	 * all 1s (fails), the link did not make it back.
   1317	 *
   1318	 * Once we're sure we can read and write, clear the DC reset after
   1319	 * the SBR.  Then check for any per-lane errors. Then look over
   1320	 * the status.
   1321	 */
   1322	reg = read_csr(dd, ASIC_PCIE_SD_HOST_STATUS);
   1323	dd_dev_info(dd, "%s: gasket block status: 0x%llx\n", __func__, reg);
   1324	if (reg == ~0ull) {	/* PCIe read failed/timeout */
   1325		dd_dev_err(dd, "SBR failed - unable to read from device\n");
   1326		return_error = 1;
   1327		ret = -ENOSYS;
   1328		goto done;
   1329	}
   1330
   1331	/* clear the DC reset */
   1332	write_csr(dd, CCE_DC_CTRL, 0);
   1333
   1334	/* Set the LED off */
   1335	setextled(dd, 0);
   1336
   1337	/* check for any per-lane errors */
   1338	ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
   1339	if (ret) {
   1340		dd_dev_err(dd, "Unable to read from PCI config\n");
   1341		return_error = 1;
   1342		goto done;
   1343	}
   1344
   1345	dd_dev_info(dd, "%s: per-lane errors: 0x%x\n", __func__, reg32);
   1346
   1347	/* extract status, look for our HFI */
   1348	status = (reg >> ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_SHIFT)
   1349			& ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_MASK;
   1350	if ((status & (1 << dd->hfi1_id)) == 0) {
   1351		dd_dev_err(dd,
   1352			   "%s: gasket status 0x%x, expecting 0x%x\n",
   1353			   __func__, status, 1 << dd->hfi1_id);
   1354		ret = -EIO;
   1355		goto done;
   1356	}
   1357
   1358	/* extract error */
   1359	err = (reg >> ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_ERR_SHIFT)
   1360		& ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_ERR_MASK;
   1361	if (err) {
   1362		dd_dev_err(dd, "%s: gasket error %d\n", __func__, err);
   1363		ret = -EIO;
   1364		goto done;
   1365	}
   1366
   1367	/* update our link information cache */
   1368	update_lbus_info(dd);
   1369	dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
   1370		    dd->lbus_info);
   1371
   1372	if (dd->lbus_speed != target_speed ||
   1373	    dd->lbus_width < target_width) { /* not target */
   1374		/* maybe retry */
   1375		do_retry = retry_count < pcie_retry;
   1376		dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
   1377			   do_retry ? ", retrying" : "");
   1378		retry_count++;
   1379		if (do_retry) {
   1380			msleep(100); /* allow time to settle */
   1381			goto retry;
   1382		}
   1383		ret = -EIO;
   1384	}
   1385
   1386done:
   1387	if (therm) {
   1388		write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
   1389		msleep(100);
   1390		dd_dev_info(dd, "%s: Re-enable therm polling\n",
   1391			    __func__);
   1392	}
   1393	release_chip_resource(dd, CR_SBUS);
   1394done_no_mutex:
   1395	/* return no error if it is OK to be at current speed */
   1396	if (ret && !return_error) {
   1397		dd_dev_err(dd, "Proceeding at current speed PCIe speed\n");
   1398		ret = 0;
   1399	}
   1400
   1401	dd_dev_info(dd, "%s: done\n", __func__);
   1402	return ret;
   1403}