cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ocxl.c (15641B)


      1// SPDX-License-Identifier: GPL-2.0+
      2// Copyright 2017 IBM Corp.
      3#include <asm/pnv-ocxl.h>
      4#include <asm/opal.h>
      5#include <misc/ocxl-config.h>
      6#include "pci.h"
      7
      8#define PNV_OCXL_TL_P9_RECV_CAP		0x000000000000000Full
      9#define PNV_OCXL_ACTAG_MAX		64
     10/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
     11#define PNV_OCXL_PASID_BITS		15
     12#define PNV_OCXL_PASID_MAX		((1 << PNV_OCXL_PASID_BITS) - 1)
     13
     14#define AFU_PRESENT (1 << 31)
     15#define AFU_INDEX_MASK 0x3F000000
     16#define AFU_INDEX_SHIFT 24
     17#define ACTAG_MASK 0xFFF
     18
     19
/*
 * Range of actags assigned to one PCI function: actags
 * [start, start + count - 1] on the link.
 */
struct actag_range {
	u16 start;
	u16 count;
};
     24
/*
 * Per-link bookkeeping shared by all the PCI functions of an
 * opencapi device. A link is identified by (domain, bus, device);
 * the 8 slots cover the possible PCI functions (PCI_FUNC is 3 bits).
 */
struct npu_link {
	struct list_head list;
	int domain;
	int bus;
	int dev;
	u16 fn_desired_actags[8];		/* actags desired, per function */
	struct actag_range fn_actags[8];	/* actags assigned, per function */
	bool assignment_done;	/* true once actags have been distributed */
};
/* all known links; both protected by links_list_lock */
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);
     36
     37
     38/*
     39 * opencapi actags handling:
     40 *
     41 * When sending commands, the opencapi device references the memory
     42 * context it's targeting with an 'actag', which is really an alias
     43 * for a (BDF, pasid) combination. When it receives a command, the NPU
     44 * must do a lookup of the actag to identify the memory context. The
     45 * hardware supports a finite number of actags per link (64 for
     46 * POWER9).
     47 *
     48 * The device can carry multiple functions, and each function can have
     49 * multiple AFUs. Each AFU advertises in its config space the number
     50 * of desired actags. The host must configure in the config space of
     51 * the AFU how many actags the AFU is really allowed to use (which can
     52 * be less than what the AFU desires).
     53 *
     54 * When a PCI function is probed by the driver, it has no visibility
     55 * about the other PCI functions and how many actags they'd like,
     56 * which makes it impossible to distribute actags fairly among AFUs.
     57 *
     58 * Unfortunately, the only way to know how many actags a function
     59 * desires is by looking at the data for each AFU in the config space
 * and adding them up. Similarly, the only way to know how many actags
     61 * all the functions of the physical device desire is by adding the
     62 * previously computed function counts. Then we can match that against
     63 * what the hardware supports.
     64 *
     65 * To get a comprehensive view, we use a 'pci fixup': at the end of
     66 * PCI enumeration, each function counts how many actags its AFUs
     67 * desire and we save it in a 'npu_link' structure, shared between all
     68 * the PCI functions of a same device. Therefore, when the first
     69 * function is probed by the driver, we can get an idea of the total
     70 * count of desired actags for the device, and assign the actags to
     71 * the AFUs, by pro-rating if needed.
     72 */
     73
     74static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
     75{
     76	int vsec = pos;
     77	u16 vendor, id;
     78
     79	while ((vsec = pci_find_next_ext_capability(dev, vsec,
     80						    OCXL_EXT_CAP_ID_DVSEC))) {
     81		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
     82				&vendor);
     83		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
     84		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
     85			return vsec;
     86	}
     87	return 0;
     88}
     89
     90static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
     91{
     92	int vsec = 0;
     93	u8 idx;
     94
     95	while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
     96					   vsec))) {
     97		pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
     98				&idx);
     99		if (idx == afu_idx)
    100			return vsec;
    101	}
    102	return 0;
    103}
    104
    105static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
    106{
    107	int pos;
    108	u32 val;
    109
    110	pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
    111					OCXL_DVSEC_FUNC_ID);
    112	if (!pos)
    113		return -ESRCH;
    114
    115	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
    116	if (val & AFU_PRESENT)
    117		*afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
    118	else
    119		*afu_idx = -1;
    120	return 0;
    121}
    122
    123static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
    124{
    125	int pos;
    126	u16 actag_sup;
    127
    128	pos = find_dvsec_afu_ctrl(dev, afu_idx);
    129	if (!pos)
    130		return -ESRCH;
    131
    132	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
    133			&actag_sup);
    134	*actag = actag_sup & ACTAG_MASK;
    135	return 0;
    136}
    137
    138static struct npu_link *find_link(struct pci_dev *dev)
    139{
    140	struct npu_link *link;
    141
    142	list_for_each_entry(link, &links_list, list) {
    143		/* The functions of a device all share the same link */
    144		if (link->domain == pci_domain_nr(dev->bus) &&
    145			link->bus == dev->bus->number &&
    146			link->dev == PCI_SLOT(dev->devfn)) {
    147			return link;
    148		}
    149	}
    150
    151	/* link doesn't exist yet. Allocate one */
    152	link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
    153	if (!link)
    154		return NULL;
    155	link->domain = pci_domain_nr(dev->bus);
    156	link->bus = dev->bus->number;
    157	link->dev = PCI_SLOT(dev->devfn);
    158	list_add(&link->list, &links_list);
    159	return link;
    160}
    161
/*
 * PCI fixup, run at the end of enumeration for every function on an
 * opencapi (NPU) PHB: count how many actags the AFUs under this
 * function desire and record the total in the npu_link shared by all
 * the functions of the device. pnv_ocxl_get_actag() later uses these
 * counts to distribute the link's actags pro-rata.
 */
static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct npu_link *link;
	int rc, afu_idx = -1, i, actag;

	/* only applies to opencapi PHBs on powernv */
	if (!machine_is(powernv))
		return;

	if (phb->type != PNV_PHB_NPU_OCAPI)
		return;

	mutex_lock(&links_list_lock);

	link = find_link(dev);
	if (!link) {
		dev_warn(&dev->dev, "couldn't update actag information\n");
		mutex_unlock(&links_list_lock);
		return;
	}

	/*
	 * Check how many actags are desired for the AFUs under that
	 * function and add it to the count for the link
	 */
	rc = get_max_afu_index(dev, &afu_idx);
	if (rc) {
		/* Most likely an invalid config space */
		dev_dbg(&dev->dev, "couldn't find AFU information\n");
		afu_idx = -1;
	}

	/* afu_idx == -1 (no AFU) leaves the count at 0 */
	link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
	for (i = 0; i <= afu_idx; i++) {
		/*
		 * AFU index 'holes' are allowed. So don't fail if we
		 * can't read the actag info for an index
		 */
		rc = get_actag_count(dev, i, &actag);
		if (rc)
			continue;
		link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
	}
	dev_dbg(&dev->dev, "total actags for function: %d\n",
		link->fn_desired_actags[PCI_FUNC(dev->devfn)]);

	mutex_unlock(&links_list_lock);
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
    212
    213static u16 assign_fn_actags(u16 desired, u16 total)
    214{
    215	u16 count;
    216
    217	if (total <= PNV_OCXL_ACTAG_MAX)
    218		count = desired;
    219	else
    220		count = PNV_OCXL_ACTAG_MAX * desired / total;
    221
    222	return count;
    223}
    224
    225static void assign_actags(struct npu_link *link)
    226{
    227	u16 actag_count, range_start = 0, total_desired = 0;
    228	int i;
    229
    230	for (i = 0; i < 8; i++)
    231		total_desired += link->fn_desired_actags[i];
    232
    233	for (i = 0; i < 8; i++) {
    234		if (link->fn_desired_actags[i]) {
    235			actag_count = assign_fn_actags(
    236				link->fn_desired_actags[i],
    237				total_desired);
    238			link->fn_actags[i].start = range_start;
    239			link->fn_actags[i].count = actag_count;
    240			range_start += actag_count;
    241			WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
    242		}
    243		pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
    244			link->domain, link->bus, link->dev, i,
    245			link->fn_actags[i].start, link->fn_actags[i].count,
    246			link->fn_desired_actags[i]);
    247	}
    248	link->assignment_done = true;
    249}
    250
    251int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
    252		u16 *supported)
    253{
    254	struct npu_link *link;
    255
    256	mutex_lock(&links_list_lock);
    257
    258	link = find_link(dev);
    259	if (!link) {
    260		dev_err(&dev->dev, "actag information not found\n");
    261		mutex_unlock(&links_list_lock);
    262		return -ENODEV;
    263	}
    264	/*
    265	 * On p9, we only have 64 actags per link, so they must be
    266	 * shared by all the functions of the same adapter. We counted
    267	 * the desired actag counts during PCI enumeration, so that we
    268	 * can allocate a pro-rated number of actags to each function.
    269	 */
    270	if (!link->assignment_done)
    271		assign_actags(link);
    272
    273	*base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;
    274	*enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;
    275	*supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
    276
    277	mutex_unlock(&links_list_lock);
    278	return 0;
    279}
    280EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
    281
    282int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
    283{
    284	struct npu_link *link;
    285	int i, rc = -EINVAL;
    286
    287	/*
    288	 * The number of PASIDs (process address space ID) which can
    289	 * be used by a function depends on how many functions exist
    290	 * on the device. The NPU needs to be configured to know how
    291	 * many bits are available to PASIDs and how many are to be
    292	 * used by the function BDF identifier.
    293	 *
    294	 * We only support one AFU-carrying function for now.
    295	 */
    296	mutex_lock(&links_list_lock);
    297
    298	link = find_link(dev);
    299	if (!link) {
    300		dev_err(&dev->dev, "actag information not found\n");
    301		mutex_unlock(&links_list_lock);
    302		return -ENODEV;
    303	}
    304
    305	for (i = 0; i < 8; i++)
    306		if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
    307			*count = PNV_OCXL_PASID_MAX;
    308			rc = 0;
    309			break;
    310		}
    311
    312	mutex_unlock(&links_list_lock);
    313	dev_dbg(&dev->dev, "%d PASIDs available for function\n",
    314		rc ? 0 : *count);
    315	return rc;
    316}
    317EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
    318
    319static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
    320{
    321	int shift, idx;
    322
    323	WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
    324	idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
    325	shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
    326	buf[idx] |= rate << shift;
    327}
    328
/*
 * Report the host Transaction Layer capabilities: the supported
 * template bitmap in *cap and the per-template receiving rates in
 * rate_buf. Returns -EINVAL if the caller's buffer has the wrong
 * size.
 */
int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
			char *rate_buf, int rate_buf_size)
{
	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
		return -EINVAL;
	/*
	 * The TL capabilities are a characteristic of the NPU, so
	 * we go with hard-coded values.
	 *
	 * The receiving rate of each template is encoded on 4 bits.
	 *
	 * On P9:
	 * - templates 0 -> 3 are supported
	 * - templates 0, 1 and 3 have a 0 receiving rate
	 * - template 2 has receiving rate of 1 (extra cycle)
	 */
	memset(rate_buf, 0, rate_buf_size);
	set_templ_rate(2, 1, rate_buf);
	*cap = PNV_OCXL_TL_P9_RECV_CAP;
	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
    351
    352int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
    353			uint64_t rate_buf_phys, int rate_buf_size)
    354{
    355	struct pci_controller *hose = pci_bus_to_host(dev->bus);
    356	struct pnv_phb *phb = hose->private_data;
    357	int rc;
    358
    359	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
    360		return -EINVAL;
    361
    362	rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
    363			rate_buf_phys, rate_buf_size);
    364	if (rc) {
    365		dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
    366		return -EINVAL;
    367	}
    368	return 0;
    369}
    370EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
    371
    372int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
    373{
    374	int rc;
    375
    376	rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
    377	if (rc) {
    378		dev_err(&dev->dev,
    379			"Can't get translation interrupt for device\n");
    380		return rc;
    381	}
    382	return 0;
    383}
    384EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
    385
/*
 * Undo pnv_ocxl_map_xsl_regs(): unmap the four XSL fault registers.
 */
void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
			void __iomem *tfc, void __iomem *pe_handle)
{
	iounmap(dsisr);
	iounmap(dar);
	iounmap(tfc);
	iounmap(pe_handle);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
    395
/*
 * Map the four XSL fault registers (DSISR, DAR, TFC, PE_HANDLE)
 * whose physical addresses are published in the "ibm,opal-xsl-mmio"
 * device tree property. On success the iomapped pointers are handed
 * to the caller, who must release them with
 * pnv_ocxl_unmap_xsl_regs(). On failure, any mapping already done is
 * undone and an error code is returned.
 */
int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
			void __iomem **dar, void __iomem **tfc,
			void __iomem **pe_handle)
{
	u64 reg;
	int i, j, rc = 0;
	void __iomem *regs[4];

	/*
	 * opal stores the mmio addresses of the DSISR, DAR, TFC and
	 * PE_HANDLE registers in a device tree property, in that
	 * order
	 */
	for (i = 0; i < 4; i++) {
		rc = of_property_read_u64_index(dev->dev.of_node,
						"ibm,opal-xsl-mmio", i, &reg);
		if (rc)
			break;
		regs[i] = ioremap(reg, 8);
		if (!regs[i]) {
			rc = -EINVAL;
			break;
		}
	}
	if (rc) {
		dev_err(&dev->dev, "Can't map translation mmio registers\n");
		/* unwind only the registers mapped so far */
		for (j = i - 1; j >= 0; j--)
			iounmap(regs[j]);
	} else {
		*dsisr = regs[0];
		*dar = regs[1];
		*tfc = regs[2];
		*pe_handle = regs[3];
	}
	return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
    433
/*
 * Opaque handle returned by pnv_ocxl_spa_setup(): remembers which
 * PHB and BDF the Shared Process Area was registered for, so it can
 * be released or have PEs invalidated later.
 */
struct spa_data {
	u64 phb_opal_id;
	u32 bdfn;
};
    438
    439int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
    440		void **platform_data)
    441{
    442	struct pci_controller *hose = pci_bus_to_host(dev->bus);
    443	struct pnv_phb *phb = hose->private_data;
    444	struct spa_data *data;
    445	u32 bdfn;
    446	int rc;
    447
    448	data = kzalloc(sizeof(*data), GFP_KERNEL);
    449	if (!data)
    450		return -ENOMEM;
    451
    452	bdfn = (dev->bus->number << 8) | dev->devfn;
    453	rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
    454				PE_mask);
    455	if (rc) {
    456		dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
    457		kfree(data);
    458		return rc;
    459	}
    460	data->phb_opal_id = phb->opal_id;
    461	data->bdfn = bdfn;
    462	*platform_data = (void *) data;
    463	return 0;
    464}
    465EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
    466
    467void pnv_ocxl_spa_release(void *platform_data)
    468{
    469	struct spa_data *data = (struct spa_data *) platform_data;
    470	int rc;
    471
    472	rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
    473	WARN_ON(rc);
    474	kfree(data);
    475}
    476EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
    477
    478int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
    479{
    480	struct spa_data *data = (struct spa_data *) platform_data;
    481	int rc;
    482
    483	rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
    484	return rc;
    485}
    486EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
    487
    488int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
    489		      uint64_t lpcr, void __iomem **arva)
    490{
    491	struct pci_controller *hose = pci_bus_to_host(dev->bus);
    492	struct pnv_phb *phb = hose->private_data;
    493	u64 mmio_atsd;
    494	int rc;
    495
    496	/* ATSD physical address.
    497	 * ATSD LAUNCH register: write access initiates a shoot down to
    498	 * initiate the TLB Invalidate command.
    499	 */
    500	rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
    501					0, &mmio_atsd);
    502	if (rc) {
    503		dev_info(&dev->dev, "No available ATSD found\n");
    504		return rc;
    505	}
    506
    507	/* Assign a register set to a Logical Partition and MMIO ATSD
    508	 * LPARID register to the required value.
    509	 */
    510	rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
    511			       lparid, lpcr);
    512	if (rc) {
    513		dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
    514		return rc;
    515	}
    516
    517	*arva = ioremap(mmio_atsd, 24);
    518	if (!(*arva)) {
    519		dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
    520		rc = -ENOMEM;
    521	}
    522
    523	return rc;
    524}
    525EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
    526
/* Release the ATSD register mapping obtained from pnv_ocxl_map_lpar() */
void pnv_ocxl_unmap_lpar(void __iomem *arva)
{
	iounmap(arva);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
    532
/*
 * Initiate a TLB invalidate through the device's MMIO ATSD registers
 * and wait for it to complete. If 'addr' is non-zero, invalidate the
 * single page of 'page_size' bytes at that address for 'pid';
 * otherwise invalidate the whole PID. Polls the status register
 * until the pending bit (bit 63) clears, logging an error and giving
 * up after PNV_OCXL_ATSD_TIMEOUT seconds.
 */
void pnv_ocxl_tlb_invalidate(void __iomem *arva,
			     unsigned long pid,
			     unsigned long addr,
			     unsigned long page_size)
{
	unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
	u64 val = 0ull;
	int pend;
	u8 size;

	/* no ATSD mapping was set up for this device */
	if (!(arva))
		return;

	if (addr) {
		/* load Abbreviated Virtual Address register with
		 * the necessary value
		 */
		val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
		out_be64(arva + PNV_OCXL_ATSD_AVA, val);
	}

	/* Write access initiates a shoot down to initiate the
	 * TLB Invalidate command
	 */
	val = PNV_OCXL_ATSD_LNCH_R;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
	if (addr)
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
	else {
		/* no address: invalidate the whole PID */
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
		val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
	}
	val |= PNV_OCXL_ATSD_LNCH_PRS;
	/* Actual Page Size to be invalidated
	 * 000 4KB
	 * 101 64KB
	 * 001 2MB
	 * 010 1GB
	 */
	size = 0b101;
	if (page_size == 0x1000)
		size = 0b000;
	if (page_size == 0x200000)
		size = 0b001;
	if (page_size == 0x40000000)
		size = 0b010;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
	out_be64(arva + PNV_OCXL_ATSD_LNCH, val);

	/* Poll the ATSD status register to determine when the
	 * TLB Invalidate has been completed.
	 */
	val = in_be64(arva + PNV_OCXL_ATSD_STAT);
	pend = val >> 63;

	while (pend) {
		if (time_after_eq(jiffies, timeout)) {
			pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
			       __func__, val, pid);
			return;
		}
		cpu_relax();
		val = in_be64(arva + PNV_OCXL_ATSD_STAT);
		pend = val >> 63;
	}
}
EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);