cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

pci.c (14349B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
      3#include <linux/io-64-nonatomic-lo-hi.h>
      4#include <linux/moduleparam.h>
      5#include <linux/module.h>
      6#include <linux/delay.h>
      7#include <linux/sizes.h>
      8#include <linux/mutex.h>
      9#include <linux/list.h>
     10#include <linux/pci.h>
     11#include <linux/io.h>
     12#include "cxlmem.h"
     13#include "cxlpci.h"
     14#include "cxl.h"
     15
     16/**
     17 * DOC: cxl pci
     18 *
     19 * This implements the PCI exclusive functionality for a CXL device as it is
     20 * defined by the Compute Express Link specification. CXL devices may surface
     21 * certain functionality even if it isn't CXL enabled. While this driver is
     22 * focused around the PCI specific aspects of a CXL device, it binds to the
     23 * specific CXL memory device class code, and therefore the implementation of
     24 * cxl_pci is focused around CXL memory devices.
     25 *
     26 * The driver has several responsibilities, mainly:
     27 *  - Create the memX device and register on the CXL bus.
     28 *  - Enumerate device's register interface and map them.
     29 *  - Registers nvdimm bridge device with cxl_core.
     30 *  - Registers a CXL mailbox with cxl_core.
     31 */
     32
/*
 * Evaluates to non-zero while the doorbell bit is set in the mailbox
 * control register of @cxlds. Performs one readl() per evaluation.
 */
#define cxl_doorbell_busy(cxlds)                                                \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/*
 * CXL 2.0 - 8.2.8.4
 *
 * NOTE: despite the _MS suffix this value is expressed in jiffies (2 * HZ,
 * i.e. two seconds); it is added directly to a jiffies timestamp by the
 * doorbell wait loop.
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)

/*
 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 * dictate how long to wait for the mailbox to become ready. The new
 * field allows the device to tell software the amount of time to wait
 * before mailbox ready. This field per the spec theoretically allows
 * for up to 255 seconds. 255 seconds is unreasonably long, it's longer
 * than the maximum SATA port link recovery wait. Default to 60 seconds
 * until someone builds a CXL device that needs more time in practice.
 */
/* Module parameter, in seconds; multiplied by HZ where it is consumed. */
static unsigned short mbox_ready_timeout = 60;
module_param(mbox_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
     52
     53static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
     54{
     55	const unsigned long start = jiffies;
     56	unsigned long end = start;
     57
     58	while (cxl_doorbell_busy(cxlds)) {
     59		end = jiffies;
     60
     61		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
     62			/* Check again in case preempted before timeout test */
     63			if (!cxl_doorbell_busy(cxlds))
     64				break;
     65			return -ETIMEDOUT;
     66		}
     67		cpu_relax();
     68	}
     69
     70	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
     71		jiffies_to_msecs(end) - jiffies_to_msecs(start));
     72	return 0;
     73}
     74
     75#define cxl_err(dev, status, msg)                                        \
     76	dev_err_ratelimited(dev, msg ", device state %s%s\n",                  \
     77			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
     78			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
     79
     80#define cxl_cmd_err(dev, cmd, status, msg)                               \
     81	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n",    \
     82			    (cmd)->opcode,                                     \
     83			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
     84			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
     85
     86/**
     87 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
     88 * @cxlds: The device state to communicate with.
     89 * @mbox_cmd: Command to send to the memory device.
     90 *
     91 * Context: Any context. Expects mbox_mutex to be held.
     92 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
     93 *         Caller should check the return code in @mbox_cmd to make sure it
     94 *         succeeded.
     95 *
     96 * This is a generic form of the CXL mailbox send command thus only using the
     97 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
     98 * devices, and perhaps other types of CXL devices may have further information
     99 * available upon error conditions. Driver facilities wishing to send mailbox
    100 * commands should use the wrapper command.
    101 *
    102 * The CXL spec allows for up to two mailboxes. The intention is for the primary
    103 * mailbox to be OS controlled and the secondary mailbox to be used by system
    104 * firmware. This allows the OS and firmware to communicate with the device and
    105 * not need to coordinate with each other. The driver only uses the primary
    106 * mailbox.
    107 */
    108static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
    109				   struct cxl_mbox_cmd *mbox_cmd)
    110{
    111	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
    112	struct device *dev = cxlds->dev;
    113	u64 cmd_reg, status_reg;
    114	size_t out_len;
    115	int rc;
    116
    117	lockdep_assert_held(&cxlds->mbox_mutex);
    118
    119	/*
    120	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
    121	 *   1. Caller reads MB Control Register to verify doorbell is clear
    122	 *   2. Caller writes Command Register
    123	 *   3. Caller writes Command Payload Registers if input payload is non-empty
    124	 *   4. Caller writes MB Control Register to set doorbell
    125	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
    126	 *   6. Caller reads MB Status Register to fetch Return code
    127	 *   7. If command successful, Caller reads Command Register to get Payload Length
    128	 *   8. If output payload is non-empty, host reads Command Payload Registers
    129	 *
    130	 * Hardware is free to do whatever it wants before the doorbell is rung,
    131	 * and isn't allowed to change anything after it clears the doorbell. As
    132	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
    133	 * also happen in any order (though some orders might not make sense).
    134	 */
    135
    136	/* #1 */
    137	if (cxl_doorbell_busy(cxlds)) {
    138		u64 md_status =
    139			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
    140
    141		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
    142			    "mailbox queue busy");
    143		return -EBUSY;
    144	}
    145
    146	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
    147			     mbox_cmd->opcode);
    148	if (mbox_cmd->size_in) {
    149		if (WARN_ON(!mbox_cmd->payload_in))
    150			return -EINVAL;
    151
    152		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
    153				      mbox_cmd->size_in);
    154		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
    155	}
    156
    157	/* #2, #3 */
    158	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
    159
    160	/* #4 */
    161	dev_dbg(dev, "Sending command\n");
    162	writel(CXLDEV_MBOX_CTRL_DOORBELL,
    163	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
    164
    165	/* #5 */
    166	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
    167	if (rc == -ETIMEDOUT) {
    168		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
    169
    170		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
    171		return rc;
    172	}
    173
    174	/* #6 */
    175	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
    176	mbox_cmd->return_code =
    177		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
    178
    179	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
    180		dev_dbg(dev, "Mailbox operation had an error: %s\n",
    181			cxl_mbox_cmd_rc2str(mbox_cmd));
    182		return 0; /* completed but caller must check return_code */
    183	}
    184
    185	/* #7 */
    186	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
    187	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
    188
    189	/* #8 */
    190	if (out_len && mbox_cmd->payload_out) {
    191		/*
    192		 * Sanitize the copy. If hardware misbehaves, out_len per the
    193		 * spec can actually be greater than the max allowed size (21
    194		 * bits available but spec defined 1M max). The caller also may
    195		 * have requested less data than the hardware supplied even
    196		 * within spec.
    197		 */
    198		size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len);
    199
    200		memcpy_fromio(mbox_cmd->payload_out, payload, n);
    201		mbox_cmd->size_out = n;
    202	} else {
    203		mbox_cmd->size_out = 0;
    204	}
    205
    206	return 0;
    207}
    208
    209static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
    210{
    211	int rc;
    212
    213	mutex_lock_io(&cxlds->mbox_mutex);
    214	rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
    215	mutex_unlock(&cxlds->mbox_mutex);
    216
    217	return rc;
    218}
    219
    220static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
    221{
    222	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
    223	unsigned long timeout;
    224	u64 md_status;
    225
    226	timeout = jiffies + mbox_ready_timeout * HZ;
    227	do {
    228		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
    229		if (md_status & CXLMDEV_MBOX_IF_READY)
    230			break;
    231		if (msleep_interruptible(100))
    232			break;
    233	} while (!time_after(jiffies, timeout));
    234
    235	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
    236		cxl_err(cxlds->dev, md_status,
    237			"timeout awaiting mailbox ready");
    238		return -ETIMEDOUT;
    239	}
    240
    241	/*
    242	 * A command may be in flight from a previous driver instance,
    243	 * think kexec, do one doorbell wait so that
    244	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
    245	 * source for future doorbell busy events.
    246	 */
    247	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
    248		cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
    249		return -ETIMEDOUT;
    250	}
    251
    252	cxlds->mbox_send = cxl_pci_mbox_send;
    253	cxlds->payload_size =
    254		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
    255
    256	/*
    257	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
    258	 *
    259	 * If the size is too small, mandatory commands will not work and so
    260	 * there's no point in going forward. If the size is too large, there's
    261	 * no harm is soft limiting it.
    262	 */
    263	cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M);
    264	if (cxlds->payload_size < 256) {
    265		dev_err(cxlds->dev, "Mailbox is too small (%zub)",
    266			cxlds->payload_size);
    267		return -ENXIO;
    268	}
    269
    270	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
    271		cxlds->payload_size);
    272
    273	return 0;
    274}
    275
    276static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
    277{
    278	void __iomem *addr;
    279	int bar = map->barno;
    280	struct device *dev = &pdev->dev;
    281	resource_size_t offset = map->block_offset;
    282
    283	/* Basic sanity check that BAR is big enough */
    284	if (pci_resource_len(pdev, bar) < offset) {
    285		dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
    286			&pdev->resource[bar], &offset);
    287		return -ENXIO;
    288	}
    289
    290	addr = pci_iomap(pdev, bar, 0);
    291	if (!addr) {
    292		dev_err(dev, "failed to map registers\n");
    293		return -ENOMEM;
    294	}
    295
    296	dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
    297		bar, &offset);
    298
    299	map->base = addr + map->block_offset;
    300	return 0;
    301}
    302
    303static void cxl_unmap_regblock(struct pci_dev *pdev,
    304			       struct cxl_register_map *map)
    305{
    306	pci_iounmap(pdev, map->base - map->block_offset);
    307	map->base = NULL;
    308}
    309
    310static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
    311{
    312	struct cxl_component_reg_map *comp_map;
    313	struct cxl_device_reg_map *dev_map;
    314	struct device *dev = &pdev->dev;
    315	void __iomem *base = map->base;
    316
    317	switch (map->reg_type) {
    318	case CXL_REGLOC_RBI_COMPONENT:
    319		comp_map = &map->component_map;
    320		cxl_probe_component_regs(dev, base, comp_map);
    321		if (!comp_map->hdm_decoder.valid) {
    322			dev_err(dev, "HDM decoder registers not found\n");
    323			return -ENXIO;
    324		}
    325
    326		dev_dbg(dev, "Set up component registers\n");
    327		break;
    328	case CXL_REGLOC_RBI_MEMDEV:
    329		dev_map = &map->device_map;
    330		cxl_probe_device_regs(dev, base, dev_map);
    331		if (!dev_map->status.valid || !dev_map->mbox.valid ||
    332		    !dev_map->memdev.valid) {
    333			dev_err(dev, "registers not found: %s%s%s\n",
    334				!dev_map->status.valid ? "status " : "",
    335				!dev_map->mbox.valid ? "mbox " : "",
    336				!dev_map->memdev.valid ? "memdev " : "");
    337			return -ENXIO;
    338		}
    339
    340		dev_dbg(dev, "Probing device registers...\n");
    341		break;
    342	default:
    343		break;
    344	}
    345
    346	return 0;
    347}
    348
    349static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
    350{
    351	struct device *dev = cxlds->dev;
    352	struct pci_dev *pdev = to_pci_dev(dev);
    353
    354	switch (map->reg_type) {
    355	case CXL_REGLOC_RBI_COMPONENT:
    356		cxl_map_component_regs(pdev, &cxlds->regs.component, map);
    357		dev_dbg(dev, "Mapping component registers...\n");
    358		break;
    359	case CXL_REGLOC_RBI_MEMDEV:
    360		cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
    361		dev_dbg(dev, "Probing device registers...\n");
    362		break;
    363	default:
    364		break;
    365	}
    366
    367	return 0;
    368}
    369
    370static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
    371			  struct cxl_register_map *map)
    372{
    373	int rc;
    374
    375	rc = cxl_find_regblock(pdev, type, map);
    376	if (rc)
    377		return rc;
    378
    379	rc = cxl_map_regblock(pdev, map);
    380	if (rc)
    381		return rc;
    382
    383	rc = cxl_probe_regs(pdev, map);
    384	cxl_unmap_regblock(pdev, map);
    385
    386	return rc;
    387}
    388
    389static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
    390{
    391	struct cxl_register_map map;
    392	struct cxl_memdev *cxlmd;
    393	struct cxl_dev_state *cxlds;
    394	int rc;
    395
    396	/*
    397	 * Double check the anonymous union trickery in struct cxl_regs
    398	 * FIXME switch to struct_group()
    399	 */
    400	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
    401		     offsetof(struct cxl_regs, device_regs.memdev));
    402
    403	rc = pcim_enable_device(pdev);
    404	if (rc)
    405		return rc;
    406
    407	cxlds = cxl_dev_state_create(&pdev->dev);
    408	if (IS_ERR(cxlds))
    409		return PTR_ERR(cxlds);
    410
    411	cxlds->serial = pci_get_dsn(pdev);
    412	cxlds->cxl_dvsec = pci_find_dvsec_capability(
    413		pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
    414	if (!cxlds->cxl_dvsec)
    415		dev_warn(&pdev->dev,
    416			 "Device DVSEC not present, skip CXL.mem init\n");
    417
    418	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
    419	if (rc)
    420		return rc;
    421
    422	rc = cxl_map_regs(cxlds, &map);
    423	if (rc)
    424		return rc;
    425
    426	/*
    427	 * If the component registers can't be found, the cxl_pci driver may
    428	 * still be useful for management functions so don't return an error.
    429	 */
    430	cxlds->component_reg_phys = CXL_RESOURCE_NONE;
    431	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
    432	if (rc)
    433		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
    434
    435	cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
    436
    437	rc = cxl_pci_setup_mailbox(cxlds);
    438	if (rc)
    439		return rc;
    440
    441	rc = cxl_enumerate_cmds(cxlds);
    442	if (rc)
    443		return rc;
    444
    445	rc = cxl_dev_state_identify(cxlds);
    446	if (rc)
    447		return rc;
    448
    449	rc = cxl_mem_create_range_info(cxlds);
    450	if (rc)
    451		return rc;
    452
    453	cxlmd = devm_cxl_add_memdev(cxlds);
    454	if (IS_ERR(cxlmd))
    455		return PTR_ERR(cxlmd);
    456
    457	if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
    458		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
    459
    460	return rc;
    461}
    462
/* Devices this driver binds to, matched by PCI class code. */
static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

/*
 * PCI driver definition. Only a probe callback is provided; no remove
 * callback is set here. Asynchronous probing is requested via probe_type.
 */
static struct pci_driver cxl_pci_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= cxl_mem_pci_tbl,
	.probe			= cxl_pci_probe,
	.driver	= {
		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
	},
};

MODULE_LICENSE("GPL v2");
module_pci_driver(cxl_pci_driver);
/* Import the CXL symbol namespace used by the cxl core helpers called above */
MODULE_IMPORT_NS(CXL);
    481MODULE_IMPORT_NS(CXL);