cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

p2pdma.c (27082B)


// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Peer 2 Peer DMA support.
 *
 * Copyright (c) 2016-2018, Logan Gunthorpe
 * Copyright (c) 2016-2017, Microsemi Corporation
 * Copyright (c) 2017, Christoph Hellwig
 * Copyright (c) 2018, Eideticom Inc.
 */

#define pr_fmt(fmt) "pci-p2pdma: " fmt
#include <linux/ctype.h>
#include <linux/pci-p2pdma.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/memremap.h>
#include <linux/percpu-refcount.h>
#include <linux/random.h>
#include <linux/seq_buf.h>
#include <linux/xarray.h>

enum pci_p2pdma_map_type {
	PCI_P2PDMA_MAP_UNKNOWN = 0,
	PCI_P2PDMA_MAP_NOT_SUPPORTED,
	PCI_P2PDMA_MAP_BUS_ADDR,
	PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
};

struct pci_p2pdma {
	struct gen_pool *pool;
	bool p2pmem_published;
	struct xarray map_types;
};

struct pci_p2pdma_pagemap {
	struct dev_pagemap pgmap;
	struct pci_dev *provider;
	u64 bus_offset;
};

static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
{
	return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
}

static ssize_t size_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_p2pdma *p2pdma;
	size_t size = 0;

	rcu_read_lock();
	p2pdma = rcu_dereference(pdev->p2pdma);
	if (p2pdma && p2pdma->pool)
		size = gen_pool_size(p2pdma->pool);
	rcu_read_unlock();

	return sysfs_emit(buf, "%zd\n", size);
}
static DEVICE_ATTR_RO(size);

static ssize_t available_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_p2pdma *p2pdma;
	size_t avail = 0;

	rcu_read_lock();
	p2pdma = rcu_dereference(pdev->p2pdma);
	if (p2pdma && p2pdma->pool)
		avail = gen_pool_avail(p2pdma->pool);
	rcu_read_unlock();

	return sysfs_emit(buf, "%zd\n", avail);
}
static DEVICE_ATTR_RO(available);

static ssize_t published_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_p2pdma *p2pdma;
	bool published = false;

	rcu_read_lock();
	p2pdma = rcu_dereference(pdev->p2pdma);
	if (p2pdma)
		published = p2pdma->p2pmem_published;
	rcu_read_unlock();

	return sysfs_emit(buf, "%d\n", published);
}
static DEVICE_ATTR_RO(published);

static struct attribute *p2pmem_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_available.attr,
	&dev_attr_published.attr,
	NULL,
};

static const struct attribute_group p2pmem_group = {
	.attrs = p2pmem_attrs,
	.name = "p2pmem",
};

static void pci_p2pdma_release(void *data)
{
	struct pci_dev *pdev = data;
	struct pci_p2pdma *p2pdma;

	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
	if (!p2pdma)
		return;

	/* Flush and disable pci_alloc_p2pmem() */
	pdev->p2pdma = NULL;
	synchronize_rcu();

	gen_pool_destroy(p2pdma->pool);
	sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
	xa_destroy(&p2pdma->map_types);
}

static int pci_p2pdma_setup(struct pci_dev *pdev)
{
	int error = -ENOMEM;
	struct pci_p2pdma *p2p;

	p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
	if (!p2p)
		return -ENOMEM;

	xa_init(&p2p->map_types);

	p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
	if (!p2p->pool)
		goto out;

	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
	if (error)
		goto out_pool_destroy;

	error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
	if (error)
		goto out_pool_destroy;

	rcu_assign_pointer(pdev->p2pdma, p2p);
	return 0;

out_pool_destroy:
	gen_pool_destroy(p2p->pool);
out:
	devm_kfree(&pdev->dev, p2p);
	return error;
}

/**
 * pci_p2pdma_add_resource - add memory for use as p2p memory
 * @pdev: the device to add the memory to
 * @bar: PCI BAR to add
 * @size: size of the memory to add, may be zero to use the whole BAR
 * @offset: offset into the PCI BAR
 *
 * The memory will be given ZONE_DEVICE struct pages so that it may
 * be used with any DMA request.
 */
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
			    u64 offset)
{
	struct pci_p2pdma_pagemap *p2p_pgmap;
	struct dev_pagemap *pgmap;
	struct pci_p2pdma *p2pdma;
	void *addr;
	int error;

	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
		return -EINVAL;

	if (offset >= pci_resource_len(pdev, bar))
		return -EINVAL;

	if (!size)
		size = pci_resource_len(pdev, bar) - offset;

	if (size + offset > pci_resource_len(pdev, bar))
		return -EINVAL;

	if (!pdev->p2pdma) {
		error = pci_p2pdma_setup(pdev);
		if (error)
			return error;
	}

	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
	if (!p2p_pgmap)
		return -ENOMEM;

	pgmap = &p2p_pgmap->pgmap;
	pgmap->range.start = pci_resource_start(pdev, bar) + offset;
	pgmap->range.end = pgmap->range.start + size - 1;
	pgmap->nr_range = 1;
	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;

	p2p_pgmap->provider = pdev;
	p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
		pci_resource_start(pdev, bar);

	addr = devm_memremap_pages(&pdev->dev, pgmap);
	if (IS_ERR(addr)) {
		error = PTR_ERR(addr);
		goto pgmap_free;
	}

	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
	error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr,
			pci_bus_address(pdev, bar) + offset,
			range_len(&pgmap->range), dev_to_node(&pdev->dev),
			&pgmap->ref);
	if (error)
		goto pages_free;

	pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n",
		 pgmap->range.start, pgmap->range.end);

	return 0;

pages_free:
	devm_memunmap_pages(&pdev->dev, pgmap);
pgmap_free:
	devm_kfree(&pdev->dev, pgmap);
	return error;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
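
/*
 * Example (illustrative sketch, not from an in-tree driver): a provider
 * driver would typically donate BAR memory from its probe() routine and
 * then publish it. The BAR index here is hypothetical.
 *
 *	static int example_probe(struct pci_dev *pdev,
 *				 const struct pci_device_id *id)
 *	{
 *		int rc;
 *
 *		rc = pcim_enable_device(pdev);
 *		if (rc)
 *			return rc;
 *
 *		rc = pci_p2pdma_add_resource(pdev, 4, 0, 0);
 *		if (rc)
 *			return rc;
 *
 *		pci_p2pmem_publish(pdev, true);
 *		return 0;
 *	}
 */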

/*
 * Note this function returns the parent PCI device with a
 * reference taken. It is the caller's responsibility to drop
 * the reference.
 */
static struct pci_dev *find_parent_pci_dev(struct device *dev)
{
	struct device *parent;

	dev = get_device(dev);

	while (dev) {
		if (dev_is_pci(dev))
			return to_pci_dev(dev);

		parent = get_device(dev->parent);
		put_device(dev);
		dev = parent;
	}

	return NULL;
}

/*
 * Check if a PCI bridge has its ACS redirection bits set to redirect P2P
 * TLPs upstream via ACS. Returns 1 if the packets will be redirected
 * upstream, 0 otherwise.
 */
static int pci_bridge_has_acs_redir(struct pci_dev *pdev)
{
	int pos;
	u16 ctrl;

	pos = pdev->acs_cap;
	if (!pos)
		return 0;

	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);

	if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC))
		return 1;

	return 0;
}

static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
{
	if (!buf)
		return;

	seq_buf_printf(buf, "%s;", pci_name(pdev));
}

static bool cpu_supports_p2pdma(void)
{
#ifdef CONFIG_X86
	struct cpuinfo_x86 *c = &cpu_data(0);

	/* Any AMD CPU whose family ID is Zen or newer supports p2pdma */
	if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17)
		return true;
#endif

	return false;
}

static const struct pci_p2pdma_whitelist_entry {
	unsigned short vendor;
	unsigned short device;
	enum {
		REQ_SAME_HOST_BRIDGE	= 1 << 0,
	} flags;
} pci_p2pdma_whitelist[] = {
	/* Intel Xeon E5/Core i7 */
	{PCI_VENDOR_ID_INTEL,	0x3c00, REQ_SAME_HOST_BRIDGE},
	{PCI_VENDOR_ID_INTEL,	0x3c01, REQ_SAME_HOST_BRIDGE},
	/* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */
	{PCI_VENDOR_ID_INTEL,	0x2f00, REQ_SAME_HOST_BRIDGE},
	{PCI_VENDOR_ID_INTEL,	0x2f01, REQ_SAME_HOST_BRIDGE},
	/* Intel SkyLake-E */
	{PCI_VENDOR_ID_INTEL,	0x2030, 0},
	{PCI_VENDOR_ID_INTEL,	0x2031, 0},
	{PCI_VENDOR_ID_INTEL,	0x2032, 0},
	{PCI_VENDOR_ID_INTEL,	0x2033, 0},
	{PCI_VENDOR_ID_INTEL,	0x2020, 0},
	{PCI_VENDOR_ID_INTEL,	0x09a2, 0},
	{}
};

/*
 * If the first device on host's root bus is either devfn 00.0 or a PCIe
 * Root Port, return it.  Otherwise return NULL.
 *
 * We often use a devfn 00.0 "host bridge" in the pci_p2pdma_whitelist[]
 * (though there is no PCI/PCIe requirement for such a device).  On some
 * platforms, e.g., Intel Skylake, there is no such host bridge device, and
 * pci_p2pdma_whitelist[] may contain a Root Port at any devfn.
 *
 * This function is similar to pci_get_slot(host->bus, 0), but it does
 * not take the pci_bus_sem lock since __host_bridge_whitelist() must not
 * sleep.
 *
 * For this to be safe, the caller should hold a reference to a device on the
 * bridge, which should ensure the host_bridge device will not be freed
 * or removed from the head of the devices list.
 */
static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host)
{
	struct pci_dev *root;

	root = list_first_entry_or_null(&host->bus->devices,
					struct pci_dev, bus_list);

	if (!root)
		return NULL;

	if (root->devfn == PCI_DEVFN(0, 0))
		return root;

	if (pci_pcie_type(root) == PCI_EXP_TYPE_ROOT_PORT)
		return root;

	return NULL;
}

static bool __host_bridge_whitelist(struct pci_host_bridge *host,
				    bool same_host_bridge, bool warn)
{
	struct pci_dev *root = pci_host_bridge_dev(host);
	const struct pci_p2pdma_whitelist_entry *entry;
	unsigned short vendor, device;

	if (!root)
		return false;

	vendor = root->vendor;
	device = root->device;

	for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) {
		if (vendor != entry->vendor || device != entry->device)
			continue;
		if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge)
			return false;

		return true;
	}

	if (warn)
		pci_warn(root, "Host bridge not in P2PDMA whitelist: %04x:%04x\n",
			 vendor, device);

	return false;
}

/*
 * If we can't find a common upstream bridge take a look at the root
 * complex and compare it to a whitelist of known good hardware.
 */
static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b,
				  bool warn)
{
	struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus);
	struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus);

	if (host_a == host_b)
		return __host_bridge_whitelist(host_a, true, warn);

	if (__host_bridge_whitelist(host_a, false, warn) &&
	    __host_bridge_whitelist(host_b, false, warn))
		return true;

	return false;
}

static unsigned long map_types_idx(struct pci_dev *client)
{
	return (pci_domain_nr(client->bus) << 16) |
		(client->bus->number << 8) | client->devfn;
}
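
/*
 * Example (illustrative): map_types_idx() above packs domain, bus and
 * devfn into a single xarray key, so a client at 0000:01:00.1 (domain 0,
 * bus 0x01, devfn 0x01) maps to (0 << 16) | (0x01 << 8) | 0x01 == 0x0101.
 */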

/*
 * Calculate the P2PDMA mapping type and distance between two PCI devices.
 *
 * If the two devices are the same PCI function, return
 * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 0.
 *
 * If they are two functions of the same device, return
 * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 2 (one hop up to the bridge,
 * then one hop back down to another function of the same device).
 *
 * In the case where two devices are connected to the same PCIe switch,
 * return a distance of 4. This corresponds to the following PCI tree:
 *
 *     -+  Root Port
 *      \+ Switch Upstream Port
 *       +-+ Switch Downstream Port 0
 *       + \- Device A
 *       \-+ Switch Downstream Port 1
 *         \- Device B
 *
 * The distance is 4 because we traverse from Device A to Downstream Port 0
 * to the common Switch Upstream Port, back down to Downstream Port 1 and
 * then to Device B. The mapping type returned depends on the ACS
 * redirection setting of the ports along the path.
 *
 * If ACS redirect is set on any port in the path, traffic between the
 * devices will go through the host bridge, so return
 * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; otherwise return
 * PCI_P2PDMA_MAP_BUS_ADDR.
 *
 * Any two devices that have a data path that goes through the host bridge
 * will consult a whitelist. If the host bridge is in the whitelist, return
 * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of
 * ports per above. If the device is not in the whitelist, return
 * PCI_P2PDMA_MAP_NOT_SUPPORTED.
 */
static enum pci_p2pdma_map_type
calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client,
		int *dist, bool verbose)
{
	enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
	struct pci_dev *a = provider, *b = client, *bb;
	bool acs_redirects = false;
	struct pci_p2pdma *p2pdma;
	struct seq_buf acs_list;
	int acs_cnt = 0;
	int dist_a = 0;
	int dist_b = 0;
	char buf[128];

	seq_buf_init(&acs_list, buf, sizeof(buf));

	/*
	 * Note, we don't need to take references to devices returned by
	 * pci_upstream_bridge() seeing we hold a reference to a child
	 * device which will already hold a reference to the upstream bridge.
	 */
	while (a) {
		dist_b = 0;

		if (pci_bridge_has_acs_redir(a)) {
			seq_buf_print_bus_devfn(&acs_list, a);
			acs_cnt++;
		}

		bb = b;

		while (bb) {
			if (a == bb)
				goto check_b_path_acs;

			bb = pci_upstream_bridge(bb);
			dist_b++;
		}

		a = pci_upstream_bridge(a);
		dist_a++;
	}

	*dist = dist_a + dist_b;
	goto map_through_host_bridge;

check_b_path_acs:
	bb = b;

	while (bb) {
		if (a == bb)
			break;

		if (pci_bridge_has_acs_redir(bb)) {
			seq_buf_print_bus_devfn(&acs_list, bb);
			acs_cnt++;
		}

		bb = pci_upstream_bridge(bb);
	}

	*dist = dist_a + dist_b;

	if (!acs_cnt) {
		map_type = PCI_P2PDMA_MAP_BUS_ADDR;
		goto done;
	}

	if (verbose) {
		acs_list.buffer[acs_list.len-1] = 0; /* drop final semicolon */
		pci_warn(client, "ACS redirect is set between the client and provider (%s)\n",
			 pci_name(provider));
		pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
			 acs_list.buffer);
	}
	acs_redirects = true;

map_through_host_bridge:
	if (!cpu_supports_p2pdma() &&
	    !host_bridge_whitelist(provider, client, acs_redirects)) {
		if (verbose)
			pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n",
				 pci_name(provider));
		map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
	}
done:
	rcu_read_lock();
	p2pdma = rcu_dereference(provider->p2pdma);
	if (p2pdma)
		xa_store(&p2pdma->map_types, map_types_idx(client),
			 xa_mk_value(map_type), GFP_KERNEL);
	rcu_read_unlock();
	return map_type;
}

/**
 * pci_p2pdma_distance_many - Determine the cumulative distance between
 *	a p2pdma provider and the clients in use.
 * @provider: p2pdma provider to check against the client list
 * @clients: array of devices to check (NULL-terminated)
 * @num_clients: number of clients in the array
 * @verbose: if true, print warnings for devices when we return -1
 *
 * Returns -1 if any of the clients are not compatible, otherwise returns a
 * positive number where a lower number is the preferable choice. (If there's
 * one client that's the same as the provider, it will return 0, which is the
 * best choice).
 *
 * "compatible" means the provider and the clients are either all behind
 * the same PCI root port or the host bridges connected to each of the devices
 * are listed in the 'pci_p2pdma_whitelist'.
 */
int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
			     int num_clients, bool verbose)
{
	enum pci_p2pdma_map_type map;
	bool not_supported = false;
	struct pci_dev *pci_client;
	int total_dist = 0;
	int i, distance;

	if (num_clients == 0)
		return -1;

	for (i = 0; i < num_clients; i++) {
		pci_client = find_parent_pci_dev(clients[i]);
		if (!pci_client) {
			if (verbose)
				dev_warn(clients[i],
					 "cannot be used for peer-to-peer DMA as it is not a PCI device\n");
			return -1;
		}

		map = calc_map_type_and_dist(provider, pci_client, &distance,
					     verbose);

		pci_dev_put(pci_client);

		if (map == PCI_P2PDMA_MAP_NOT_SUPPORTED)
			not_supported = true;

		if (not_supported && !verbose)
			break;

		total_dist += distance;
	}

	if (not_supported)
		return -1;

	return total_dist;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
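
/*
 * Example (illustrative sketch; "provider", "client_a" and "client_b" are
 * hypothetical and assumed to be held by the caller): comparing a
 * candidate provider against the devices that will participate in the
 * transfer.
 *
 *	struct device *clients[] = { &client_a->dev, &client_b->dev };
 *	int dist;
 *
 *	dist = pci_p2pdma_distance_many(provider, clients,
 *					ARRAY_SIZE(clients), true);
 *	if (dist < 0)
 *		return -EINVAL;
 *
 * A smaller non-negative distance indicates a preferable provider.
 */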

/**
 * pci_has_p2pmem - check if a given PCI device has published any p2pmem
 * @pdev: PCI device to check
 */
bool pci_has_p2pmem(struct pci_dev *pdev)
{
	struct pci_p2pdma *p2pdma;
	bool res;

	rcu_read_lock();
	p2pdma = rcu_dereference(pdev->p2pdma);
	res = p2pdma && p2pdma->p2pmem_published;
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(pci_has_p2pmem);

/**
 * pci_p2pmem_find_many - find a peer-to-peer DMA memory device compatible with
 *	the specified list of clients and shortest distance (as determined
 *	by pci_p2pdma_distance_many())
 * @clients: array of devices to check (NULL-terminated)
 * @num_clients: number of client devices in the list
 *
 * If multiple devices are behind the same switch, the one "closest" to the
 * client devices in use will be chosen first. (So if one of the providers is
 * the same as one of the clients, that provider will be used ahead of any
 * other providers that are unrelated). If multiple providers are an equal
 * distance away, one will be chosen at random.
 *
 * Returns a pointer to the PCI device with a reference taken (use pci_dev_put
 * to return the reference) or NULL if no compatible device is found. The
 * found provider will also be assigned to the client list.
 */
struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients)
{
	struct pci_dev *pdev = NULL;
	int distance;
	int closest_distance = INT_MAX;
	struct pci_dev **closest_pdevs;
	int dev_cnt = 0;
	const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs);
	int i;

	closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!closest_pdevs)
		return NULL;

	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
		if (!pci_has_p2pmem(pdev))
			continue;

		distance = pci_p2pdma_distance_many(pdev, clients,
						    num_clients, false);
		if (distance < 0 || distance > closest_distance)
			continue;

		if (distance == closest_distance && dev_cnt >= max_devs)
			continue;

		if (distance < closest_distance) {
			for (i = 0; i < dev_cnt; i++)
				pci_dev_put(closest_pdevs[i]);

			dev_cnt = 0;
			closest_distance = distance;
		}

		closest_pdevs[dev_cnt++] = pci_dev_get(pdev);
	}

	if (dev_cnt)
		pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]);

	for (i = 0; i < dev_cnt; i++)
		pci_dev_put(closest_pdevs[i]);

	kfree(closest_pdevs);
	return pdev;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
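
/*
 * Example (illustrative sketch): letting the core pick the closest
 * published provider for a set of clients. The caller owns the returned
 * reference and must drop it with pci_dev_put().
 *
 *	struct pci_dev *p2p_dev;
 *
 *	p2p_dev = pci_p2pmem_find_many(clients, num_clients);
 *	if (!p2p_dev)
 *		return -ENODEV;
 *
 *	... allocate from p2p_dev with pci_alloc_p2pmem() ...
 *
 *	pci_dev_put(p2p_dev);
 */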

/**
 * pci_alloc_p2pmem - allocate peer-to-peer DMA memory
 * @pdev: the device to allocate memory from
 * @size: number of bytes to allocate
 *
 * Returns the allocated memory or NULL on error.
 */
void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
{
	void *ret = NULL;
	struct percpu_ref *ref;
	struct pci_p2pdma *p2pdma;

	/*
	 * Pairs with synchronize_rcu() in pci_p2pdma_release() to
	 * ensure pdev->p2pdma is non-NULL for the duration of the
	 * read-lock.
	 */
	rcu_read_lock();
	p2pdma = rcu_dereference(pdev->p2pdma);
	if (unlikely(!p2pdma))
		goto out;

	ret = (void *)gen_pool_alloc_owner(p2pdma->pool, size, (void **) &ref);
	if (!ret)
		goto out;

	if (unlikely(!percpu_ref_tryget_live_rcu(ref))) {
		gen_pool_free(p2pdma->pool, (unsigned long) ret, size);
		ret = NULL;
		goto out;
	}
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
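
/*
 * Example (illustrative sketch): a typical allocate/use/free cycle,
 * assuming "len" fits within the published pool. The bus address is what
 * a peer device would be programmed with for the transfer.
 *
 *	void *addr;
 *	pci_bus_addr_t bus_addr;
 *
 *	addr = pci_alloc_p2pmem(pdev, len);
 *	if (!addr)
 *		return -ENOMEM;
 *	bus_addr = pci_p2pmem_virt_to_bus(pdev, addr);
 *
 *	... hand bus_addr to the peer device ...
 *
 *	pci_free_p2pmem(pdev, addr, len);
 */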

/**
 * pci_free_p2pmem - free peer-to-peer DMA memory
 * @pdev: the device the memory was allocated from
 * @addr: address of the memory that was allocated
 * @size: number of bytes that were allocated
 */
void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
{
	struct percpu_ref *ref;
	struct pci_p2pdma *p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);

	gen_pool_free_owner(p2pdma->pool, (uintptr_t)addr, size,
			(void **) &ref);
	percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(pci_free_p2pmem);

/**
 * pci_p2pmem_virt_to_bus - return the PCI bus address for a given virtual
 *	address obtained with pci_alloc_p2pmem()
 * @pdev: the device the memory was allocated from
 * @addr: address of the memory that was allocated
 */
pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr)
{
	struct pci_p2pdma *p2pdma;

	if (!addr)
		return 0;

	p2pdma = rcu_dereference_protected(pdev->p2pdma, 1);
	if (!p2pdma)
		return 0;

	/*
	 * Note: when we added the memory to the pool we used the PCI
	 * bus address as the physical address. So gen_pool_virt_to_phys()
	 * actually returns the bus address despite the misleading name.
	 */
	return gen_pool_virt_to_phys(p2pdma->pool, (unsigned long)addr);
}
EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus);

/**
 * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist
 * @pdev: the device to allocate memory from
 * @nents: the number of SG entries in the list
 * @length: number of bytes to allocate
 *
 * Return: %NULL on error or &struct scatterlist pointer and @nents on success
 */
struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
					 unsigned int *nents, u32 length)
{
	struct scatterlist *sg;
	void *addr;

	sg = kmalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg)
		return NULL;

	sg_init_table(sg, 1);

	addr = pci_alloc_p2pmem(pdev, length);
	if (!addr)
		goto out_free_sg;

	sg_set_buf(sg, addr, length);
	*nents = 1;
	return sg;

out_free_sg:
	kfree(sg);
	return NULL;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl);
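
/*
 * Example (illustrative sketch): allocating a single-entry scatterlist
 * of p2p memory and releasing it again; @nents is filled in by the
 * allocator.
 *
 *	unsigned int nents;
 *	struct scatterlist *sgl;
 *
 *	sgl = pci_p2pmem_alloc_sgl(pdev, &nents, SZ_64K);
 *	if (!sgl)
 *		return -ENOMEM;
 *
 *	... use the scatterlist ...
 *
 *	pci_p2pmem_free_sgl(pdev, sgl);
 */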

/**
 * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl()
 * @pdev: the device to allocate memory from
 * @sgl: the allocated scatterlist
 */
void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl)
{
	struct scatterlist *sg;
	int count;

	for_each_sg(sgl, sg, INT_MAX, count) {
		if (!sg)
			break;

		pci_free_p2pmem(pdev, sg_virt(sg), sg->length);
	}
	kfree(sgl);
}
EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl);

/**
 * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by
 *	other devices with pci_p2pmem_find()
 * @pdev: the device with peer-to-peer DMA memory to publish
 * @publish: set to true to publish the memory, false to unpublish it
 *
 * Published memory can be used by other PCI device drivers for
 * peer-to-peer DMA operations. Non-published memory is reserved for
 * exclusive use of the device driver that registers the peer-to-peer
 * memory.
 */
void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
{
	struct pci_p2pdma *p2pdma;

	rcu_read_lock();
	p2pdma = rcu_dereference(pdev->p2pdma);
	if (p2pdma)
		p2pdma->p2pmem_published = publish;
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
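
/*
 * Example (illustrative): publishing typically brackets the provider's
 * lifetime, e.g. pci_p2pmem_publish(pdev, true) at the end of probe()
 * and pci_p2pmem_publish(pdev, false) in remove().
 */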

static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap,
						    struct device *dev)
{
	enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
	struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider;
	struct pci_dev *client;
	struct pci_p2pdma *p2pdma;

	if (!provider->p2pdma)
		return PCI_P2PDMA_MAP_NOT_SUPPORTED;

	if (!dev_is_pci(dev))
		return PCI_P2PDMA_MAP_NOT_SUPPORTED;

	client = to_pci_dev(dev);

	rcu_read_lock();
	p2pdma = rcu_dereference(provider->p2pdma);

	if (p2pdma)
		type = xa_to_value(xa_load(&p2pdma->map_types,
					   map_types_idx(client)));
	rcu_read_unlock();
	return type;
}

static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap,
		struct device *dev, struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		s->dma_address = sg_phys(s) + p2p_pgmap->bus_offset;
		sg_dma_len(s) = s->length;
	}

	return nents;
}

/**
 * pci_p2pdma_map_sg_attrs - map a PCI peer-to-peer scatterlist for DMA
 * @dev: device doing the DMA request
 * @sg: scatter list to map
 * @nents: elements in the scatterlist
 * @dir: DMA direction
 * @attrs: DMA attributes passed to dma_map_sg() (if called)
 *
 * Scatterlists mapped with this function should be unmapped using
 * pci_p2pdma_unmap_sg_attrs().
 *
 * Returns the number of SG entries mapped or 0 on error.
 */
int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct pci_p2pdma_pagemap *p2p_pgmap =
		to_p2p_pgmap(sg_page(sg)->pgmap);

	switch (pci_p2pdma_map_type(sg_page(sg)->pgmap, dev)) {
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		return dma_map_sg_attrs(dev, sg, nents, dir, attrs);
	case PCI_P2PDMA_MAP_BUS_ADDR:
		return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents);
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}
EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
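
/*
 * Example (illustrative sketch): mapping a p2p scatterlist from the
 * requesting device's driver via the pci_p2pdma_map_sg() helper (which
 * passes attrs == 0), then unmapping with its counterpart.
 *
 *	int count;
 *
 *	count = pci_p2pdma_map_sg(dma_dev, sgl, nents, DMA_TO_DEVICE);
 *	if (!count)
 *		return -EIO;
 *
 *	... issue the DMA ...
 *
 *	pci_p2pdma_unmap_sg(dma_dev, sgl, nents, DMA_TO_DEVICE);
 */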

/**
 * pci_p2pdma_unmap_sg_attrs - unmap a PCI peer-to-peer scatterlist that was
 *	mapped with pci_p2pdma_map_sg()
 * @dev: device doing the DMA request
 * @sg: scatter list to unmap
 * @nents: number of elements returned by pci_p2pdma_map_sg()
 * @dir: DMA direction
 * @attrs: DMA attributes passed to dma_unmap_sg() (if called)
 */
void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	enum pci_p2pdma_map_type map_type;

	map_type = pci_p2pdma_map_type(sg_page(sg)->pgmap, dev);

	if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		dma_unmap_sg_attrs(dev, sg, nents, dir, attrs);
}
EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);

/**
 * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
 *		to enable p2pdma
 * @page: contents of the value to be stored
 * @p2p_dev: returns the PCI device that was selected to be used
 *		(if one was specified in the stored value)
 * @use_p2pdma: returns whether to enable p2pdma or not
 *
 * Parses an attribute value to decide whether to enable p2pdma.
 * The value can select a PCI device (using its full BDF device
 * name) or a boolean (in any format kstrtobool() accepts). A false
 * value disables p2pdma; a true value expects the caller to
 * automatically find a compatible device; and specifying a PCI device
 * expects the caller to use that specific provider.
 *
 * pci_p2pdma_enable_show() should be used as the show operation for
 * the attribute.
 *
 * Returns 0 on success
 */
int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
			    bool *use_p2pdma)
{
	struct device *dev;

	dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
	if (dev) {
		*use_p2pdma = true;
		*p2p_dev = to_pci_dev(dev);

		if (!pci_has_p2pmem(*p2p_dev)) {
			pci_err(*p2p_dev,
				"PCI device has no peer-to-peer memory: %s\n",
				page);
			pci_dev_put(*p2p_dev);
			return -ENODEV;
		}

		return 0;
	} else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) {
		/*
		 * If the user enters a PCI device that doesn't exist
		 * like "0000:01:00.1", we don't want kstrtobool to think
		 * it's a '0' when it's clearly not what the user wanted.
		 * So we require 0's and 1's to be exactly one character.
		 */
	} else if (!kstrtobool(page, use_p2pdma)) {
		return 0;
	}

	pr_err("No such PCI device: %.*s\n", (int)strcspn(page, "\n"), page);
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store);

/**
 * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating
 *		whether p2pdma is enabled
 * @page: contents of the stored value
 * @p2p_dev: the selected p2p device (NULL if no device is selected)
 * @use_p2pdma: whether p2pdma has been enabled
 *
 * Attributes that use pci_p2pdma_enable_store() should use this function
 * to show the value of the attribute.
 *
 * Returns the number of bytes printed on success
 */
ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
			       bool use_p2pdma)
{
	if (!use_p2pdma)
		return sprintf(page, "0\n");

	if (!p2p_dev)
		return sprintf(page, "1\n");

	return sprintf(page, "%s\n", pci_name(p2p_dev));
}
EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show);
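
/*
 * Example (illustrative sketch, with hypothetical struct and attribute
 * names): wiring the two helpers above into a configfs attribute pair,
 * in the style of the NVMe target's p2pmem attribute.
 *
 *	static ssize_t example_p2pmem_store(struct config_item *item,
 *					    const char *page, size_t count)
 *	{
 *		struct example *ex = to_example(item);
 *		int rc;
 *
 *		rc = pci_p2pdma_enable_store(page, &ex->p2p_dev,
 *					     &ex->use_p2pdma);
 *		return rc ? rc : count;
 *	}
 *
 *	static ssize_t example_p2pmem_show(struct config_item *item,
 *					   char *page)
 *	{
 *		struct example *ex = to_example(item);
 *
 *		return pci_p2pdma_enable_show(page, ex->p2p_dev,
 *					      ex->use_p2pdma);
 *	}
 */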