cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dart_iommu.c (11243B)


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * arch/powerpc/sysdev/dart_iommu.c
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
 *                    IBM Corporation
 *
 * Based on pSeries_iommu.c:
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 *
 * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/suspend.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/kmemleak.h>
#include <linux/of_address.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
#include <asm/ppc-pci.h>

#include "dart.h"

/* DART table address and size */
static u32 *dart_tablebase;
static unsigned long dart_tablesize;

/* Mapped base address for the dart */
static unsigned int __iomem *dart;

/* Dummy val that entries are set to when unused */
static unsigned int dart_emptyval;

static struct iommu_table iommu_table_dart;
static int iommu_table_dart_inited;
static int dart_dirty;
static int dart_is_u4;

#define DART_U4_BYPASS_BASE	0x8000000000ull

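/* Debug output is compiled out by default; DBG() expands to nothing. */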
#define DBG(...)

static DEFINE_SPINLOCK(invalidate_lock);

static inline void dart_tlb_invalidate_all(void)
{
	unsigned long l = 0;
	unsigned int reg, inv_bit;
	unsigned long limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	DBG("dart: flush\n");

	/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
	 * control register and wait for it to clear.
	 *
	 * Gotcha: Sometimes, the DART won't detect that the bit gets
	 * set. If so, clear it and set it again.
	 */

	limit = 0;

	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
retry:
	l = 0;
	reg = DART_IN(DART_CNTL);
	reg |= inv_bit;
	DART_OUT(DART_CNTL, reg);

	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
		l++;
	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			reg = DART_IN(DART_CNTL);
			reg &= ~inv_bit;
			DART_OUT(DART_CNTL, reg);
			goto retry;
		} else
			panic("DART: TLB did not flush after waiting a long time. Buggy U3?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

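/*
 * U4 only: invalidate the TLB entry for a single DART page (bus_rpn)
 * via the IONE bit, waiting with the same escalating timeout as above
 * before giving up.
 */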
static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
{
	unsigned int reg;
	unsigned int l, limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
		(bus_rpn & DART_CNTL_U4_IONE_MASK);
	DART_OUT(DART_CNTL, reg);

	limit = 0;
wait_more:
	l = 0;
	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
		rmb();
		l++;
	}

	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			goto wait_more;
		} else
			panic("DART: TLB did not flush after waiting a long time. Buggy U4?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	/*
	 * We add 1 to the number of entries to flush, following a
	 * comment in Darwin indicating that the memory controller
	 * can prefetch unmapped memory under some circumstances.
	 */
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;

	/* Perform a standard cache flush */
	flush_dcache_range(start, end);

	/*
	 * Perform the sequence described in the CPC925 manual to
	 * ensure all the data gets to a point the cache incoherent
	 * DART hardware will see.
	 */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}

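/*
 * Flush hook for the generic IOMMU code: on U3 the TLB is only
 * invalidated here, in one batched flush, if dart_build() marked the
 * table dirty since the last flush.
 */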
static void dart_flush(struct iommu_table *tbl)
{
	mb();
	if (dart_dirty) {
		dart_tlb_invalidate_all();
		dart_dirty = 0;
	}
}

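/*
 * Fill npages DART entries starting at index with valid mappings for
 * the kernel buffer at uaddr, then push the updated entries out to
 * the cache-incoherent DART hardware.
 */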
static int dart_build(struct iommu_table *tbl, long index,
		      long npages, unsigned long uaddr,
		      enum dma_data_direction direction,
		      unsigned long attrs)
{
	unsigned int *dp, *orig_dp;
	unsigned int rpn;
	long l;

	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	/* On U3, all memory is contiguous, so we can move this
	 * out of the loop.
	 */
	l = npages;
	while (l--) {
		rpn = __pa(uaddr) >> DART_PAGE_SHIFT;

		*(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);

		uaddr += DART_PAGE_SIZE;
	}
	dart_cache_sync(orig_dp, npages);

	if (dart_is_u4) {
		rpn = index;
		while (npages--)
			dart_tlb_invalidate_one(rpn++);
	} else {
		dart_dirty = 1;
	}
	return 0;
}

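/*
 * Point npages DART entries starting at index back at the dummy page
 * and push the change out to the hardware.
 */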
static void dart_free(struct iommu_table *tbl, long index, long npages)
{
	unsigned int *dp, *orig_dp;
	long orig_npages = npages;

	/* We don't worry about flushing the TLB cache. The only drawback of
	 * not doing it is that we won't catch buggy device drivers doing
	 * bad DMAs, but then no 32-bit architecture ever does either.
	 */

	DBG("dart: free at: %lx, %lx\n", index, npages);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	while (npages--)
		*(dp++) = dart_emptyval;

	dart_cache_sync(orig_dp, orig_npages);
}

static void __init allocate_dart(void)
{
	unsigned long tmp;

	/* 512 pages (2MB) is max DART tablesize. */
	dart_tablesize = 1UL << 21;

	/*
	 * 16MB (1 << 24) alignment. We allocate a full 16MB chunk since we
	 * will blow up an entire large page anyway in the kernel mapping.
	 */
	dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M,
					MEMBLOCK_LOW_LIMIT, SZ_2G,
					NUMA_NO_NODE);
	if (!dart_tablebase)
		panic("Failed to allocate 16MB below 2GB for DART table\n");

	/* There is no point scanning the DART space for leaks */
	kmemleak_no_scan((void *)dart_tablebase);

	/* Allocate a spare page to map all invalid DART pages. We need to do
	 * that to work around what looks like a problem with the HT bridge
	 * prefetching into invalid pages and corrupting data.
	 */
	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
	if (!tmp)
		panic("DART: table allocation failed\n");

	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
					 DARTMAP_RPNMASK);

	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
}

static int __init dart_init(struct device_node *dart_node)
{
	unsigned int i;
	unsigned long base, size;
	struct resource r;

	/* IOMMU disabled by the user? Bail out. */
	if (iommu_is_off)
		return -ENODEV;

	/*
	 * Only use the DART if the machine has more than 1GB of RAM
	 * or if requested with iommu=on on cmdline.
	 *
	 * 1GB of RAM is picked as limit because some default devices
	 * (e.g. Airport Extreme) have 30 bit address range limits.
	 */

	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
		return -ENODEV;

	/* Get DART registers */
	if (of_address_to_resource(dart_node, 0, &r))
		panic("DART: can't get register base!");

	/* Map in DART registers */
	dart = ioremap(r.start, resource_size(&r));
	if (dart == NULL)
		panic("DART: Cannot map registers!");

	/* Allocate the DART and dummy page */
	allocate_dart();

	/* Fill initial table */
	for (i = 0; i < dart_tablesize / 4; i++)
		dart_tablebase[i] = dart_emptyval;

	/* Push to memory */
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));

	/* Initialize DART with table base and enable it. */
	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
	size = dart_tablesize >> DART_PAGE_SHIFT;
	if (dart_is_u4) {
		size &= DART_SIZE_U4_SIZE_MASK;
		DART_OUT(DART_BASE_U4, base);
		DART_OUT(DART_SIZE_U4, size);
		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
	} else {
		size &= DART_CNTL_U3_SIZE_MASK;
		DART_OUT(DART_CNTL,
			 DART_CNTL_U3_ENABLE |
			 (base << DART_CNTL_U3_BASE_SHIFT) |
			 (size << DART_CNTL_U3_SIZE_SHIFT));
	}

	/* Invalidate DART to get rid of possible stale TLBs */
	dart_tlb_invalidate_all();

	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
	       dart_is_u4 ? "U4" : "U3");

	return 0;
}

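/* Hooks called by the generic powerpc IOMMU code. */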
static struct iommu_table_ops iommu_dart_ops = {
	.set = dart_build,
	.clear = dart_free,
	.flush = dart_flush,
};

static void iommu_table_dart_setup(void)
{
	iommu_table_dart.it_busno = 0;
	iommu_table_dart.it_offset = 0;
	/* it_size is in number of entries */
	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;

	/* Initialize the common IOMMU code */
	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
	iommu_table_dart.it_index = 0;
	iommu_table_dart.it_blocksize = 1;
	iommu_table_dart.it_ops = &iommu_dart_ops;
	if (!iommu_init_table(&iommu_table_dart, -1, 0, 0))
		panic("Failed to initialize iommu table");

	/* Reserve the last page of the DART to avoid possible prefetch
	 * past the DART mapped area
	 */
	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
}

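/* Set up the shared DART table the first time any PCI bus is probed. */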
static void pci_dma_bus_setup_dart(struct pci_bus *bus)
{
	if (!iommu_table_dart_inited) {
		iommu_table_dart_inited = 1;
		iommu_table_dart_setup();
	}
}

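/*
 * Walk up the device-tree parents of the device looking for the U4
 * PCIe host bridge; of_get_next_parent() releases each node reference
 * as we go.
 */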
static bool dart_device_on_pcie(struct device *dev)
{
	struct device_node *np = of_node_get(dev->of_node);

	while (np) {
		if (of_device_is_compatible(np, "U4-pcie") ||
		    of_device_is_compatible(np, "u4-pcie")) {
			of_node_put(np);
			return true;
		}
		np = of_get_next_parent(np);
	}
	return false;
}

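/*
 * Devices on U4's own PCIe interface may use the DART bypass at
 * DART_U4_BYPASS_BASE; record that offset for them, and point every
 * device at the shared DART table.
 */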
static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
	if (dart_is_u4 && dart_device_on_pcie(&dev->dev))
		dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE;
	set_iommu_table_base(&dev->dev, &iommu_table_dart);
}

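/*
 * Bypass is only offered on U4, for devices on its PCIe interface
 * whose DMA mask covers at least 40 bits.
 */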
static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
{
	return dart_is_u4 &&
		dart_device_on_pcie(&dev->dev) &&
		mask >= DMA_BIT_MASK(40);
}

void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
{
	struct device_node *dn;

	/* Find the DART in the device-tree */
	dn = of_find_compatible_node(NULL, "dart", "u3-dart");
	if (dn == NULL) {
		dn = of_find_compatible_node(NULL, "dart", "u4-dart");
		if (dn == NULL)
			return;	/* use default direct_dma_ops */
		dart_is_u4 = 1;
	}

	/* Initialize the DART HW */
	if (dart_init(dn) != 0) {
		of_node_put(dn);
		return;
	}
	/*
	 * U4 supports a DART bypass, we use it for 64-bit capable devices to
	 * improve performance.  However, that only works for devices connected
	 * to U4's own PCIe interface, not bridged through HyperTransport.
	 * We need the device to support at least 40 bits of addresses.
	 */
	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
	controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_iommu_ops);
	of_node_put(dn);
}

#ifdef CONFIG_PM
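/*
 * After resume the DART TLB contents may be stale: re-sync the
 * in-memory table to the hardware and flush the whole TLB.
 */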
static void iommu_dart_restore(void)
{
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
	dart_tlb_invalidate_all();
}

static int __init iommu_init_late_dart(void)
{
	if (!dart_tablebase)
		return 0;

	ppc_md.iommu_restore = iommu_dart_restore;

	return 0;
}

late_initcall(iommu_init_late_dart);
#endif /* CONFIG_PM */