cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

io_pgtable.c (11978B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table allocator.
 *
 * Copyright (C) 2020 Advanced Micro Devices, Inc.
 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#define pr_fmt(fmt)     "AMD-Vi: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"

static void v1_tlb_flush_all(void *cookie)
{
}

static void v1_tlb_flush_walk(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
}

static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
					 unsigned long iova, size_t granule,
					 void *cookie)
{
}

static const struct iommu_flush_ops v1_flush_ops = {
	.tlb_flush_all	= v1_tlb_flush_all,
	.tlb_flush_walk = v1_tlb_flush_walk,
	.tlb_add_page	= v1_tlb_add_page,
};

/*
 * Helper function to get the first pte of a large mapping
 */
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
			 unsigned long *count)
{
	unsigned long pte_mask, pg_size, cnt;
	u64 *fpte;

	pg_size  = PTE_PAGE_SIZE(*pte);
	cnt      = PAGE_SIZE_PTE_COUNT(pg_size);
	pte_mask = ~((cnt << 3) - 1);
	fpte     = (u64 *)(((unsigned long)pte) & pte_mask);

	if (page_size)
		*page_size = pg_size;

	if (count)
		*count = cnt;

	return fpte;
}
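
/*
 * Worked example (illustrative): for an 8 KiB replicated mapping,
 * PTE_PAGE_SIZE(*pte) is 0x2000 and PAGE_SIZE_PTE_COUNT() yields 2
 * replicated 8-byte entries, so pte_mask = ~((2 << 3) - 1) = ~0xf.
 * Masking the incoming PTE pointer with that rounds it down to the
 * 16-byte boundary holding the first of the two copies, which is what
 * fetch_pte() and alloc_pte() below rely on.
 */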

/****************************************************************************
 *
 * The functions below are used to create the page table mappings for
 * unity mapped regions.
 *
 ****************************************************************************/

static void free_pt_page(u64 *pt, struct list_head *freelist)
{
	struct page *p = virt_to_page(pt);

	list_add_tail(&p->lru, freelist);
}

static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
{
	u64 *p;
	int i;

	for (i = 0; i < 512; ++i) {
		/* PTE present? */
		if (!IOMMU_PTE_PRESENT(pt[i]))
			continue;

		/* Large PTE? */
		if (PM_PTE_LEVEL(pt[i]) == 0 ||
		    PM_PTE_LEVEL(pt[i]) == 7)
			continue;

		/*
		 * Free the next level. No need to look at l1 tables here since
		 * they can only contain leaf PTEs; just free them directly.
		 */
		p = IOMMU_PTE_PAGE(pt[i]);
		if (lvl > 2)
			free_pt_lvl(p, freelist, lvl - 1);
		else
			free_pt_page(p, freelist);
	}

	free_pt_page(pt, freelist);
}

static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
{
	switch (mode) {
	case PAGE_MODE_NONE:
	case PAGE_MODE_7_LEVEL:
		break;
	case PAGE_MODE_1_LEVEL:
		free_pt_page(root, freelist);
		break;
	case PAGE_MODE_2_LEVEL:
	case PAGE_MODE_3_LEVEL:
	case PAGE_MODE_4_LEVEL:
	case PAGE_MODE_5_LEVEL:
	case PAGE_MODE_6_LEVEL:
		free_pt_lvl(root, freelist, mode);
		break;
	default:
		BUG();
	}
}
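
/*
 * Illustrative sketch (compiled out): the expected calling pattern for
 * free_sub_pt().  Pages are only queued on a local freelist; they must not
 * go back to the page allocator until the IOMMU can no longer walk them,
 * which is why the real callers below flush first and call put_pages_list()
 * last.  The function name is an example only.
 */
#if 0
static void example_discard_subtree(u64 *pte)
{
	LIST_HEAD(freelist);
	u64 pteval = *pte;

	if (!IOMMU_PTE_PRESENT(pteval))
		return;

	free_sub_pt(IOMMU_PTE_PAGE(pteval), IOMMU_PTE_MODE(pteval), &freelist);

	/* ... invalidate the IOTLB / device table entries here ... */

	put_pages_list(&freelist);
}
#endif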

void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
				  u64 *root, int mode)
{
	u64 pt_root;

	/* lowest 3 bits encode pgtable mode */
	pt_root = mode & 7;
	pt_root |= (u64)root;

	amd_iommu_domain_set_pt_root(domain, pt_root);
}
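
/*
 * Illustrative sketch (compiled out): decoding a pt_root value packed by
 * amd_iommu_domain_set_pgtable() above.  The low three bits carry the
 * page-table mode; the rest is the page-aligned root pointer, so masking
 * with PAGE_MASK recovers it.  The function name is an example only.
 */
#if 0
static void example_decode_pt_root(u64 pt_root, u64 **root, int *mode)
{
	*mode = pt_root & 7;
	*root = (u64 *)(pt_root & PAGE_MASK);
}
#endif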

/*
 * This function is used to add another level to an IO page table. Adding
 * another level increases the size of the address space by 9 bits to a size up
 * to 64 bits.
 */
static bool increase_address_space(struct protection_domain *domain,
				   unsigned long address,
				   gfp_t gfp)
{
	unsigned long flags;
	bool ret = true;
	u64 *pte;

	pte = (void *)get_zeroed_page(gfp);
	if (!pte)
		return false;

	spin_lock_irqsave(&domain->lock, flags);

	if (address <= PM_LEVEL_SIZE(domain->iop.mode))
		goto out;

	ret = false;
	if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
		goto out;

	*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));

	domain->iop.root  = pte;
	domain->iop.mode += 1;
	amd_iommu_update_and_flush_device_table(domain);
	amd_iommu_domain_flush_complete(domain);

	/*
	 * Device Table needs to be updated and flushed before the new root can
	 * be published.
	 */
	amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);

	pte = NULL;
	ret = true;

out:
	spin_unlock_irqrestore(&domain->lock, flags);
	free_page((unsigned long)pte);

	return ret;
}
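
/*
 * Worked example (illustrative): with mode == PAGE_MODE_3_LEVEL the table
 * covers IOVAs up to PM_LEVEL_SIZE(3) = 2^39 - 1.  Mapping anything above
 * that makes alloc_pte() below call into here, bumping the mode to
 * PAGE_MODE_4_LEVEL and extending coverage to 2^48 - 1, i.e. nine more
 * address bits per added level, until PAGE_MODE_6_LEVEL is reached.
 */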

static u64 *alloc_pte(struct protection_domain *domain,
		      unsigned long address,
		      unsigned long page_size,
		      u64 **pte_page,
		      gfp_t gfp,
		      bool *updated)
{
	int level, end_lvl;
	u64 *pte, *page;

	BUG_ON(!is_power_of_2(page_size));

	while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
		/*
		 * Return an error if there is no memory to update the
		 * page-table.
		 */
		if (!increase_address_space(domain, address, gfp))
			return NULL;
	}

	level   = domain->iop.mode - 1;
	pte     = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
	address = PAGE_SIZE_ALIGN(address, page_size);
	end_lvl = PAGE_SIZE_LEVEL(page_size);

	while (level > end_lvl) {
		u64 __pte, __npte;
		int pte_level;

		__pte     = *pte;
		pte_level = PM_PTE_LEVEL(__pte);

		/*
		 * If we replace a series of large PTEs, we need
		 * to tear down all of them.
		 */
		if (IOMMU_PTE_PRESENT(__pte) &&
		    pte_level == PAGE_MODE_7_LEVEL) {
			unsigned long count, i;
			u64 *lpte;

			lpte = first_pte_l7(pte, NULL, &count);

			/*
			 * Unmap the replicated PTEs that still match the
			 * original large mapping
			 */
			for (i = 0; i < count; ++i)
				cmpxchg64(&lpte[i], __pte, 0ULL);

			*updated = true;
			continue;
		}

		if (!IOMMU_PTE_PRESENT(__pte) ||
		    pte_level == PAGE_MODE_NONE) {
			page = (u64 *)get_zeroed_page(gfp);

			if (!page)
				return NULL;

			__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));

			/* pte could have been changed somewhere. */
			if (cmpxchg64(pte, __pte, __npte) != __pte)
				free_page((unsigned long)page);
			else if (IOMMU_PTE_PRESENT(__pte))
				*updated = true;

			continue;
		}

		/* No level skipping support yet */
		if (pte_level != level)
			return NULL;

		level -= 1;

		pte = IOMMU_PTE_PAGE(__pte);

		if (pte_page && level == end_lvl)
			*pte_page = pte;

		pte = &pte[PM_LEVEL_INDEX(level, address)];
	}

	return pte;
}
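
/*
 * Worked example (illustrative): mapping a 4 KiB page at iova 0x40201000 in
 * a PAGE_MODE_3_LEVEL table gives end_lvl = PAGE_SIZE_LEVEL(0x1000) = 0, and
 * the walk uses PM_LEVEL_INDEX(2, iova) = 1, PM_LEVEL_INDEX(1, iova) = 1 and
 * PM_LEVEL_INDEX(0, iova) = 1 to reach the leaf slot, allocating any missing
 * intermediate table pages on the way down.
 */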

/*
 * This function checks if there is a PTE for a given dma address. If
 * there is one, it returns the pointer to it.
 */
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
		      unsigned long address,
		      unsigned long *page_size)
{
	int level;
	u64 *pte;

	*page_size = 0;

	if (address > PM_LEVEL_SIZE(pgtable->mode))
		return NULL;

	level	   =  pgtable->mode - 1;
	pte	   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
	*page_size =  PTE_LEVEL_PAGE_SIZE(level);

	while (level > 0) {

		/* Not Present */
		if (!IOMMU_PTE_PRESENT(*pte))
			return NULL;

		/* Large PTE */
		if (PM_PTE_LEVEL(*pte) == 7 ||
		    PM_PTE_LEVEL(*pte) == 0)
			break;

		/* No level skipping support yet */
		if (PM_PTE_LEVEL(*pte) != level)
			return NULL;

		level -= 1;

		/* Walk to the next level */
		pte	   = IOMMU_PTE_PAGE(*pte);
		pte	   = &pte[PM_LEVEL_INDEX(level, address)];
		*page_size = PTE_LEVEL_PAGE_SIZE(level);
	}

	/*
	 * If we have a series of large PTEs, make
	 * sure to return a pointer to the first one.
	 */
	if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
		pte = first_pte_l7(pte, page_size, NULL);

	return pte;
}
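
/*
 * Illustrative sketch (compiled out): walking a range with fetch_pte().  The
 * returned *page_size tells the caller how far the current PTE reaches; the
 * same value is what iommu_v1_unmap_page() below uses to advance its cursor.
 * The function name is an example only.
 */
#if 0
static bool example_range_is_mapped(struct amd_io_pgtable *pgtable,
				    unsigned long iova, size_t size)
{
	unsigned long pgsize, next;
	size_t step;
	u64 *pte;

	while (size) {
		pte = fetch_pte(pgtable, iova, &pgsize);
		if (!pte || !IOMMU_PTE_PRESENT(*pte))
			return false;

		/* Advance to the end of the block covered by this PTE */
		next = (iova & ~(pgsize - 1)) + pgsize;
		step = min_t(size_t, next - iova, size);
		iova += step;
		size -= step;
	}

	return true;
}
#endif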

static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
{
	u64 *pt;
	int mode;

	while (cmpxchg64(pte, pteval, 0) != pteval) {
		pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
		pteval = *pte;
	}

	if (!IOMMU_PTE_PRESENT(pteval))
		return;

	pt   = IOMMU_PTE_PAGE(pteval);
	mode = IOMMU_PTE_MODE(pteval);

	free_sub_pt(pt, mode, freelist);
}

/*
 * Generic mapping function. It maps a physical address into a DMA
 * address space. It allocates the page table pages if necessary.
 * In the future it can be extended to a generic mapping function
 * supporting all features of AMD IOMMU page tables like level skipping
 * and full 64 bit address spaces.
 */
static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
	LIST_HEAD(freelist);
	bool updated = false;
	u64 __pte, *pte;
	int ret, i, count;

	BUG_ON(!IS_ALIGNED(iova, size));
	BUG_ON(!IS_ALIGNED(paddr, size));

	ret = -EINVAL;
	if (!(prot & IOMMU_PROT_MASK))
		goto out;

	count = PAGE_SIZE_PTE_COUNT(size);
	pte   = alloc_pte(dom, iova, size, NULL, gfp, &updated);

	ret = -ENOMEM;
	if (!pte)
		goto out;

	for (i = 0; i < count; ++i)
		free_clear_pte(&pte[i], pte[i], &freelist);

	if (!list_empty(&freelist))
		updated = true;

	if (count > 1) {
		__pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
		__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
	} else
		__pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;

	if (prot & IOMMU_PROT_IR)
		__pte |= IOMMU_PTE_IR;
	if (prot & IOMMU_PROT_IW)
		__pte |= IOMMU_PTE_IW;

	for (i = 0; i < count; ++i)
		pte[i] = __pte;

	ret = 0;

out:
	if (updated) {
		unsigned long flags;

		spin_lock_irqsave(&dom->lock, flags);
		/*
		 * Flush domain TLB(s) and wait for completion. Any Device-Table
		 * Updates and flushing already happened in
		 * increase_address_space().
		 */
		amd_iommu_domain_flush_tlb_pde(dom);
		amd_iommu_domain_flush_complete(dom);
		spin_unlock_irqrestore(&dom->lock, flags);
	}

	/* Everything flushed out, free pages now */
	put_pages_list(&freelist);

	return ret;
}
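
/*
 * Illustrative sketch (compiled out): how a mapping request reaches
 * iommu_v1_map_page() through the generic io-pgtable ops installed in
 * v1_alloc_pgtable() below.  The 2 MiB size is just an example; any
 * power-of-two size from AMD_IOMMU_PGSIZES with iova and paddr aligned to
 * it works.  The function name is an example only.
 */
#if 0
static int example_map_2m(struct io_pgtable_ops *ops, unsigned long iova,
			  phys_addr_t paddr)
{
	return ops->map(ops, iova, paddr, SZ_2M,
			IOMMU_PROT_IR | IOMMU_PROT_IW, GFP_KERNEL);
}
#endif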

static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
				      unsigned long iova,
				      size_t size,
				      struct iommu_iotlb_gather *gather)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long long unmapped;
	unsigned long unmap_size;
	u64 *pte;

	BUG_ON(!is_power_of_2(size));

	unmapped = 0;

	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
		if (pte) {
			int i, count;

			count = PAGE_SIZE_PTE_COUNT(unmap_size);
			for (i = 0; i < count; i++)
				pte[i] = 0ULL;
		}

		iova = (iova & ~(unmap_size - 1)) + unmap_size;
		unmapped += unmap_size;
	}

	BUG_ON(unmapped && !is_power_of_2(unmapped));

	return unmapped;
}
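
/*
 * Illustrative sketch (compiled out): unmapping through the same ops table.
 * The gather argument is accepted but not used here; the v1 flush ops above
 * are no-ops, so IOTLB invalidation is left to the AMD driver's own flush
 * paths.  The function name is an example only.
 */
#if 0
static size_t example_unmap_2m(struct io_pgtable_ops *ops, unsigned long iova,
			       struct iommu_iotlb_gather *gather)
{
	return ops->unmap(ops, iova, SZ_2M, gather);
}
#endif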

static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long offset_mask, pte_pgsize;
	u64 *pte, __pte;

	pte = fetch_pte(pgtable, iova, &pte_pgsize);

	if (!pte || !IOMMU_PTE_PRESENT(*pte))
		return 0;

	offset_mask = pte_pgsize - 1;
	__pte	    = __sme_clr(*pte & PM_ADDR_MASK);

	return (__pte & ~offset_mask) | (iova & offset_mask);
}
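
/*
 * Worked example (illustrative): if fetch_pte() found a 2 MiB mapping,
 * pte_pgsize is 0x200000 and offset_mask is 0x1fffff, so the low 21 bits of
 * the result come from the IOVA and the upper bits from the (SME-cleared)
 * address stored in the PTE.
 */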

/*
 * ----------------------------------------------------
 */
static void v1_free_pgtable(struct io_pgtable *iop)
{
	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
	struct protection_domain *dom;
	LIST_HEAD(freelist);

	if (pgtable->mode == PAGE_MODE_NONE)
		return;

	dom = container_of(pgtable, struct protection_domain, iop);

	/* Page-table is not visible to IOMMU anymore, so free it */
	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
	       pgtable->mode > PAGE_MODE_6_LEVEL);

	free_sub_pt(pgtable->root, pgtable->mode, &freelist);

	/* Update data structure */
	amd_iommu_domain_clr_pt_root(dom);

	/* Make changes visible to IOMMUs */
	amd_iommu_domain_update(dom);

	put_pages_list(&freelist);
}

static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);

	cfg->pgsize_bitmap  = AMD_IOMMU_PGSIZES;
	cfg->ias            = IOMMU_IN_ADDR_BIT_SIZE;
	cfg->oas            = IOMMU_OUT_ADDR_BIT_SIZE;
	cfg->tlb            = &v1_flush_ops;

	pgtable->iop.ops.map          = iommu_v1_map_page;
	pgtable->iop.ops.unmap        = iommu_v1_unmap_page;
	pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;

	return &pgtable->iop;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
	.alloc	= v1_alloc_pgtable,
	.free	= v1_free_pgtable,
};
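
/*
 * Illustrative sketch (compiled out): this init-fns table is what the
 * generic io-pgtable layer dispatches to when the AMD driver asks for the
 * v1 format, roughly as below.  The exact call site and the cookie it
 * passes live outside this file; the function name here is an example only.
 */
#if 0
static struct io_pgtable_ops *example_alloc_v1(struct io_pgtable_cfg *cfg,
					       struct protection_domain *domain)
{
	return alloc_io_pgtable_ops(AMD_IOMMU_V1, cfg, domain);
}
#endif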