cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mem_encrypt_amd.c (14243B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * AMD Memory Encryption Support
      4 *
      5 * Copyright (C) 2016 Advanced Micro Devices, Inc.
      6 *
      7 * Author: Tom Lendacky <thomas.lendacky@amd.com>
      8 */
      9
     10#define DISABLE_BRANCH_PROFILING
     11
     12#include <linux/linkage.h>
     13#include <linux/init.h>
     14#include <linux/mm.h>
     15#include <linux/dma-direct.h>
     16#include <linux/swiotlb.h>
     17#include <linux/mem_encrypt.h>
     18#include <linux/device.h>
     19#include <linux/kernel.h>
     20#include <linux/bitops.h>
     21#include <linux/dma-mapping.h>
     22#include <linux/virtio_config.h>
     23#include <linux/cc_platform.h>
     24#include <linux/platform-feature.h>
     25
     26#include <asm/tlbflush.h>
     27#include <asm/fixmap.h>
     28#include <asm/setup.h>
     29#include <asm/bootparam.h>
     30#include <asm/set_memory.h>
     31#include <asm/cacheflush.h>
     32#include <asm/processor-flags.h>
     33#include <asm/msr.h>
     34#include <asm/cmdline.h>
     35#include <asm/sev.h>
     36
     37#include "mm_internal.h"
     38
     39/*
     40 * Since SME-related variables are set early in the boot process, they must
     41 * reside in the .data section so as not to be zeroed out when the .bss
     42 * section is later cleared.
     43 */
     44u64 sme_me_mask __section(".data") = 0;
     45u64 sev_status __section(".data") = 0;
     46u64 sev_check_data __section(".data") = 0;
     47EXPORT_SYMBOL(sme_me_mask);
     48
     49/* Buffer used for early in-place encryption by BSP, no locking needed */
     50static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
     51
     52/*
     53 * SNP-specific routine which needs to additionally change the page state from
     54 * private to shared before copying the data from the source to the destination,
     55 * and restore it after the copy.
     56 */
     57static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
     58				     unsigned long paddr, bool decrypt)
     59{
     60	unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
     61
     62	if (decrypt) {
     63		/*
     64		 * @paddr needs to be accessed decrypted, mark the page shared in
     65		 * the RMP table before copying it.
     66		 */
     67		early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);
     68
     69		memcpy(dst, src, sz);
     70
     71		/* Restore the page state after the memcpy. */
     72		early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
     73	} else {
     74		/*
     75		 * @paddr needs to be accessed encrypted, no need for the page state
     76		 * change.
     77		 */
     78		memcpy(dst, src, sz);
     79	}
     80}
     81
     82/*
     83 * This routine does not change the underlying encryption setting of the
     84 * page(s) that map this memory. It assumes that eventually the memory is
     85 * meant to be accessed as either encrypted or decrypted but the contents
     86 * are currently not in the desired state.
     87 *
     88 * This routine follows the steps outlined in the AMD64 Architecture
     89 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
     90 */
     91static void __init __sme_early_enc_dec(resource_size_t paddr,
     92				       unsigned long size, bool enc)
     93{
     94	void *src, *dst;
     95	size_t len;
     96
     97	if (!sme_me_mask)
     98		return;
     99
    100	wbinvd();
    101
    102	/*
    103	 * There are a limited number of early mapping slots, so map (at most)
    104	 * one page at a time.
    105	 */
    106	while (size) {
    107		len = min_t(size_t, sizeof(sme_early_buffer), size);
    108
    109		/*
    110		 * Create mappings for the current and desired format of
    111		 * the memory. Use a write-protected mapping for the source.
    112		 */
    113		src = enc ? early_memremap_decrypted_wp(paddr, len) :
    114			    early_memremap_encrypted_wp(paddr, len);
    115
    116		dst = enc ? early_memremap_encrypted(paddr, len) :
    117			    early_memremap_decrypted(paddr, len);
    118
    119		/*
    120		 * If a mapping can't be obtained to perform the operation,
    121		 * then eventual access of that area in the desired mode
    122		 * will cause a crash.
    123		 */
    124		BUG_ON(!src || !dst);
    125
    126		/*
    127		 * Use a temporary buffer, of cache-line multiple size, to
    128		 * avoid data corruption as documented in the APM.
    129		 */
    130		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
    131			snp_memcpy(sme_early_buffer, src, len, paddr, enc);
    132			snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
    133		} else {
    134			memcpy(sme_early_buffer, src, len);
    135			memcpy(dst, sme_early_buffer, len);
    136		}
    137
    138		early_memunmap(dst, len);
    139		early_memunmap(src, len);
    140
    141		paddr += len;
    142		size -= len;
    143	}
    144}
    145
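       /*
        * Thin wrappers around __sme_early_enc_dec() for in-place encryption
        * and decryption of a physical memory range during early boot.
        */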
    146void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
    147{
    148	__sme_early_enc_dec(paddr, size, true);
    149}
    150
    151void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
    152{
    153	__sme_early_enc_dec(paddr, size, false);
    154}
    155
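       /*
        * Create (map == true) or remove (map == false) early page-table
        * mappings for the given range with the encryption mask cleared, so
        * the memory is accessed decrypted. The local TLB is flushed when done.
        */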
    156static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
    157					     bool map)
    158{
    159	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
    160	pmdval_t pmd_flags, pmd;
    161
    162	/* Use early_pmd_flags but remove the encryption mask */
    163	pmd_flags = __sme_clr(early_pmd_flags);
    164
    165	do {
    166		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
    167		__early_make_pgtable((unsigned long)vaddr, pmd);
    168
    169		vaddr += PMD_SIZE;
    170		paddr += PMD_SIZE;
    171		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
    172	} while (size);
    173
    174	flush_tlb_local();
    175}
    176
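       /*
        * Boot data (real_mode_data and the kernel command line) is placed in
        * memory unencrypted by the bootloader. When SME is active,
        * sme_map_bootdata() maps it decrypted so it can be read and
        * sme_unmap_bootdata() removes those mappings once the data has been
        * consumed.
        */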
    177void __init sme_unmap_bootdata(char *real_mode_data)
    178{
    179	struct boot_params *boot_data;
    180	unsigned long cmdline_paddr;
    181
    182	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
    183		return;
    184
    185	/* Get the command line address before unmapping the real_mode_data */
    186	boot_data = (struct boot_params *)real_mode_data;
    187	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);
    188
    189	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);
    190
    191	if (!cmdline_paddr)
    192		return;
    193
    194	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
    195}
    196
    197void __init sme_map_bootdata(char *real_mode_data)
    198{
    199	struct boot_params *boot_data;
    200	unsigned long cmdline_paddr;
    201
    202	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
    203		return;
    204
    205	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);
    206
    207	/* Get the command line address after mapping the real_mode_data */
    208	boot_data = (struct boot_params *)real_mode_data;
    209	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);
    210
    211	if (!cmdline_paddr)
    212		return;
    213
    214	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
    215}
    216
    217void __init sev_setup_arch(void)
    218{
    219	phys_addr_t total_mem = memblock_phys_mem_size();
    220	unsigned long size;
    221
    222	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
    223		return;
    224
    225	/*
    226	 * For SEV, all DMA has to occur via shared/unencrypted pages.
    227	 * SEV uses SWIOTLB to make this happen without changing device
    228	 * drivers. However, depending on the workload being run, the
    229	 * default 64MB of SWIOTLB may not be enough and SWIOTLB may
    230	 * run out of buffers for DMA, resulting in I/O errors and/or
    231	 * performance degradation especially with high I/O workloads.
    232	 *
    233	 * Adjust the default size of SWIOTLB for SEV guests using
    234	 * a percentage of guest memory for SWIOTLB buffers.
    235	 * Also, as the SWIOTLB bounce buffer memory is allocated
    236	 * from low memory, ensure that the adjusted size is within
    237	 * the limits of low available memory.
    238	 *
    239	 * The percentage of guest memory used here for SWIOTLB buffers
    240	 * is more of an approximation of the static adjustment, which is
    241	 * 64MB for <1G and ~128M to 256M for 1G-to-4G, i.e., roughly 6%.
    242	 */
    243	size = total_mem * 6 / 100;
    244	size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
    245	swiotlb_adjust_size(size);
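       	/*
       	 * Illustrative sizing under the 6% rule above: a 4GB guest gets
       	 * ~245MB of SWIOTLB, already within the [IO_TLB_DEFAULT_SIZE (64MB),
       	 * 1GB] clamp, while a 512MB guest (~30MB) is bumped up to 64MB.
       	 */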
    246
    247	/* Set restricted memory access for virtio. */
    248	platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
    249}
    250
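       /*
        * Return the PFN for the page-table entry @kpte at page-table level
        * @level and, if @ret_prot is non-NULL, its protection bits. Returns 0
        * for an unexpected level.
        */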
    251static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
    252{
    253	unsigned long pfn = 0;
    254	pgprot_t prot;
    255
    256	switch (level) {
    257	case PG_LEVEL_4K:
    258		pfn = pte_pfn(*kpte);
    259		prot = pte_pgprot(*kpte);
    260		break;
    261	case PG_LEVEL_2M:
    262		pfn = pmd_pfn(*(pmd_t *)kpte);
    263		prot = pmd_pgprot(*(pmd_t *)kpte);
    264		break;
    265	case PG_LEVEL_1G:
    266		pfn = pud_pfn(*(pud_t *)kpte);
    267		prot = pud_pgprot(*(pud_t *)kpte);
    268		break;
    269	default:
    270		WARN_ONCE(1, "Invalid level for kpte\n");
    271		return 0;
    272	}
    273
    274	if (ret_prot)
    275		*ret_prot = prot;
    276
    277	return pfn;
    278}
    279
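       /*
        * x86_platform.guest callbacks: changing the C-bit of a mapping always
        * requires a TLB flush, while a cache flush is only needed on parts
        * without the SME_COHERENT feature.
        */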
    280static bool amd_enc_tlb_flush_required(bool enc)
    281{
    282	return true;
    283}
    284
    285static bool amd_enc_cache_flush_required(void)
    286{
    287	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
    288}
    289
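       /*
        * Walk the given virtual range one mapping at a time and notify the
        * hypervisor of each page's new encryption status via the paravirt
        * page-encryption-status hook.
        */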
    290static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
    291{
    292#ifdef CONFIG_PARAVIRT
    293	unsigned long sz = npages << PAGE_SHIFT;
    294	unsigned long vaddr_end = vaddr + sz;
    295
    296	while (vaddr < vaddr_end) {
    297		int psize, pmask, level;
    298		unsigned long pfn;
    299		pte_t *kpte;
    300
    301		kpte = lookup_address(vaddr, &level);
    302		if (!kpte || pte_none(*kpte)) {
    303			WARN_ONCE(1, "kpte lookup for vaddr\n");
    304			return;
    305		}
    306
    307		pfn = pg_level_to_pfn(level, kpte, NULL);
    308		if (!pfn)
    309			continue;
    310
    311		psize = page_level_size(level);
    312		pmask = page_level_mask(level);
    313
    314		notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);
    315
    316		vaddr = (vaddr & pmask) + psize;
    317	}
    318#endif
    319}
    320
    321static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
    322{
    323	/*
    324	 * To maintain the security guarantees of SEV-SNP guests, make sure
    325	 * to invalidate the memory before encryption attribute is cleared.
    326	 */
    327	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
    328		snp_set_memory_shared(vaddr, npages);
    329}
    330
    331/* Return true unconditionally: return value doesn't matter for the SEV side */
    332static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
    333{
    334	/*
    335	 * After memory is mapped encrypted in the page table, validate it
    336	 * so that it is consistent with the page table updates.
    337	 */
    338	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
    339		snp_set_memory_private(vaddr, npages);
    340
    341	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
    342		enc_dec_hypercall(vaddr, npages, enc);
    343
    344	return true;
    345}
    346
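       /*
        * Change the encryption attribute of a single mapping: flush the cached
        * contents, en-/decrypt the page contents in place, update the PTE and,
        * on SNP, keep the RMP page state in sync.
        */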
    347static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
    348{
    349	pgprot_t old_prot, new_prot;
    350	unsigned long pfn, pa, size;
    351	pte_t new_pte;
    352
    353	pfn = pg_level_to_pfn(level, kpte, &old_prot);
    354	if (!pfn)
    355		return;
    356
    357	new_prot = old_prot;
    358	if (enc)
    359		pgprot_val(new_prot) |= _PAGE_ENC;
    360	else
    361		pgprot_val(new_prot) &= ~_PAGE_ENC;
    362
    363	/* If prot is same then do nothing. */
    364	if (pgprot_val(old_prot) == pgprot_val(new_prot))
    365		return;
    366
    367	pa = pfn << PAGE_SHIFT;
    368	size = page_level_size(level);
    369
    370	/*
    371	 * We are going to perform in-place en-/decryption and change the
    372	 * physical page attribute from C=1 to C=0 or vice versa. Flush the
    373	 * caches to ensure that data gets accessed with the correct C-bit.
    374	 */
    375	clflush_cache_range(__va(pa), size);
    376
    377	/* Encrypt/decrypt the contents in-place */
    378	if (enc) {
    379		sme_early_encrypt(pa, size);
    380	} else {
    381		sme_early_decrypt(pa, size);
    382
    383		/*
    384		 * On SNP, the page state change in the RMP table must happen
    385		 * before the page table updates.
    386		 */
    387		early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
    388	}
    389
    390	/* Change the page encryption mask. */
    391	new_pte = pfn_pte(pfn, new_prot);
    392	set_pte_atomic(kpte, new_pte);
    393
    394	/*
    395	 * If the page is set encrypted in the page table, then update the RMP table to
    396	 * add this page as private.
    397	 */
    398	if (enc)
    399		early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
    400}
    401
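       /*
        * Early boot-time walk over the mappings of a virtual range: flip the
        * C-bit in place where a whole mapping is covered, split large pages
        * otherwise, then flush the TLB and notify the hypervisor of the new
        * page states.
        */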
    402static int __init early_set_memory_enc_dec(unsigned long vaddr,
    403					   unsigned long size, bool enc)
    404{
    405	unsigned long vaddr_end, vaddr_next, start;
    406	unsigned long psize, pmask;
    407	int split_page_size_mask;
    408	int level, ret;
    409	pte_t *kpte;
    410
    411	start = vaddr;
    412	vaddr_next = vaddr;
    413	vaddr_end = vaddr + size;
    414
    415	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
    416		kpte = lookup_address(vaddr, &level);
    417		if (!kpte || pte_none(*kpte)) {
    418			ret = 1;
    419			goto out;
    420		}
    421
    422		if (level == PG_LEVEL_4K) {
    423			__set_clr_pte_enc(kpte, level, enc);
    424			vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
    425			continue;
    426		}
    427
    428		psize = page_level_size(level);
    429		pmask = page_level_mask(level);
    430
    431		/*
    432		 * Check whether we can change the large page in one go.
    433		 * We request a split when the address is not aligned and
    434		 * the number of pages on which to set/clear the encryption bit is smaller
    435		 * than the number of pages in the large page.
    436		 */
    437		if (vaddr == (vaddr & pmask) &&
    438		    ((vaddr_end - vaddr) >= psize)) {
    439			__set_clr_pte_enc(kpte, level, enc);
    440			vaddr_next = (vaddr & pmask) + psize;
    441			continue;
    442		}
    443
    444		/*
    445		 * The virtual address is part of a larger page, create the next
    446		 * level page table mapping (4K or 2M). If it is part of a 2M
    447		 * page then we request a split of the large page into 4K
    448		 * chunks, and a 1GB large page is split into 2M pages.
    449		 */
    450		if (level == PG_LEVEL_2M)
    451			split_page_size_mask = 0;
    452		else
    453			split_page_size_mask = 1 << PG_LEVEL_2M;
    454
    455		/*
    456		 * kernel_physical_mapping_change() does not flush the TLBs, so
    457		 * a TLB flush is required after we exit from the for loop.
    458		 */
    459		kernel_physical_mapping_change(__pa(vaddr & pmask),
    460					       __pa((vaddr_end & pmask) + psize),
    461					       split_page_size_mask);
    462	}
    463
    464	ret = 0;
    465
    466	early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
    467out:
    468	__flush_tlb_all();
    469	return ret;
    470}
    471
    472int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size)
    473{
    474	return early_set_memory_enc_dec(vaddr, size, false);
    475}
    476
    477int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
    478{
    479	return early_set_memory_enc_dec(vaddr, size, true);
    480}
    481
    482void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
    483{
    484	enc_dec_hypercall(vaddr, npages, enc);
    485}
    486
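       /*
        * Propagate the SME encryption mask into the early PMD flags, the
        * supported PTE mask and the protection map, and install the AMD
        * encryption-status callbacks.
        */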
    487void __init sme_early_init(void)
    488{
    489	unsigned int i;
    490
    491	if (!sme_me_mask)
    492		return;
    493
    494	early_pmd_flags = __sme_set(early_pmd_flags);
    495
    496	__supported_pte_mask = __sme_set(__supported_pte_mask);
    497
    498	/* Update the protection map with memory encryption mask */
    499	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
    500		protection_map[i] = pgprot_encrypted(protection_map[i]);
    501
    502	x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
    503	x86_platform.guest.enc_status_change_finish  = amd_enc_status_change_finish;
    504	x86_platform.guest.enc_tlb_flush_required    = amd_enc_tlb_flush_required;
    505	x86_platform.guest.enc_cache_flush_required  = amd_enc_cache_flush_required;
    506}
    507
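       /*
        * Free the unused tail of the .bss..decrypted section, re-encrypting
        * the pages first when memory encryption is active so they are handed
        * back to the allocator as regular (encrypted) memory.
        */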
    508void __init mem_encrypt_free_decrypted_mem(void)
    509{
    510	unsigned long vaddr, vaddr_end, npages;
    511	int r;
    512
    513	vaddr = (unsigned long)__start_bss_decrypted_unused;
    514	vaddr_end = (unsigned long)__end_bss_decrypted;
    515	npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
    516
    517	/*
    518	 * The unused memory range was mapped decrypted, change the encryption
    519	 * attribute from decrypted to encrypted before freeing it.
    520	 */
    521	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
    522		r = set_memory_encrypted(vaddr, npages);
    523		if (r) {
    524			pr_warn("failed to free unused decrypted pages\n");
    525			return;
    526		}
    527	}
    528
    529	free_init_pages("unused decrypted", vaddr, vaddr_end);
    530}