cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ident_map_64.c (10901B)


// SPDX-License-Identifier: GPL-2.0
/*
 * This code is used on x86_64 to create page table identity mappings on
 * demand by building up a new set of page tables (or appending to the
 * existing ones), and then switching over to them when ready.
 *
 * Copyright (C) 2015-2016  Yinghai Lu
 * Copyright (C)      2016  Kees Cook
 */

/*
 * Since we're dealing with identity mappings, physical and virtual
 * addresses are the same, so override these defines which are ultimately
 * used by the headers in misc.h.
 */
#define __pa(x)  ((unsigned long)(x))
#define __va(x)  ((void *)((unsigned long)(x)))

/* No PAGE_TABLE_ISOLATION support needed either: */
#undef CONFIG_PAGE_TABLE_ISOLATION

#include "error.h"
#include "misc.h"

/* These actually do the work of building the kernel identity maps. */
#include <linux/pgtable.h>
#include <asm/cmpxchg.h>
#include <asm/trap_pf.h>
#include <asm/trapnr.h>
#include <asm/init.h>
/* Use the static base for this part of the boot process */
#undef __PAGE_OFFSET
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"

#define _SETUP
#include <asm/setup.h>	/* For COMMAND_LINE_SIZE */
#undef _SETUP
extern unsigned long get_cmd_line_ptr(void);

/* Used by PAGE_KERN* macros: */
pteval_t __default_kernel_pte_mask __read_mostly = ~0;

/* Used to track our page table allocation area. */
struct alloc_pgt_data {
	unsigned char *pgt_buf;
	unsigned long pgt_buf_size;
	unsigned long pgt_buf_offset;
};

/*
 * Allocates space for a page table entry, using struct alloc_pgt_data
 * above. Besides the local callers, this is used as the allocation
 * callback in mapping_info below.
 */
static void *alloc_pgt_page(void *context)
{
	struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
	unsigned char *entry;

	/* Validate there is space available for a new page. */
	if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
		debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
		debug_putaddr(pages->pgt_buf_offset);
		debug_putaddr(pages->pgt_buf_size);
		return NULL;
	}

	entry = pages->pgt_buf + pages->pgt_buf_offset;
	pages->pgt_buf_offset += PAGE_SIZE;

	return entry;
}
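
/*
 * Note that this is a simple bump allocator over the fixed pgt_buf area:
 * every call hands out one page and nothing is ever freed. Once the buffer
 * is exhausted, the NULL return is propagated by kernel_ident_mapping_init()
 * as an allocation failure.
 */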

/* Used to track our allocated page tables. */
static struct alloc_pgt_data pgt_data;

/* The top level page table entry pointer. */
static unsigned long top_level_pgt;

phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;

/*
 * Mapping information structure passed to kernel_ident_mapping_init().
 * Due to relocation, pointers must be assigned at run time not build time.
 */
static struct x86_mapping_info mapping_info;

/*
 * Adds the specified range to the identity mappings.
 */
void kernel_add_identity_map(unsigned long start, unsigned long end)
{
	int ret;

	/* Align boundary to 2M. */
	start = round_down(start, PMD_SIZE);
	end = round_up(end, PMD_SIZE);
	if (start >= end)
		return;

	/* Build the mapping. */
	ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end);
	if (ret)
		error("Error: kernel_ident_mapping_init() failed\n");
}
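
/*
 * Because of the PMD_SIZE rounding above, every request covers whole 2M
 * regions. A (hypothetical) caller that only needs a single 4K page
 * mapped, e.g.:
 *
 *	kernel_add_identity_map(addr, addr + PAGE_SIZE);
 *
 * therefore ends up with the entire 2M region containing 'addr' in the
 * identity mapping.
 */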

/* Locates and clears a region for a new top level page table. */
void initialize_identity_maps(void *rmode)
{
	unsigned long cmdline;
	struct setup_data *sd;

	/* Exclude the encryption mask from __PHYSICAL_MASK */
	physical_mask &= ~sme_me_mask;

	/* Init mapping_info with run-time function/buffer pointers. */
	mapping_info.alloc_pgt_page = alloc_pgt_page;
	mapping_info.context = &pgt_data;
	mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
	mapping_info.kernpg_flag = _KERNPG_TABLE;

	/*
	 * It should be impossible for this not to already be true,
	 * but since calling this a second time would rewind the other
	 * counters, let's just make sure this is reset too.
	 */
	pgt_data.pgt_buf_offset = 0;

	/*
	 * If we came here via startup_32(), cr3 will be _pgtable already
	 * and we must append to the existing area instead of entirely
	 * overwriting it.
	 *
	 * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
	 * the top-level page table is allocated separately.
	 *
	 * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
	 * cases. On 4-level paging it's equal to 'top_level_pgt'.
	 */
	top_level_pgt = read_cr3_pa();
	if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
	} else {
		pgt_data.pgt_buf = _pgtable;
		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
		top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
	}

	/*
	 * New page-table is set up - map the kernel image, boot_params and the
	 * command line. The uncompressed kernel requires boot_params and the
	 * command line to be mapped in the identity mapping. Map them
	 * explicitly here in case the compressed kernel does not touch them,
	 * or does not touch all the pages covering them.
	 */
	kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
	boot_params = rmode;
	kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
	cmdline = get_cmd_line_ptr();
	kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);

	/*
	 * Also map the setup_data entries passed via boot_params in case they
	 * need to be accessed by uncompressed kernel via the identity mapping.
	 */
	sd = (struct setup_data *)boot_params->hdr.setup_data;
	while (sd) {
		unsigned long sd_addr = (unsigned long)sd;

		kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len);
		sd = (struct setup_data *)sd->next;
	}

	sev_prep_identity_maps(top_level_pgt);

	/* Load the new page-table. */
	write_cr3(top_level_pgt);
}

static pte_t *split_large_pmd(struct x86_mapping_info *info,
			      pmd_t *pmdp, unsigned long __address)
{
	unsigned long page_flags;
	unsigned long address;
	pte_t *pte;
	pmd_t pmd;
	int i;

	pte = (pte_t *)info->alloc_pgt_page(info->context);
	if (!pte)
		return NULL;

	address     = __address & PMD_MASK;
	/* No large page - clear PSE flag */
	page_flags  = info->page_flag & ~_PAGE_PSE;

	/* Populate the PTEs */
	for (i = 0; i < PTRS_PER_PMD; i++) {
		set_pte(&pte[i], __pte(address | page_flags));
		address += PAGE_SIZE;
	}

	/*
	 * Ideally we need to clear the large PMD first and do a TLB
	 * flush before we write the new PMD. But the 2M range of the
	 * PMD might contain the code we execute and/or the stack
	 * we are on, so we can't do that. But that should be safe here
	 * because we are going from large to small mappings and we are
	 * also the only user of the page-table, so there is no chance
	 * of a TLB multihit.
	 */
	pmd = __pmd((unsigned long)pte | info->kernpg_flag);
	set_pmd(pmdp, pmd);
	/* Flush TLB to establish the new PMD */
	write_cr3(top_level_pgt);

	return pte + pte_index(__address);
}
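
/*
 * The split above replaces one 2M mapping with PTRS_PER_PMD (512) 4K PTEs
 * carrying the same flags minus _PAGE_PSE; 512 * 4K covers exactly
 * PMD_SIZE, so the mapped range is unchanged and only the granularity
 * differs. This is what allows set_clr_page_flags() below to change the
 * attributes of an individual 4K page.
 */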

static void clflush_page(unsigned long address)
{
	unsigned int flush_size;
	char *cl, *start, *end;

	/*
	 * Hardcode cl-size to 64 - CPUID can't be used here because that might
	 * cause another #VC exception and the GHCB is not ready to use yet.
	 */
	flush_size = 64;
	start      = (char *)(address & PAGE_MASK);
	end        = start + PAGE_SIZE;

	/*
	 * First make sure there are no pending writes on the cache-lines to
	 * flush.
	 */
	asm volatile("mfence" : : : "memory");

	for (cl = start; cl != end; cl += flush_size)
		clflush(cl);
}
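
/*
 * With the hardcoded 64-byte line size this issues PAGE_SIZE / 64 = 64
 * CLFLUSHes per page; if the real cache-line size is larger, some lines are
 * simply flushed more than once, which is harmless.
 */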

static int set_clr_page_flags(struct x86_mapping_info *info,
			      unsigned long address,
			      pteval_t set, pteval_t clr)
{
	pgd_t *pgdp = (pgd_t *)top_level_pgt;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep, pte;

	/*
	 * First make sure there is a PMD mapping for 'address'.
	 * It should already exist, but keep things generic.
	 *
	 * To map the page just read from it and fault it in if there is no
	 * mapping yet. kernel_add_identity_map() can't be called here because
	 * that would unconditionally map the address on PMD level, destroying
	 * any PTE-level mappings that might already exist. Use assembly here
	 * so the access won't be optimized away.
	 */
	asm volatile("mov %[address], %%r9"
		     :: [address] "g" (*(unsigned long *)address)
		     : "r9", "memory");

	/*
	 * The page is mapped at least with PMD size - so skip checks and walk
	 * directly to the PMD.
	 */
	p4dp = p4d_offset(pgdp, address);
	pudp = pud_offset(p4dp, address);
	pmdp = pmd_offset(pudp, address);

	if (pmd_large(*pmdp))
		ptep = split_large_pmd(info, pmdp, address);
	else
		ptep = pte_offset_kernel(pmdp, address);

	if (!ptep)
		return -ENOMEM;

	/*
	 * Changing encryption attributes of a page requires to flush it from
	 * the caches.
	 */
	if ((set | clr) & _PAGE_ENC) {
		clflush_page(address);

		/*
		 * If the encryption attribute is being cleared, change the page state
		 * to shared in the RMP table.
		 */
		if (clr)
			snp_set_page_shared(__pa(address & PAGE_MASK));
	}

	/* Update PTE */
	pte = *ptep;
	pte = pte_set_flags(pte, set);
	pte = pte_clear_flags(pte, clr);
	set_pte(ptep, pte);

	/*
	 * If the encryption attribute is being set, then change the page state to
	 * private in the RMP entry. The page state change must be done after the PTE
	 * is updated.
	 */
	if (set & _PAGE_ENC)
		snp_set_page_private(__pa(address & PAGE_MASK));

	/* Flush TLB after changing encryption attribute */
	write_cr3(top_level_pgt);

	return 0;
}

int set_page_decrypted(unsigned long address)
{
	return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC);
}

int set_page_encrypted(unsigned long address)
{
	return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0);
}

int set_page_non_present(unsigned long address)
{
	return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT);
}
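
/*
 * These wrappers are what the SEV support code in this directory uses to
 * flip individual pages between guest-private and shared. For example,
 * before the GHCB can be used for communication with the hypervisor its
 * page has to be mapped unencrypted, roughly:
 *
 *	if (set_page_decrypted((unsigned long)&boot_ghcb_page))
 *		return false;
 *
 * (sketch of the call site in this directory's sev.c; the surrounding
 * error handling may differ).
 */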

static void do_pf_error(const char *msg, unsigned long error_code,
			unsigned long address, unsigned long ip)
{
	error_putstr(msg);

	error_putstr("\nError Code: ");
	error_puthex(error_code);
	error_putstr("\nCR2: 0x");
	error_puthex(address);
	error_putstr("\nRIP relative to _head: 0x");
	error_puthex(ip - (unsigned long)_head);
	error_putstr("\n");

	error("Stopping.\n");
}

void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
{
	unsigned long address = native_read_cr2();
	unsigned long end;
	bool ghcb_fault;

	ghcb_fault = sev_es_check_ghcb_fault(address);

	address   &= PMD_MASK;
	end        = address + PMD_SIZE;

	/*
	 * Check for unexpected error codes. Unexpected are:
	 *	- Faults on present pages
	 *	- User faults
	 *	- Reserved bits set
	 */
	if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD))
		do_pf_error("Unexpected page-fault:", error_code, address, regs->ip);
	else if (ghcb_fault)
		do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip);

	/*
	 * Error code is sane - now identity map the 2M region around
	 * the faulting address.
	 */
	kernel_add_identity_map(address, end);
}