cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dump_pagetables.c (12462B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Debug helper to dump the current kernel pagetables of the system
      4 * so that we can see what the various memory ranges are set to.
      5 *
      6 * (C) Copyright 2008 Intel Corporation
      7 *
      8 * Author: Arjan van de Ven <arjan@linux.intel.com>
      9 */
     10
     11#include <linux/debugfs.h>
     12#include <linux/kasan.h>
     13#include <linux/mm.h>
     14#include <linux/init.h>
     15#include <linux/sched.h>
     16#include <linux/seq_file.h>
     17#include <linux/highmem.h>
     18#include <linux/pci.h>
     19#include <linux/ptdump.h>
     20
     21#include <asm/e820/types.h>
     22
/*
 * The dumper groups pagetable entries of the same type into one, and for
 * that it needs to keep some state when walking, and flush this state
 * when a "break" in the continuity is found.
 */
struct pg_state {
	/* Walker state; the callbacks get back to pg_state via container_of() */
	struct ptdump_state ptdump;
	/* Level of the run being collected; -1 until the first entry is noted */
	int level;
	/* Flag bits (val & PTE_FLAGS_MASK) shared by the run being collected */
	pgprotval_t current_prot;
	/* Effective protection of the run being collected */
	pgprotval_t effective_prot;
	/* Effective protection per level, written by effective_prot() */
	pgprotval_t prot_levels[5];
	/* Address at which the run being collected starts */
	unsigned long start_address;
	/* Marker of the address space region currently being dumped */
	const struct addr_marker *marker;
	/* Number of runs seen so far under the current marker */
	unsigned long lines;
	/* Print with printk() instead of writing to the seq_file */
	bool to_dmesg;
	/* Account runs that are both writable and executable */
	bool check_wx;
	/* Number of W+X pages accounted by note_wx() */
	unsigned long wx_pages;
	/* Output seq_file; NULL when called from the W+X checkers */
	struct seq_file *seq;
};
     42
/* Names a region of the address space so the dump can print a heading for it. */
struct addr_marker {
	/* First address of the region */
	unsigned long start_address;
	/* Heading printed as "---[ name ]---" */
	const char *name;
	/* Maximum number of lines printed for the region; 0 means no limit */
	unsigned long max_lines;
};
     48
     49/* Address space markers hints */
     50
     51#ifdef CONFIG_X86_64
     52
/*
 * Indices into address_markers[] on 64-bit kernels.  note_page() steps
 * through the markers one at a time, so the order of the enumerators is
 * the order in which the regions are reported (presumably ascending
 * virtual address -- confirm against the x86-64 memory layout).
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
	CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
#endif
#ifdef CONFIG_EFI
	EFI_END_NR,
#endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
	FIXADDR_START_NR,
	/* Index of the closing entry of address_markers[] */
	END_OF_SPACE_NR,
};
     79
     80static struct addr_marker address_markers[] = {
     81	[USER_SPACE_NR]		= { 0,			"User Space" },
     82	[KERNEL_SPACE_NR]	= { (1UL << 63),	"Kernel Space" },
     83	[LOW_KERNEL_NR]		= { 0UL,		"Low Kernel Mapping" },
     84	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
     85	[VMEMMAP_START_NR]	= { 0UL,		"Vmemmap" },
     86#ifdef CONFIG_KASAN
     87	/*
     88	 * These fields get initialized with the (dynamic)
     89	 * KASAN_SHADOW_{START,END} values in pt_dump_init().
     90	 */
     91	[KASAN_SHADOW_START_NR]	= { 0UL,		"KASAN shadow" },
     92	[KASAN_SHADOW_END_NR]	= { 0UL,		"KASAN shadow end" },
     93#endif
     94#ifdef CONFIG_MODIFY_LDT_SYSCALL
     95	[LDT_NR]		= { 0UL,		"LDT remap" },
     96#endif
     97	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
     98#ifdef CONFIG_X86_ESPFIX64
     99	[ESPFIX_START_NR]	= { ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
    100#endif
    101#ifdef CONFIG_EFI
    102	[EFI_END_NR]		= { EFI_VA_END,		"EFI Runtime Services" },
    103#endif
    104	[HIGH_KERNEL_NR]	= { __START_KERNEL_map,	"High Kernel Mapping" },
    105	[MODULES_VADDR_NR]	= { MODULES_VADDR,	"Modules" },
    106	[MODULES_END_NR]	= { MODULES_END,	"End Modules" },
    107	[FIXADDR_START_NR]	= { FIXADDR_START,	"Fixmap Area" },
    108	[END_OF_SPACE_NR]	= { -1,			NULL }
    109};
    110
/* Top-level page table handed, together with init_mm, to the W+X checkers */
#define INIT_PGD	((pgd_t *) &init_top_pgt)
    112
    113#else /* CONFIG_X86_64 */
    114
/*
 * Indices into address_markers[] on 32-bit kernels.  note_page() steps
 * through the markers one at a time, so the order of the enumerators is
 * the order in which the regions are reported.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
#ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	CPU_ENTRY_AREA_NR,
	FIXADDR_START_NR,
	/* Index of the closing entry of address_markers[] */
	END_OF_SPACE_NR,
};
    130
    131static struct addr_marker address_markers[] = {
    132	[USER_SPACE_NR]		= { 0,			"User Space" },
    133	[KERNEL_SPACE_NR]	= { PAGE_OFFSET,	"Kernel Mapping" },
    134	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
    135	[VMALLOC_END_NR]	= { 0UL,		"vmalloc() End" },
    136#ifdef CONFIG_HIGHMEM
    137	[PKMAP_BASE_NR]		= { 0UL,		"Persistent kmap() Area" },
    138#endif
    139#ifdef CONFIG_MODIFY_LDT_SYSCALL
    140	[LDT_NR]		= { 0UL,		"LDT remap" },
    141#endif
    142	[CPU_ENTRY_AREA_NR]	= { 0UL,		"CPU entry area" },
    143	[FIXADDR_START_NR]	= { 0UL,		"Fixmap area" },
    144	[END_OF_SPACE_NR]	= { -1,			NULL }
    145};
    146
/* Top-level page table handed, together with init_mm, to the W+X checkers */
#define INIT_PGD	(swapper_pg_dir)
    148
    149#endif /* !CONFIG_X86_64 */
    150
/*
 * Multipliers for offsets within the PTEs.
 *
 * The chain starts at PAGE_SIZE for a PTE; every higher level multiplies
 * the value of the level below it by the number of entries in that lower
 * table.  PGD_LEVEL_MULT is used by ptdump_walk_pgd_level_core() to
 * compute the end of the first walked range.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT)
    157
    158#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
    159({								\
    160	if (to_dmesg)					\
    161		printk(KERN_INFO fmt, ##args);			\
    162	else							\
    163		if (m)						\
    164			seq_printf(m, fmt, ##args);		\
    165})
    166
    167#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
    168({								\
    169	if (to_dmesg)					\
    170		printk(KERN_CONT fmt, ##args);			\
    171	else							\
    172		if (m)						\
    173			seq_printf(m, fmt, ##args);		\
    174})
    175
    176/*
    177 * Print a readable form of a pgprot_t to the seq_file
    178 */
    179static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
    180{
    181	static const char * const level_name[] =
    182		{ "pgd", "p4d", "pud", "pmd", "pte" };
    183
    184	if (!(pr & _PAGE_PRESENT)) {
    185		/* Not present */
    186		pt_dump_cont_printf(m, dmsg, "                              ");
    187	} else {
    188		if (pr & _PAGE_USER)
    189			pt_dump_cont_printf(m, dmsg, "USR ");
    190		else
    191			pt_dump_cont_printf(m, dmsg, "    ");
    192		if (pr & _PAGE_RW)
    193			pt_dump_cont_printf(m, dmsg, "RW ");
    194		else
    195			pt_dump_cont_printf(m, dmsg, "ro ");
    196		if (pr & _PAGE_PWT)
    197			pt_dump_cont_printf(m, dmsg, "PWT ");
    198		else
    199			pt_dump_cont_printf(m, dmsg, "    ");
    200		if (pr & _PAGE_PCD)
    201			pt_dump_cont_printf(m, dmsg, "PCD ");
    202		else
    203			pt_dump_cont_printf(m, dmsg, "    ");
    204
    205		/* Bit 7 has a different meaning on level 3 vs 4 */
    206		if (level <= 3 && pr & _PAGE_PSE)
    207			pt_dump_cont_printf(m, dmsg, "PSE ");
    208		else
    209			pt_dump_cont_printf(m, dmsg, "    ");
    210		if ((level == 4 && pr & _PAGE_PAT) ||
    211		    ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
    212			pt_dump_cont_printf(m, dmsg, "PAT ");
    213		else
    214			pt_dump_cont_printf(m, dmsg, "    ");
    215		if (pr & _PAGE_GLOBAL)
    216			pt_dump_cont_printf(m, dmsg, "GLB ");
    217		else
    218			pt_dump_cont_printf(m, dmsg, "    ");
    219		if (pr & _PAGE_NX)
    220			pt_dump_cont_printf(m, dmsg, "NX ");
    221		else
    222			pt_dump_cont_printf(m, dmsg, "x  ");
    223	}
    224	pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
    225}
    226
    227static void note_wx(struct pg_state *st, unsigned long addr)
    228{
    229	unsigned long npages;
    230
    231	npages = (addr - st->start_address) / PAGE_SIZE;
    232
    233#ifdef CONFIG_PCI_BIOS
    234	/*
    235	 * If PCI BIOS is enabled, the PCI BIOS area is forced to WX.
    236	 * Inform about it, but avoid the warning.
    237	 */
    238	if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
    239	    addr <= PAGE_OFFSET + BIOS_END) {
    240		pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
    241		return;
    242	}
    243#endif
    244	/* Account the WX pages */
    245	st->wx_pages += npages;
    246	WARN_ONCE(__supported_pte_mask & _PAGE_NX,
    247		  "x86/mm: Found insecure W+X mapping at address %pS\n",
    248		  (void *)st->start_address);
    249}
    250
    251static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
    252{
    253	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
    254	pgprotval_t prot = val & PTE_FLAGS_MASK;
    255	pgprotval_t effective;
    256
    257	if (level > 0) {
    258		pgprotval_t higher_prot = st->prot_levels[level - 1];
    259
    260		effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
    261			    ((higher_prot | prot) & _PAGE_NX);
    262	} else {
    263		effective = prot;
    264	}
    265
    266	st->prot_levels[level] = effective;
    267}
    268
    269/*
    270 * This function gets called on a break in a continuous series
    271 * of PTE entries; the next one is different so we need to
    272 * print what we collected so far.
    273 */
    274static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
    275		      u64 val)
    276{
    277	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
    278	pgprotval_t new_prot, new_eff;
    279	pgprotval_t cur, eff;
    280	static const char units[] = "BKMGTPE";
    281	struct seq_file *m = st->seq;
    282
    283	new_prot = val & PTE_FLAGS_MASK;
    284	if (!val)
    285		new_eff = 0;
    286	else
    287		new_eff = st->prot_levels[level];
    288
    289	/*
    290	 * If we have a "break" in the series, we need to flush the state that
    291	 * we have now. "break" is either changing perms, levels or
    292	 * address space marker.
    293	 */
    294	cur = st->current_prot;
    295	eff = st->effective_prot;
    296
    297	if (st->level == -1) {
    298		/* First entry */
    299		st->current_prot = new_prot;
    300		st->effective_prot = new_eff;
    301		st->level = level;
    302		st->marker = address_markers;
    303		st->lines = 0;
    304		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
    305				   st->marker->name);
    306	} else if (new_prot != cur || new_eff != eff || level != st->level ||
    307		   addr >= st->marker[1].start_address) {
    308		const char *unit = units;
    309		unsigned long delta;
    310		int width = sizeof(unsigned long) * 2;
    311
    312		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
    313			note_wx(st, addr);
    314
    315		/*
    316		 * Now print the actual finished series
    317		 */
    318		if (!st->marker->max_lines ||
    319		    st->lines < st->marker->max_lines) {
    320			pt_dump_seq_printf(m, st->to_dmesg,
    321					   "0x%0*lx-0x%0*lx   ",
    322					   width, st->start_address,
    323					   width, addr);
    324
    325			delta = addr - st->start_address;
    326			while (!(delta & 1023) && unit[1]) {
    327				delta >>= 10;
    328				unit++;
    329			}
    330			pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
    331					    delta, *unit);
    332			printk_prot(m, st->current_prot, st->level,
    333				    st->to_dmesg);
    334		}
    335		st->lines++;
    336
    337		/*
    338		 * We print markers for special areas of address space,
    339		 * such as the start of vmalloc space etc.
    340		 * This helps in the interpretation.
    341		 */
    342		if (addr >= st->marker[1].start_address) {
    343			if (st->marker->max_lines &&
    344			    st->lines > st->marker->max_lines) {
    345				unsigned long nskip =
    346					st->lines - st->marker->max_lines;
    347				pt_dump_seq_printf(m, st->to_dmesg,
    348						   "... %lu entr%s skipped ... \n",
    349						   nskip,
    350						   nskip == 1 ? "y" : "ies");
    351			}
    352			st->marker++;
    353			st->lines = 0;
    354			pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
    355					   st->marker->name);
    356		}
    357
    358		st->start_address = addr;
    359		st->current_prot = new_prot;
    360		st->effective_prot = new_eff;
    361		st->level = level;
    362	}
    363}
    364
    365static void ptdump_walk_pgd_level_core(struct seq_file *m,
    366				       struct mm_struct *mm, pgd_t *pgd,
    367				       bool checkwx, bool dmesg)
    368{
    369	const struct ptdump_range ptdump_ranges[] = {
    370#ifdef CONFIG_X86_64
    371	{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
    372	{GUARD_HOLE_END_ADDR, ~0UL},
    373#else
    374	{0, ~0UL},
    375#endif
    376	{0, 0}
    377};
    378
    379	struct pg_state st = {
    380		.ptdump = {
    381			.note_page	= note_page,
    382			.effective_prot = effective_prot,
    383			.range		= ptdump_ranges
    384		},
    385		.level = -1,
    386		.to_dmesg	= dmesg,
    387		.check_wx	= checkwx,
    388		.seq		= m
    389	};
    390
    391	ptdump_walk_pgd(&st.ptdump, mm, pgd);
    392
    393	if (!checkwx)
    394		return;
    395	if (st.wx_pages)
    396		pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
    397			st.wx_pages);
    398	else
    399		pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
    400}
    401
    402void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
    403{
    404	ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true);
    405}
    406
    407void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
    408				   bool user)
    409{
    410	pgd_t *pgd = mm->pgd;
    411#ifdef CONFIG_PAGE_TABLE_ISOLATION
    412	if (user && boot_cpu_has(X86_FEATURE_PTI))
    413		pgd = kernel_to_user_pgdp(pgd);
    414#endif
    415	ptdump_walk_pgd_level_core(m, mm, pgd, false, false);
    416}
    417EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
    418
    419void ptdump_walk_user_pgd_level_checkwx(void)
    420{
    421#ifdef CONFIG_PAGE_TABLE_ISOLATION
    422	pgd_t *pgd = INIT_PGD;
    423
    424	if (!(__supported_pte_mask & _PAGE_NX) ||
    425	    !boot_cpu_has(X86_FEATURE_PTI))
    426		return;
    427
    428	pr_info("x86/mm: Checking user space page tables\n");
    429	pgd = kernel_to_user_pgdp(pgd);
    430	ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false);
    431#endif
    432}
    433
    434void ptdump_walk_pgd_level_checkwx(void)
    435{
    436	ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
    437}
    438
    439static int __init pt_dump_init(void)
    440{
    441	/*
    442	 * Various markers are not compile-time constants, so assign them
    443	 * here.
    444	 */
    445#ifdef CONFIG_X86_64
    446	address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
    447	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
    448	address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
    449#ifdef CONFIG_MODIFY_LDT_SYSCALL
    450	address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
    451#endif
    452#ifdef CONFIG_KASAN
    453	address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
    454	address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
    455#endif
    456#endif
    457#ifdef CONFIG_X86_32
    458	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
    459	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
    460# ifdef CONFIG_HIGHMEM
    461	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
    462# endif
    463	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
    464	address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
    465# ifdef CONFIG_MODIFY_LDT_SYSCALL
    466	address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
    467# endif
    468#endif
    469	return 0;
    470}
    471__initcall(pt_dump_init);