cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

machine_kexec.c (9570B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec for arm64
 *
 * Copyright (C) Linaro.
 * Copyright (C) Huawei Futurewei Technologies.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
#include <linux/set_memory.h>
#include <linux/smp.h>

#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/sections.h>
#include <asm/trans_pgd.h>

/**
 * kexec_image_info - For debugging output.
 */
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
static void _kexec_image_info(const char *func, int line,
	const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:%d:\n", func, line);
	pr_debug("  kexec kimage info:\n");
	pr_debug("    type:        %d\n", kimage->type);
	pr_debug("    start:       %lx\n", kimage->start);
	pr_debug("    head:        %lx\n", kimage->head);
	pr_debug("    nr_segments: %lu\n", kimage->nr_segments);
	pr_debug("    dtb_mem: %pa\n", &kimage->arch.dtb_mem);
	pr_debug("    kern_reloc: %pa\n", &kimage->arch.kern_reloc);
	pr_debug("    el2_vectors: %pa\n", &kimage->arch.el2_vectors);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug("      segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);
	}
}

void machine_kexec_cleanup(struct kimage *kimage)
{
	/* Empty routine needed to avoid build errors. */
}

/**
 * machine_kexec_prepare - Prepare for a kexec reboot.
 *
 * Called from the core kexec code when a kernel image is loaded.
 * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
 * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
 */
int machine_kexec_prepare(struct kimage *kimage)
{
	if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
		pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
		return -EBUSY;
	}

	return 0;
}

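/*
 * The new image is entered with the MMU and data caches off, so the segments
 * must be cleaned to the Point of Coherency (PoC): anything left dirty in the
 * caches would be invisible to the non-cacheable accesses made early on.
 */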
/**
 * kexec_segment_flush - Helper to flush the kimage segments to PoC.
 */
static void kexec_segment_flush(const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:\n", __func__);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);

		dcache_clean_inval_poc(
			(unsigned long)phys_to_virt(kimage->segment[i].mem),
			(unsigned long)phys_to_virt(kimage->segment[i].mem) +
				kimage->segment[i].memsz);
	}
}

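/*
 * Control pages come from the core kexec allocator, which guarantees that
 * they do not overlap the destination ranges the relocation code will later
 * copy segments into, so page tables built from them stay intact while the
 * copy is in progress.
 */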
/* Allocates pages for kexec page table */
static void *kexec_page_alloc(void *arg)
{
	struct kimage *kimage = (struct kimage *)arg;
	struct page *page = kimage_alloc_control_pages(kimage, 0);
	void *vaddr = NULL;

	if (!page)
		return NULL;

	vaddr = page_address(page);
	memset(vaddr, 0, PAGE_SIZE);

	return vaddr;
}

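/*
 * Runs once the image has been loaded and does all the work that can happen
 * ahead of the reboot itself: EL2 vectors for nVHE, a trans_pgd copy of the
 * linear map for TTBR1, an identity mapping of the relocation code for TTBR0,
 * and cache maintenance so that code can execute with the MMU off.
 */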
int machine_kexec_post_load(struct kimage *kimage)
{
	int rc;
	pgd_t *trans_pgd;
	void *reloc_code = page_to_virt(kimage->control_code_page);
	long reloc_size;
	struct trans_pgd_info info = {
		.trans_alloc_page	= kexec_page_alloc,
		.trans_alloc_arg	= kimage,
	};

	/* If the image is already in place, relocation is not used; only flush the next kernel */
	if (kimage->head & IND_DONE) {
		kexec_segment_flush(kimage);
		kexec_image_info(kimage);
		return 0;
	}

	kimage->arch.el2_vectors = 0;
	if (is_hyp_nvhe()) {
		rc = trans_pgd_copy_el2_vectors(&info,
						&kimage->arch.el2_vectors);
		if (rc)
			return rc;
	}

	/* Create a copy of the linear map */
	trans_pgd = kexec_page_alloc(kimage);
	if (!trans_pgd)
		return -ENOMEM;
	rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END);
	if (rc)
		return rc;
	kimage->arch.ttbr1 = __pa(trans_pgd);
	kimage->arch.zero_page = __pa_symbol(empty_zero_page);

	reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
	memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
	kimage->arch.kern_reloc = __pa(reloc_code);
	rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0,
				  &kimage->arch.t0sz, reloc_code);
	if (rc)
		return rc;
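	/*
	 * Record the delta between the kimage's virtual and physical
	 * addresses; the relocation code uses it to translate virtual
	 * addresses to physical when it is about to switch the MMU off.
	 */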
	kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage;

	/* Flush the reloc_code in preparation for its execution. */
	dcache_clean_inval_poc((unsigned long)reloc_code,
			       (unsigned long)reloc_code + reloc_size);
	icache_inval_pou((uintptr_t)reloc_code,
			 (uintptr_t)reloc_code + reloc_size);
	kexec_image_info(kimage);

	return 0;
}

/**
 * machine_kexec - Do the kexec reboot.
 *
 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
 */
void machine_kexec(struct kimage *kimage)
{
	bool in_kexec_crash = (kimage == kexec_crash_image);
	bool stuck_cpus = cpus_are_stuck_in_kernel();

	/*
	 * New cpus may have become stuck_in_kernel after we loaded the image.
	 */
	BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
	WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
		"Some CPUs may be stale, kdump will be unreliable.\n");

	pr_info("Bye!\n");

	local_daif_mask();

	/*
	 * Both restart and kernel_reloc will shut down the MMU and disable
	 * data caches. However, restart will start the new kernel or
	 * purgatory directly, while kernel_reloc contains the body of
	 * arm64_relocate_new_kernel.
	 * In the kexec case, kimage->start points to purgatory, assuming that
	 * the kernel entry and dtb address are embedded in purgatory by
	 * userspace (kexec-tools).
	 * In the kexec_file case, the kernel starts directly without
	 * purgatory.
	 */
	if (kimage->head & IND_DONE) {
		typeof(cpu_soft_restart) *restart;

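		/*
		 * cpu_soft_restart() lives in .idmap.text, so once the idmap
		 * is installed it can be entered through its physical address
		 * (function_nocfi() strips the CFI jump-table indirection) and
		 * can turn the MMU off with the PC on a valid 1:1 mapping.
		 */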
		cpu_install_idmap();
		restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart));
		restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
			0, 0);
	} else {
		void (*kernel_reloc)(struct kimage *kimage);

		if (is_hyp_nvhe())
			__hyp_set_vectors(kimage->arch.el2_vectors);
		cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz);
		kernel_reloc = (void *)kimage->arch.kern_reloc;
		kernel_reloc(kimage);
	}

	BUG(); /* Should never get here. */
}

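/*
 * Quiesce the interrupt controller on the crash path: clear any active or
 * in-progress interrupts, then mask and disable every irq so the crashdump
 * kernel starts from a quiet interrupt state.
 */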
static void machine_kexec_mask_interrupts(void)
{
	unsigned int i;
	struct irq_desc *desc;

	for_each_irq_desc(i, desc) {
		struct irq_chip *chip;
		int ret;

		chip = irq_desc_get_chip(desc);
		if (!chip)
			continue;

		/*
		 * First try to remove the active state. If this
		 * fails, try to EOI the interrupt.
		 */
		ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);

		if (ret && irqd_irq_inprogress(&desc->irq_data) &&
		    chip->irq_eoi)
			chip->irq_eoi(&desc->irq_data);

		if (chip->irq_mask)
			chip->irq_mask(&desc->irq_data);

		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
			chip->irq_disable(&desc->irq_data);
	}
}

/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	local_irq_disable();

	/* shutdown non-crashing cpus */
	crash_smp_send_stop();

	/* for crashing cpu */
	crash_save_cpu(regs, smp_processor_id());
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}

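/*
 * While the crash kernel sits loaded but unused, its pages are marked invalid
 * in the linear map so that a stray write from the running kernel faults
 * instead of silently corrupting the crashdump image; they are made valid
 * again just before the image is needed.
 */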
void arch_kexec_protect_crashkres(void)
{
	int i;

	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		set_memory_valid(
			__phys_to_virt(kexec_crash_image->segment[i].mem),
			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
}

void arch_kexec_unprotect_crashkres(void)
{
	int i;

	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		set_memory_valid(
			__phys_to_virt(kexec_crash_image->segment[i].mem),
			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
}

#ifdef CONFIG_HIBERNATION
/*
 * To preserve the crash dump kernel image, the relevant memory segments
 * should be mapped again around the hibernation.
 */
void crash_prepare_suspend(void)
{
	if (kexec_crash_image)
		arch_kexec_unprotect_crashkres();
}

void crash_post_resume(void)
{
	if (kexec_crash_image)
		arch_kexec_protect_crashkres();
}

/*
 * crash_is_nosave
 *
 * Return true only if a page is part of reserved memory for the crash dump
 * kernel, but does not hold any data of the loaded kernel image.
 *
 * Note that all the pages in crash dump kernel memory have been initially
 * marked as Reserved as memory was allocated via memblock_reserve().
 *
 * In hibernation, the pages which are Reserved and yet "nosave" are excluded
 * from the hibernation image. crash_is_nosave() does this check for the crash
 * dump kernel and will reduce the total size of the hibernation image.
 */

bool crash_is_nosave(unsigned long pfn)
{
	int i;
	phys_addr_t addr;

	if (!crashk_res.end)
		return false;

	/* in reserved memory? */
	addr = __pfn_to_phys(pfn);
	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
		if (!crashk_low_res.end)
			return false;

		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
			return false;
	}

	if (!kexec_crash_image)
		return true;

	/* not part of loaded kernel image? */
	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		if (addr >= kexec_crash_image->segment[i].mem &&
				addr < (kexec_crash_image->segment[i].mem +
					kexec_crash_image->segment[i].memsz))
			return false;

	return true;
}

void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
{
	unsigned long addr;
	struct page *page;

	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		page = phys_to_page(addr);
		free_reserved_page(page);
	}
}
#endif /* CONFIG_HIBERNATION */