cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vdso.c (11545B)


// SPDX-License-Identifier: GPL-2.0-or-later

/*
 *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
 *			 <benh@kernel.crashing.org>
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/syscalls.h>
#include <linux/time_namespace.h>
#include <vdso/datapage.h>

#include <asm/syscall.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
#include <asm/setup.h>

/* The alignment of the vDSO */
#define VDSO_ALIGNMENT	(1 << 16)

extern char vdso32_start, vdso32_end;
extern char vdso64_start, vdso64_end;

/*
 * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
 * Once the early boot kernel code no longer needs to muck around
 * with it, it will become dynamically allocated
 */
static union {
	struct vdso_arch_data	data;
	u8			page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_arch_data *vdso_data = &vdso_data_store.data;

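/*
 * Page offsets within the "[vvar]" mapping: the shared vDSO data page
 * comes first, followed by a page reserved for time namespace data
 * (see vvar_fault() below).
 */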
enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};

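/*
 * Keep mm->context.vdso pointing at the vDSO text after the mapping has
 * been moved by mremap(); resizing the mapping is not supported.
 */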
static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma,
		       unsigned long text_size)
{
	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;

	if (new_size != text_size)
		return -EINVAL;

	current->mm->context.vdso = (void __user *)new_vma->vm_start;

	return 0;
}

static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
{
	return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start);
}

static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
{
	return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
}

static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf);

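/*
 * Special mapping descriptors: the "[vvar]" data mapping is populated on
 * demand via vvar_fault(), while the 32-bit and 64-bit "[vdso]" text
 * mappings get their page arrays filled in by vdso_init().
 */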
static struct vm_special_mapping vvar_spec __ro_after_init = {
	.name = "[vvar]",
	.fault = vvar_fault,
};

static struct vm_special_mapping vdso32_spec __ro_after_init = {
	.name = "[vdso]",
	.mremap = vdso32_mremap,
};

static struct vm_special_mapping vdso64_spec __ro_after_init = {
	.name = "[vdso]",
	.mremap = vdso64_mremap,
};

#ifdef CONFIG_TIME_NS
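/*
 * Return the generic vdso_data array embedded in an arch vvar page; used
 * by the generic time namespace code (see timens_setup_vdso_data()).
 */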
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return ((struct vdso_arch_data *)vvar_page)->data;
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;

	mmap_read_lock(mm);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		unsigned long size = vma->vm_end - vma->vm_start;

		if (vma_is_special_mapping(vma, &vvar_spec))
			zap_page_range(vma, vma->vm_start, size);
	}

	mmap_read_unlock(mm);
	return 0;
}

static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	if (likely(vma->vm_mm == current->mm))
		return current->nsproxy->time_ns->vvar_page;

	/*
	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
	 * through interfaces like /proc/$pid/mem or
	 * process_vm_{readv,writev}() as long as there's no .access()
	 * in special_mapping_vmops.
	 * For more details check_vma_flags() and __access_remote_vm()
	 */
	WARN(1, "vvar_page accessed remotely");

	return NULL;
}
#else
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	return NULL;
}
#endif

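/*
 * Fault handler for the "[vvar]" mapping: install either the global vDSO
 * data page or, for tasks in a time namespace, the namespace's vvar page
 * at the faulting offset.
 */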
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long pfn;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		if (timens_page)
			pfn = page_to_pfn(timens_page);
		else
			pfn = virt_to_pfn(vdso_data);
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
		 * offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = virt_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		return VM_FAULT_SIGBUS;
	}

	return vmf_insert_pfn(vma, vmf->address, pfn);
}

/*
 * This is called from binfmt_elf, we create the special vma for the
 * vDSO and insert it into the mm struct tree
 */
static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	unsigned long vdso_size, vdso_base, mappings_size;
	struct vm_special_mapping *vdso_spec;
	unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (is_32bit_task()) {
		vdso_spec = &vdso32_spec;
		vdso_size = &vdso32_end - &vdso32_start;
		vdso_base = VDSO32_MBASE;
	} else {
		vdso_spec = &vdso64_spec;
		vdso_size = &vdso64_end - &vdso64_start;
		/*
		 * On 64bit we don't have a preferred map address. This
		 * allows get_unmapped_area to find an area near other mmaps
		 * and most likely share a SLB entry.
		 */
		vdso_base = 0;
	}

	mappings_size = vdso_size + vvar_size;
	mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK;

	/*
	 * pick a base address for the vDSO in process space. We try to put it
	 * at vdso_base which is the "natural" base for it, but we might fail
	 * and end up putting it elsewhere.
	 * Add enough to the size so that the result can be aligned.
	 */
	vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0);
	if (IS_ERR_VALUE(vdso_base))
		return vdso_base;

	/* Add required alignment. */
	vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);

	/*
	 * Put vDSO base into mm struct. We need to do this before calling
	 * install_special_mapping or the perf counter mmap tracking code
	 * will fail to recognise it as a vDSO.
	 */
	mm->context.vdso = (void __user *)vdso_base + vvar_size;

	vma = _install_special_mapping(mm, vdso_base, vvar_size,
				       VM_READ | VM_MAYREAD | VM_IO |
				       VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/*
	 * our vma flags don't have VM_WRITE so by default, the process isn't
	 * allowed to write those pages.
	 * gdb can break that with ptrace interface, and thus trigger COW on
	 * those pages but it's then your responsibility to never do that on
	 * the "data" page of the vDSO or you'll stop getting kernel updates
	 * and your nice userland gettimeofday will be totally dead.
	 * It's fine to use that for setting breakpoints in the vDSO code
	 * pages though.
	 */
	vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
				       VM_READ | VM_EXEC | VM_MAYREAD |
				       VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
	if (IS_ERR(vma))
		do_munmap(mm, vdso_base, vvar_size, NULL);

	return PTR_ERR_OR_ZERO(vma);
}

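/*
 * Exec-time entry point: take the mmap lock and map the vDSO into the new
 * process image, clearing the vdso pointer again if the mapping fails.
 */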
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mm->context.vdso = NULL;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	rc = __arch_setup_additional_pages(bprm, uses_interp);
	if (rc)
		mm->context.vdso = NULL;

	mmap_write_unlock(mm);
	return rc;
}

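/*
 * Resolve the start/end of a fixup section inside the vDSO image and apply
 * the corresponding feature or lwsync fixups to it.
 */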
#define VDSO_DO_FIXUPS(type, value, bits, sec) do {					\
	void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start);	\
	void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end);	\
											\
	do_##type##_fixups((value), __start, __end);					\
} while (0)

static void __init vdso_fixup_features(void)
{
#ifdef CONFIG_PPC64
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup);
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup);
	VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup);
	VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup);
#endif /* CONFIG_PPC64 */

#ifdef CONFIG_VDSO32
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup);
	VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup);
#ifdef CONFIG_PPC64
	VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup);
#endif /* CONFIG_PPC64 */
	VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup);
#endif
}

/*
 * Called from vdso_init() to initialize the bitmap of available
 * syscalls in the systemcfg page
 */
static void __init vdso_setup_syscall_map(void)
{
	unsigned int i;

	for (i = 0; i < NR_syscalls; i++) {
		if (sys_call_table[i] != (unsigned long)&sys_ni_syscall)
			vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
		if (IS_ENABLED(CONFIG_COMPAT) &&
		    compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall)
			vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
	}
}

#ifdef CONFIG_PPC64
int vdso_getcpu_init(void)
{
	unsigned long cpu, node, val;

	/*
	 * SPRG_VDSO contains the CPU in the bottom 16 bits and the NUMA node
	 * in the next 16 bits.  The VDSO uses this to implement getcpu().
	 */
	cpu = get_cpu();
	WARN_ON_ONCE(cpu > 0xffff);

	node = cpu_to_node(cpu);
	WARN_ON_ONCE(node > 0xffff);

	val = (cpu & 0xffff) | ((node & 0xffff) << 16);
	mtspr(SPRN_SPRG_VDSO_WRITE, val);
	get_paca()->sprg_vdso = val;

	put_cpu();

	return 0;
}
/* We need to call this before SMP init */
early_initcall(vdso_getcpu_init);
#endif

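/*
 * Build a NULL-terminated array of struct page pointers covering the
 * vDSO image embedded in the kernel, for use as a special mapping.
 */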
static struct page ** __init vdso_setup_pages(void *start, void *end)
{
	int i;
	struct page **pagelist;
	int pages = (end - start) >> PAGE_SHIFT;

	pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
	if (!pagelist)
		panic("%s: Cannot allocate page list for VDSO", __func__);

	for (i = 0; i < pages; i++)
		pagelist[i] = virt_to_page(start + i * PAGE_SIZE);

	return pagelist;
}

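/*
 * One-time vDSO setup: populate the legacy systemcfg fields (on ppc64),
 * record which syscalls are implemented, apply CPU feature fixups to the
 * vDSO text and set up the page arrays for the special mappings.
 */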
static int __init vdso_init(void)
{
#ifdef CONFIG_PPC64
	/*
	 * Fill up the "systemcfg" stuff for backward compatibility
	 */
	strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
	vdso_data->version.major = SYSTEMCFG_MAJOR;
	vdso_data->version.minor = SYSTEMCFG_MINOR;
	vdso_data->processor = mfspr(SPRN_PVR);
	/*
	 * Fake the old platform number for pSeries and add
	 * in LPAR bit if necessary
	 */
	vdso_data->platform = 0x100;
	if (firmware_has_feature(FW_FEATURE_LPAR))
		vdso_data->platform |= 1;
	vdso_data->physicalMemorySize = memblock_phys_mem_size();
	vdso_data->dcache_size = ppc64_caches.l1d.size;
	vdso_data->dcache_line_size = ppc64_caches.l1d.line_size;
	vdso_data->icache_size = ppc64_caches.l1i.size;
	vdso_data->icache_line_size = ppc64_caches.l1i.line_size;
	vdso_data->dcache_block_size = ppc64_caches.l1d.block_size;
	vdso_data->icache_block_size = ppc64_caches.l1i.block_size;
	vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
	vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size;
#endif /* CONFIG_PPC64 */

	vdso_setup_syscall_map();

	vdso_fixup_features();

	if (IS_ENABLED(CONFIG_VDSO32))
		vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);

	if (IS_ENABLED(CONFIG_PPC64))
		vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);

	smp_wmb();

	return 0;
}
arch_initcall(vdso_init);