cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

proc-xsc3.S (14217B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 * linux/arch/arm/mm/proc-xsc3.S
      4 *
      5 * Original Author: Matthew Gilbert
      6 * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
      7 *
      8 * Copyright 2004 (C) Intel Corp.
      9 * Copyright 2005 (C) MontaVista Software, Inc.
     10 *
     11 * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is
     12 * an extension to Intel's original XScale core that adds the following
     13 * features:
     14 *
     15 * - ARMv6 Supersections
     16 * - Low Locality Reference pages (replaces mini-cache)
     17 * - 36-bit addressing
     18 * - L2 cache
     19 * - Cache coherency if chipset supports it
     20 *
     21 * Based on original XScale code by Nicolas Pitre.
     22 */
     23
     24#include <linux/linkage.h>
     25#include <linux/init.h>
     26#include <linux/pgtable.h>
     27#include <asm/assembler.h>
     28#include <asm/hwcap.h>
     29#include <asm/pgtable-hwdef.h>
     30#include <asm/page.h>
     31#include <asm/ptrace.h>
     32#include "proc-macros.S"
     33
     34/*
     35 * Areas smaller than this are flushed line by line.  If the area is
     36 * this size or larger, then we flush the whole cache instead.
     37 */
     38#define MAX_AREA_SIZE	32768
     39
     40/*
     41 * The cache line size of the L1 I, L1 D and unified L2 cache.
     42 */
     43#define CACHELINESIZE	32
     44
     45/*
     46 * The size of the L1 D cache.
     47 */
     48#define CACHESIZE	32768
     49
     50/*
     51 * cpwait_ret lr, rd: wait for the last CP15 write to complete, then
     52 * branch to \lr.  \rd is a scratch register; "\rd, LSR #32" is zero, so
     53 * the branch target is \lr but the sub must wait for the mrc result.
     54 */
     55	.macro	cpwait_ret, lr, rd
     56	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
     57	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
     58						@ flush instruction pipeline
     59	.endm
     60
     61/*
     62 * Clean and invalidate the entire L1 D cache; clobbers \rd, \rs is unused.
     63 */
     64
     65 	.macro  clean_d_cache rd, rs
     66	mov	\rd, #0x1f00			@ build 0x1fe0 in two steps
     67	orr	\rd, \rd, #0x00e0		@ rd = 0x1fe0, the starting index
     681:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
     69	adds	\rd, \rd, #0x40000000		@ step bits [31:30]; carry on 4th add
     70	bcc	1b				@ repeat for all four values
     71	subs	\rd, \rd, #0x20			@ move down 0x20 (CACHELINESIZE)
     72	bpl	1b				@ until the index goes negative
     73	.endm
     74
     75	.text
     76
     77/*
     78 * cpu_xsc3_proc_init()
     79 *
     80 * Nothing to initialise at the moment: returns immediately.
     81 */
     82ENTRY(cpu_xsc3_proc_init)
     83	ret	lr				@ no work to do
     84
     85/*
     86 * cpu_xsc3_proc_fin() - clear the I, Z, C and A control register bits
     87 */
     88ENTRY(cpu_xsc3_proc_fin)
     89	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
     90	bic	r0, r0, #0x1800			@ ...IZ...........
     91	bic	r0, r0, #0x0006			@ .............CA.
     92	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
     93	ret	lr				@ r0 is clobbered
     94
     95/*
     96 * cpu_xsc3_reset(loc)
     97 *
     98 * Perform a soft reset of the system.  Put the CPU into the
     99 * same state as it would be if it had been reset, and branch
    100 * to what would be the reset vector.
    101 *
    102 * loc: location to jump to for soft reset (passed in r0)
    103 */
    104	.align	5
    105	.pushsection	.idmap.text, "ax"
    106ENTRY(cpu_xsc3_reset)
    107	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE	@ SVC mode, IRQ and FIQ masked
    108	msr	cpsr_c, r1			@ reset CPSR
    109	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
    110	bic	r1, r1, #0x3900			@ ..VIZ..S........
    111	bic	r1, r1, #0x0086			@ ........B....CA.
    112	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
    113	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
    114	bic	r1, r1, #0x0001			@ ...............M
    115	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
    116	@ CAUTION: MMU turned off from this point.  We count on the pipeline
    117	@ already containing those two last instructions to survive.
    118	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
    119	ret	r0				@ branch to loc; does not return
    120ENDPROC(cpu_xsc3_reset)
    121	.popsection
    122
    123/*
    124 * cpu_xsc3_do_idle()
    125 *
    126 * Cause the processor to idle
    127 *
    128 * For now we do nothing but go to idle mode for every case
    129 *
    130 * XScale supports clock switching, but using idle mode support
    131 * allows external hardware to react to system state changes.
    132 */
    133	.align	5
    134
    135ENTRY(cpu_xsc3_do_idle)
    136	mov	r0, #1				@ operand for the p14 write below
    137	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
    138	ret	lr				@ r0 is clobbered
    139
    140/* ================================= CACHE ================================ */
    141
    142/*
    143 *	flush_icache_all()
    144 *
    145 *	Unconditionally invalidate the entire icache.  Clobbers r0.
    146 */
    147ENTRY(xsc3_flush_icache_all)
    148	mov	r0, #0				@ operand register for the mcr
    149	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
    150	ret	lr
    151ENDPROC(xsc3_flush_icache_all)
    152
    153/*
    154 *	flush_user_cache_all()
    155 *
    156 *	Invalidate all cache entries in a particular address
    157 *	space.  Shares its body with flush_kern_cache_all().
    158 */
    159ENTRY(xsc3_flush_user_cache_all)
    160	/* FALLTHROUGH */
    161
    162/*
    163 *	flush_kern_cache_all()
    164 *
    165 *	Clean and invalidate the entire cache.
    166 */
    167ENTRY(xsc3_flush_kern_cache_all)
    168	mov	r2, #VM_EXEC			@ always take the I-cache path below
    169	mov	ip, #0				@ ip = 0 for the mcr operations below
    170__flush_whole_cache:				@ entered with r2 = flags, ip = 0
    171	clean_d_cache r0, r1			@ clobbers r0
    172	tst	r2, #VM_EXEC			@ executable mapping?
    173	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
    174	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
    175	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
    176	ret	lr
    177
    178/*
    179 *	flush_user_cache_range(start, end, vm_flags)
    180 *
    181 *	Invalidate a range of cache entries in the specified
    182 *	address space.
    183 *
    184 *	- start - start address (may not be aligned)
    185 *	- end	- end address (exclusive, may not be aligned)
    186 *	- flags	- vm_flags describing address space (tested for VM_EXEC)
    187 */
    188	.align	5
    189ENTRY(xsc3_flush_user_cache_range)
    190	mov	ip, #0
    191	sub	r3, r1, r0			@ calculate total size
    192	cmp	r3, #MAX_AREA_SIZE
    193	bhs	__flush_whole_cache		@ size >= limit: flush everything
    194
    1951:	tst	r2, #VM_EXEC			@ NOTE(review): r0 is not rounded down to a line first
    196	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
    197	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
    198	add	r0, r0, #CACHELINESIZE		@ next cache line
    199	cmp	r0, r1
    200	blo	1b				@ loop while r0 < end
    201	tst	r2, #VM_EXEC			@ barriers only for executable ranges
    202	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
    203	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
    204	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
    205	ret	lr
    206
    207/*
    208 *	coherent_kern_range(start, end)
    209 *
    210 *	Ensure coherency between the I cache and the D cache in the
    211 *	region described by start and end.  If you have non-snooping
    212 *	Harvard caches, you need to implement this function.
    213 *
    214 *	- start  - virtual start address
    215 *	- end	 - virtual end address (exclusive)
    216 *
    217 *	Note: single I-cache line invalidation isn't used here since
    218 *	it also trashes the mini I-cache used by JTAG debuggers.
    219 */
    220ENTRY(xsc3_coherent_kern_range)
    221/* FALLTHROUGH */
    222ENTRY(xsc3_coherent_user_range)
    223	bic	r0, r0, #CACHELINESIZE - 1	@ round start down to a cache line
    2241:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
    225	add	r0, r0, #CACHELINESIZE		@ next cache line
    226	cmp	r0, r1
    227	blo	1b				@ loop while r0 < end
    228	mov	r0, #0				@ mcr operand; r0 is still 0 at return
    229	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
    230	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
    231	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
    232	ret	lr
    233
    234/*
    235 *	flush_kern_dcache_area(void *addr, size_t size)
    236 *
    237 *	Ensure no D cache aliasing occurs, either with itself or
    238 *	the I cache.
    239 *
    240 *	- addr	- kernel address (r0)
    241 *	- size	- region size (r1)
    242 */
    243ENTRY(xsc3_flush_kern_dcache_area)
    244	add	r1, r0, r1			@ r1 = end address (addr + size)
    2451:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
    246	add	r0, r0, #CACHELINESIZE		@ next cache line
    247	cmp	r0, r1
    248	blo	1b				@ loop while r0 < end
    249	mov	r0, #0				@ operand register for the mcrs
    250	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
    251	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
    252	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
    253	ret	lr
    254
    255/*
    256 *	dma_inv_range(start, end)
    257 *
    258 *	Invalidate (discard) the specified virtual address range.
    259 *	May not write back any entries.  If 'start' or 'end'
    260 *	are not cache line aligned, those lines must be written
    261 *	back.
    262 *
    263 *	- start  - virtual start address
    264 *	- end	 - virtual end address
    265 */
    266xsc3_dma_inv_range:
    267	tst	r0, #CACHELINESIZE - 1		@ start unaligned?
    268	bic	r0, r0, #CACHELINESIZE - 1	@ round start down (flags unchanged)
    269	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
    270	tst	r1, #CACHELINESIZE - 1		@ end unaligned?
    271	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
    2721:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
    273	add	r0, r0, #CACHELINESIZE		@ next cache line
    274	cmp	r0, r1
    275	blo	1b				@ loop while r0 < end
    276	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
    277	ret	lr
    278
    279/*
    280 *	dma_clean_range(start, end)
    281 *
    282 *	Clean the specified virtual address range.
    283 *
    284 *	- start  - virtual start address
    285 *	- end	 - virtual end address (exclusive)
    286 */
    287xsc3_dma_clean_range:
    288	bic	r0, r0, #CACHELINESIZE - 1	@ round start down to a cache line
    2891:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
    290	add	r0, r0, #CACHELINESIZE		@ next cache line
    291	cmp	r0, r1
    292	blo	1b				@ loop while r0 < end
    293	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
    294	ret	lr
    295
    296/*
    297 *	dma_flush_range(start, end)
    298 *
    299 *	Clean and invalidate the specified virtual address range.
    300 *
    301 *	- start  - virtual start address
    302 *	- end	 - virtual end address (exclusive)
    303 */
    304ENTRY(xsc3_dma_flush_range)
    305	bic	r0, r0, #CACHELINESIZE - 1	@ round start down to a cache line
    3061:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
    307	add	r0, r0, #CACHELINESIZE		@ next cache line
    308	cmp	r0, r1
    309	blo	1b				@ loop while r0 < end
    310	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
    311	ret	lr
    312
    313/*
    314 *	dma_map_area(start, size, dir)
    315 *	- start	- kernel virtual start address
    316 *	- size	- size of region
    317 *	- dir	- DMA direction (selects clean, invalidate or flush)
    318 */
    319ENTRY(xsc3_dma_map_area)
    320	add	r1, r1, r0			@ r1 = end address (start + size)
    321	cmp	r2, #DMA_TO_DEVICE
    322	beq	xsc3_dma_clean_range		@ dir == DMA_TO_DEVICE
    323	bcs	xsc3_dma_inv_range		@ dir > DMA_TO_DEVICE (unsigned)
    324	b	xsc3_dma_flush_range		@ dir < DMA_TO_DEVICE
    325ENDPROC(xsc3_dma_map_area)
    326
    327/*
    328 *	dma_unmap_area(start, size, dir)
    329 *	- start	- kernel virtual start address (ignored)
    330 *	- size	- size of region (ignored)
    331 *	- dir	- DMA direction (ignored)
    332 */
    333ENTRY(xsc3_dma_unmap_area)
    334	ret	lr				@ no cache maintenance on unmap
    335ENDPROC(xsc3_dma_unmap_area)
    336
    337	.globl	xsc3_flush_kern_cache_louis
    338	.equ	xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all	@ alias: same entry point
    339
    340	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
    341	define_cache_functions xsc3
    342
    343ENTRY(cpu_xsc3_dcache_clean_area)
    3441:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
    345	add	r0, r0, #CACHELINESIZE		@ r0 = address of the next line
    346	subs	r1, r1, #CACHELINESIZE		@ r1 = bytes still to clean
    347	bhi	1b				@ loop while r1 > 0 (unsigned)
    348	ret	lr
    349
    350/* =============================== PageTable ============================== */
    351
    352/*
    353 * cpu_xsc3_switch_mm(pgd)
    354 *
    355 * Set the translation base pointer to be as described by pgd.
    356 *
    357 * pgd: new page tables (passed in r0)
    358 */
    359	.align	5
    360ENTRY(cpu_xsc3_switch_mm)
    361	clean_d_cache r1, r2			@ clobbers r1; r0 is preserved
    362	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
    363	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
    364	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
    365	orr	r0, r0, #0x18			@ cache the page table in L2
    366	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
    367	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
    368	cpwait_ret lr, ip			@ wait, then return; ip is scratch
    369
    370/*
    371 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
    372 *
    373 * Set a PTE and flush it out
    374 */
    375cpu_xsc3_mt_table:				@ 16 words, offset = pte & L_PTE_MT_MASK
    376	.long	0x00						@ L_PTE_MT_UNCACHED
    377	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
    378	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
    379	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
    380	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
    381	.long	0x00						@ unused
    382	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
    383	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
    384	.long	0x00						@ unused
    385	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
    386	.long	0x00						@ unused
    387	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
    388	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
    389	.long	0x00						@ unused
    390	.long	0x00						@ unused
    391	.long	0x00						@ unused
    392
    393	.align	5
    394ENTRY(cpu_xsc3_set_pte_ext)
    395	xscale_set_pte_ext_prologue
    396
    397	tst	r1, #L_PTE_SHARED		@ shared?
    398	and	r1, r1, #L_PTE_MT_MASK		@ r1 = table offset (flags kept)
    399	adr	ip, cpu_xsc3_mt_table		@ ip = table base
    400	ldr	ip, [ip, r1]			@ ip = bits for this memory type
    401	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
    402	bic	r2, r2, #0x0c			@ clear old C,B bits
    403	orr	r2, r2, ip			@ merge in the table entry
    404
    405	xscale_set_pte_ext_epilogue
    406	ret	lr
    407
    408	.ltorg
    409	.align
    410
    411.globl	cpu_xsc3_suspend_size
    412.equ	cpu_xsc3_suspend_size, 4 * 6	@ six 32-bit words (r4 - r9 in do_suspend)
    413#ifdef CONFIG_ARM_CPU_SUSPEND
    414ENTRY(cpu_xsc3_do_suspend)
    415	stmfd	sp!, {r4 - r9, lr}	@ r4 - r9 are used as scratch below
    416	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
    417	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
    418	mrc	p15, 0, r6, c13, c0, 0	@ PID
    419	mrc 	p15, 0, r7, c3, c0, 0	@ domain ID
    420	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
    421	mrc 	p15, 0, r9, c1, c0, 0	@ control reg
    422	bic	r4, r4, #2		@ clear frequency change bit
    423	stmia	r0, {r4 - r9}		@ store cp regs
    424	ldmia	sp!, {r4 - r9, pc}	@ restore r4 - r9 and return
    425ENDPROC(cpu_xsc3_do_suspend)
    426
    427ENTRY(cpu_xsc3_do_resume)
    428	ldmia	r0, {r4 - r9}		@ load cp regs
    429	mov	ip, #0			@ operand for the mcr operations below
    430	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
    431	mcr	p15, 0, ip, c7, c10, 4	@ drain write (&fill) buffer
    432	mcr	p15, 0, ip, c7, c5, 4	@ flush prefetch buffer
    433	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
    434	mcr	p14, 0, r4, c6, c0, 0	@ clock configuration, turbo mode.
    435	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
    436	mcr	p15, 0, r6, c13, c0, 0	@ PID
    437	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
    438	orr	r1, r1, #0x18		@ cache the page table in L2
    439	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
    440	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
    441	mov	r0, r9			@ control register
    442	b	cpu_resume_mmu		@ continue there with r0 = control reg
    443ENDPROC(cpu_xsc3_do_resume)
    444#endif
    445
    446	.type	__xsc3_setup, #function
    447__xsc3_setup:
    448	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE	@ SVC mode, IRQ and FIQ masked
    449	msr	cpsr_c, r0
    450	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
    451	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
    452	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
    453	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
    454	orr	r4, r4, #0x18			@ cache the page table in L2
    455	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
    456
    457	mov	r0, #1 << 6			@ cp6 access for early sched_clock
    458	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register
    459
    460	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
    461	and	r0, r0, #2			@ preserve the P bit setting
    462	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
    463	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg
    464
    465	adr	r5, xsc3_crval			@ r5 -> clear/set masks
    466	ldmia	r5, {r5, r6}			@ r5 = bits to clear, r6 = bits to set
    467
    468#ifdef CONFIG_CACHE_XSC3L2
    469	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
    470	ands	r0, r0, #0xf8			@ non-zero field: L2 present
    471	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
    472#endif
    473
    474	mrc	p15, 0, r0, c1, c0, 0		@ get control register
    475	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
    476	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
    477						@ ...I Z..S .... .... (uc)
    478	ret	lr				@ r0 = new control value, not written here
    479
    480	.size	__xsc3_setup, . - __xsc3_setup
    481
    482	.type	xsc3_crval, #object
    483xsc3_crval:					@ control register masks read by __xsc3_setup
    484	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
    485
    486	__INITDATA
    487
    488	@ define struct processor for xsc3 (see <asm/proc-fns.h> and proc-macros.S)
    489	define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1
    490
    491	.section ".rodata"
    492
    493	string	cpu_arch_name, "armv5te"	@ referenced by the proc_info records
    494	string	cpu_elf_name, "v5"		@ referenced by the proc_info records
    495	string	cpu_xsc3_name, "XScale-V3 based processor"	@ referenced by the proc_info records
    496
    497	.align
    498
    499	.section ".proc.info.init", "a"
    500
    501.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
    502	.type	__\name\()_proc_info,#object
    503__\name\()_proc_info:
    504	.long	\cpu_val			@ presumably the CPU ID value to match
    505	.long	\cpu_mask			@ presumably the mask applied to the CPU ID
    506	.long	PMD_TYPE_SECT | \
    507		PMD_SECT_BUFFERABLE | \
    508		PMD_SECT_CACHEABLE | \
    509		PMD_SECT_AP_WRITE | \
    510		PMD_SECT_AP_READ
    511	.long	PMD_TYPE_SECT | \
    512		PMD_SECT_AP_WRITE | \
    513		PMD_SECT_AP_READ
    514	initfn	__xsc3_setup, __\name\()_proc_info
    515	.long	cpu_arch_name			@ address of the arch name string
    516	.long	cpu_elf_name			@ address of the ELF name string
    517	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
    518	.long	cpu_xsc3_name			@ address of the CPU name string
    519	.long	xsc3_processor_functions
    520	.long	v4wbi_tlb_fns
    521	.long	xsc3_mc_user_fns
    522	.long	xsc3_cache_fns
    523	.size	__\name\()_proc_info, . - __\name\()_proc_info
    524.endm
    525
    526	xsc3_proc_info xsc3, 0x69056000, 0xffffe000	@ emits __xsc3_proc_info
    527
    528/* Note: PXA935 changed its implementor ID from Intel to Marvell */
    529	xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000	@ emits __xsc3_pxa935_proc_info