cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cache-v7.S (12863B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/mm/cache-v7.S
 *
 *  Copyright (C) 2001 Deep Blue Solutions Ltd.
 *  Copyright (C) 2005 ARM Ltd.
 *
 *  This is the "shell" of the ARMv7 processor support.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
#include <asm/hardware/cache-b15-rac.h>

#include "proc-macros.S"

#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
.globl icache_size
	.data
	.align	2
icache_size:
	.long	64
	.text
#endif
/*
 * The secondary kernel init calls v7_flush_dcache_all before it enables
 * the L1; however, the L1 comes out of reset in an undefined state, so
 * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
 * of cache lines with uninitialized data and uninitialized tags to get
 * written out to memory, which does really unpleasant things to the main
 * processor.  We fix this by performing an invalidate, rather than a
 * clean + invalidate, before jumping into the kernel.
 *
 * This function needs to be called for both secondary cores startup and
 * primary core resume procedures.
 */
ENTRY(v7_invalidate_l1)
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0	@ select L1 data cache in CSSELR
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read cache geometry from CCSIDR

	movw	r3, #0x3ff
	and	r3, r3, r0, lsr #3	@ 'Associativity' in CCSIDR[12:3]
	clz	r1, r3			@ WayShift
	mov	r2, #1
	mov	r3, r3, lsl r1		@ NumWays-1 shifted into bits [31:...]
	movs	r1, r2, lsl r1		@ #1 shifted left by same amount
	moveq	r1, #1			@ r1 needs value > 0 even if only 1 way

	and	r2, r0, #0x7
	add	r2, r2, #4		@ SetShift

1:	movw	ip, #0x7fff
	and	r0, ip, r0, lsr #13	@ 'NumSets' in CCSIDR[27:13]

2:	mov	ip, r0, lsl r2		@ Set << SetShift
	orr	ip, ip, r3		@ Reg = (Way<<WayShift)|(Set<<SetShift)
	mcr	p15, 0, ip, c7, c6, 2	@ invalidate D-cache line by set/way (DCISW)
	subs	r0, r0, #1		@ Set--
	bpl	2b
	subs	r3, r3, r1		@ Way--
	bcc	3f
	mrc	p15, 1, r0, c0, c0, 0	@ re-read cache geometry from CCSIDR
	b	1b
3:	dsb	st
	isb
	ret	lr
ENDPROC(v7_invalidate_l1)
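
/*
 * For illustration, a minimal C sketch (not kernel code) of how the loop
 * above assembles a set/way operand from CCSIDR: SetShift is the log2 of
 * the line length in bytes, WayShift pushes the way index up into the top
 * bits. Function and variable names here are hypothetical.
 *
 *	#include <stdint.h>
 *
 *	static uint32_t dcisw_operand(uint32_t ccsidr, uint32_t set, uint32_t way)
 *	{
 *		uint32_t ways1    = (ccsidr >> 3) & 0x3ff;		// ways - 1
 *		uint32_t setshift = (ccsidr & 0x7) + 4;			// log2(line bytes)
 *		uint32_t wayshift = ways1 ? __builtin_clz(ways1) : 0;	// ways==1: way is 0 anyway
 *
 *		return (way << wayshift) | (set << setshift);	// level 0: bits [3:1] stay 0
 *	}
 */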

/*
 *	v7_flush_icache_all()
 *
 *	Flush the whole I-cache.
 *
 *	Registers:
 *	r0 - set to 0
 */
ENTRY(v7_flush_icache_all)
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
	ret	lr
ENDPROC(v7_flush_icache_all)
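
/*
 * A hedged sketch of how the same operation could be issued from C with
 * GCC inline assembly (SMP variant, ICIALLUIS, matching the ALT_SMP line
 * above); the helper name is hypothetical:
 *
 *	static inline void icache_inval_all_is(void)
 *	{
 *		// writing zero to CP15 c7, c1, 0 invalidates the whole
 *		// I-cache for the inner shareable domain
 *		__asm__ volatile("mcr p15, 0, %0, c7, c1, 0" :: "r"(0) : "memory");
 *	}
 */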

/*
 *	v7_flush_dcache_louis()
 *
 *	Flush the D-cache up to the Level of Unification Inner Shareable
 *
 *	Corrupted registers: r0-r6, r9-r10
 */

ENTRY(v7_flush_dcache_louis)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
ALT_SMP(mov	r3, r0, lsr #20)		@ move LoUIS into position
ALT_UP(	mov	r3, r0, lsr #26)		@ move LoUU into position
	ands	r3, r3, #7 << 1			@ extract LoU*2 field from clidr
	bne	start_flush_levels		@ LoU != 0, start flushing
#ifdef CONFIG_ARM_ERRATA_643719
ALT_SMP(mrc	p15, 0, r2, c0, c0, 0)		@ read main ID register
ALT_UP(	ret	lr)				@ LoUU is zero, so nothing to do
	movw	r1, #:lower16:(0x410fc090 >> 4)	@ ID of ARM Cortex A9 r0p?
	movt	r1, #:upper16:(0x410fc090 >> 4)
	teq	r1, r2, lsr #4			@ test for errata affected core and if so...
	moveq	r3, #1 << 1			@   fix LoUIS value
	beq	start_flush_levels		@   start flushing cache levels
#endif
	ret	lr
ENDPROC(v7_flush_dcache_louis)
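
/*
 * For reference, a C sketch of the CLIDR fields extracted above. The asm
 * keeps each value pre-multiplied by 2 because its level counter steps in
 * units of the CSSELR 'level' field (bits [3:1]):
 *
 *	#include <stdint.h>
 *
 *	static uint32_t clidr_louis(uint32_t clidr) { return (clidr >> 21) & 7; }	// SMP path
 *	static uint32_t clidr_louu(uint32_t clidr)  { return (clidr >> 27) & 7; }	// UP path
 *	static uint32_t clidr_loc(uint32_t clidr)   { return (clidr >> 24) & 7; }	// used below
 */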

/*
 *	v7_flush_dcache_all()
 *
 *	Flush the whole D-cache.
 *
 *	Corrupted registers: r0-r6, r9-r10
 */
ENTRY(v7_flush_dcache_all)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
	mov	r3, r0, lsr #23			@ move LoC into position
	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
	beq	finished			@ if loc is 0, then no need to clean
start_flush_levels:
	mov	r10, #0				@ start clean at cache level 0
flush_levels:
	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
	and	r1, r1, #7			@ mask of the bits for current cache only
	cmp	r1, #2				@ see what cache we have at this level
	blt	skip				@ skip if no cache, or just i-cache
#ifdef CONFIG_PREEMPTION
	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
#endif
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	isb					@ isb to sync the new cssr&csidr
	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
#ifdef CONFIG_PREEMPTION
	restore_irqs_notrace r9
#endif
	and	r2, r1, #7			@ extract the length of the cache lines
	add	r2, r2, #4			@ add 4 (line length offset)
	movw	r4, #0x3ff
	ands	r4, r4, r1, lsr #3		@ find maximum way number (ways - 1)
	clz	r5, r4				@ find bit position of way size increment
	movw	r6, #0x7fff
	and	r1, r6, r1, lsr #13		@ extract maximum set index (sets - 1)
	mov	r6, #1
	movne	r4, r4, lsl r5			@ # of ways shifted into bits [31:...]
	movne	r6, r6, lsl r5			@ 1 shifted left by same amount
loop1:
	mov	r9, r1				@ create working copy of max index
loop2:
	mov	r5, r9, lsl r2			@ factor set number into r5
	orr	r5, r5, r4			@ factor way number into r5
	orr	r5, r5, r10			@ factor cache level into r5
	mcr	p15, 0, r5, c7, c14, 2		@ clean & invalidate by set/way
	subs	r9, r9, #1			@ decrement the index
	bge	loop2
	subs	r4, r4, r6			@ decrement the way
	bcs	loop1
skip:
	add	r10, r10, #2			@ increment cache number
	cmp	r3, r10
#ifdef CONFIG_ARM_ERRATA_814220
	dsb
#endif
	bgt	flush_levels
finished:
	mov	r10, #0				@ switch back to cache level 0
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	dsb	st
	isb
	ret	lr
ENDPROC(v7_flush_dcache_all)
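
/*
 * A rough C model of start_flush_levels/flush_levels, for illustration
 * only: walk every cache level named in CLIDR up to LoC, and clean and
 * invalidate each set/way of every level holding a data or unified cache.
 * read_clidr(), read_ccsidr_for() and dccisw() are hypothetical stand-ins
 * for the mrc/mcr operations above.
 *
 *	#include <stdint.h>
 *
 *	extern uint32_t read_clidr(void);
 *	extern uint32_t read_ccsidr_for(uint32_t level2);	// CSSELR write + CCSIDR read
 *	extern void dccisw(uint32_t setway);			// DCCISW: c7, c14, 2
 *
 *	static void flush_dcache_model(void)
 *	{
 *		uint32_t clidr = read_clidr();
 *		uint32_t loc2 = (clidr >> 23) & (7 << 1);	// LoC * 2
 *
 *		for (uint32_t lvl2 = 0; lvl2 < loc2; lvl2 += 2) {
 *			// 3 type bits per level; < 2 means no cache,
 *			// or an I-cache only, at this level
 *			uint32_t ctype = (clidr >> (lvl2 + (lvl2 >> 1))) & 7;
 *			if (ctype < 2)
 *				continue;
 *			uint32_t ccsidr = read_ccsidr_for(lvl2);
 *			uint32_t lineshift = (ccsidr & 7) + 4;		// log2(line bytes)
 *			uint32_t ways1 = (ccsidr >> 3) & 0x3ff;		// ways - 1
 *			uint32_t sets1 = (ccsidr >> 13) & 0x7fff;	// sets - 1
 *			uint32_t wayshift = ways1 ? __builtin_clz(ways1) : 0;
 *
 *			for (uint32_t w = ways1 + 1; w-- > 0; )
 *				for (uint32_t s = sets1 + 1; s-- > 0; )
 *					dccisw((w << wayshift) | (s << lineshift) | lvl2);
 *		}
 *	}
 */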

/*
 *	v7_flush_kern_cache_all()
 *
 *	Flush the entire cache system.
 *	The data cache flush is now achieved using atomic clean / invalidates
 *	working outwards from L1 cache. This is done using Set/Way based cache
 *	maintenance instructions.
 *	The instruction cache can still be invalidated back to the point of
 *	unification in a single instruction.
 */
ENTRY(v7_flush_kern_cache_all)
	stmfd	sp!, {r4-r6, r9-r10, lr}
	bl	v7_flush_dcache_all
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
	ldmfd	sp!, {r4-r6, r9-r10, lr}
	ret	lr
ENDPROC(v7_flush_kern_cache_all)

/*
 *	v7_flush_kern_cache_louis(void)
 *
 *	Flush the data cache up to Level of Unification Inner Shareable.
 *	Invalidate the I-cache to the point of unification.
 */
ENTRY(v7_flush_kern_cache_louis)
	stmfd	sp!, {r4-r6, r9-r10, lr}
	bl	v7_flush_dcache_louis
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
	ldmfd	sp!, {r4-r6, r9-r10, lr}
	ret	lr
ENDPROC(v7_flush_kern_cache_louis)

/*
 *	v7_flush_user_cache_all()
 *
 *	Flush all cache entries in a particular address space
 *
 *	- mm    - mm_struct describing address space
 */
ENTRY(v7_flush_user_cache_all)
	/*FALLTHROUGH*/

/*
 *	v7_flush_user_cache_range(start, end, flags)
 *
 *	Flush a range of cache entries in the specified address space.
 *
 *	- start - start address (may not be aligned)
 *	- end   - end address (exclusive, may not be aligned)
 *	- flags	- vm_area_struct flags describing address space
 *
 *	It is assumed that:
 *	- we have a VIPT cache, so no flush is required.
 */
ENTRY(v7_flush_user_cache_range)
	ret	lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)

/*
 *	v7_coherent_kern_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_kern_range)
	/* FALLTHROUGH */

/*
 *	v7_coherent_user_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_user_range)
 UNWIND(.fnstart		)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r12, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
 USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
	add	r12, r12, r2
	cmp	r12, r1
	blo	1b
	dsb	ishst
#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
	ldr	r3, =icache_size
	ldr	r2, [r3, #0]
#else
	icache_line_size r2, r3
#endif
	sub	r3, r2, #1
	bic	r12, r0, r3
2:
 USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
	add	r12, r12, r2
	cmp	r12, r1
	blo	2b
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
	dsb	ishst
	isb
	ret	lr

/*
 * Fault handling for the cache operation above. If the virtual address in r0
 * isn't mapped, fail with -EFAULT.
 */
9001:
#ifdef CONFIG_ARM_ERRATA_775420
	dsb
#endif
	mov	r0, #-EFAULT
	ret	lr
 UNWIND(.fnend		)
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)
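
/*
 * For illustration, a C sketch of the sequence above for a freshly
 * written code region: clean each D-cache line to the point of
 * unification, then invalidate each I-cache line and the branch
 * predictor. dccmvau(), icimvau(), bpiallis(), dsb_ishst() and isb()
 * are hypothetical stand-ins for the CP15 and barrier ops.
 *
 *	#include <stdint.h>
 *
 *	extern void dccmvau(uint32_t va);	// DCCMVAU: c7, c11, 1
 *	extern void icimvau(uint32_t va);	// ICIMVAU: c7, c5, 1
 *	extern void bpiallis(void);		// BPIALLIS: c7, c1, 6
 *	extern void dsb_ishst(void), isb(void);
 *
 *	static void make_code_coherent(uint32_t start, uint32_t end,
 *				       uint32_t dline, uint32_t iline)
 *	{
 *		for (uint32_t va = start & ~(dline - 1); va < end; va += dline)
 *			dccmvau(va);	// push the new code out to the PoU
 *		dsb_ishst();		// order cleans before the invalidates
 *		for (uint32_t va = start & ~(iline - 1); va < end; va += iline)
 *			icimvau(va);	// drop stale instruction lines
 *		bpiallis();		// and stale branch predictions
 *		dsb_ishst();
 *		isb();
 *	}
 */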

/*
 *	v7_flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure that the data held in the region [addr, addr + size)
 *	is written back to memory.
 *
 *	- addr	- kernel address
 *	- size	- region size
 */
ENTRY(v7_flush_kern_dcache_area)
	dcache_line_size r2, r3
	add	r1, r0, r1
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_flush_kern_dcache_area)

/*
 *	v7_dma_inv_range(start,end)
 *
 *	Invalidate the data cache within the specified region; we will
 *	be performing a DMA operation in this region and we want to
 *	purge old data in the cache.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 */
v7_dma_inv_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	tst	r0, r3
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	addne	r0, r0, r2

	tst	r1, r3
	bic	r1, r1, r3
	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
	cmp	r0, r1
1:
	mcrlo	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
	addlo	r0, r0, r2
	cmplo	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_inv_range)
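
/*
 * The head and tail of the buffer get special treatment above: a line
 * that is only partially covered may also hold a neighbour's dirty
 * data, so it is cleaned and invalidated rather than just invalidated.
 * A C sketch of that policy (dccimvac()/dcimvac() are hypothetical
 * stand-ins for the two mcr operations):
 *
 *	#include <stdint.h>
 *
 *	extern void dccimvac(uint32_t va);	// clean & invalidate: c7, c14, 1
 *	extern void dcimvac(uint32_t va);	// invalidate only:    c7, c6, 1
 *
 *	static void dma_inv_model(uint32_t start, uint32_t end, uint32_t line)
 *	{
 *		uint32_t mask = line - 1;
 *
 *		if (start & mask) {		// partial line at the head
 *			start &= ~mask;
 *			dccimvac(start);
 *			start += line;
 *		}
 *		if (end & mask) {		// partial line at the tail
 *			end &= ~mask;
 *			dccimvac(end);
 *		}
 *		for (uint32_t va = start; va < end; va += line)
 *			dcimvac(va);		// interior lines can be dropped
 *	}
 */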

/*
 *	v7_dma_clean_range(start,end)
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 */
v7_dma_clean_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_clean_range)

/*
 *	v7_dma_flush_range(start,end)
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 */
ENTRY(v7_dma_flush_range)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_flush_range)

/*
 *	v7_dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_map_area)
	add	r1, r1, r0
	teq	r2, #DMA_FROM_DEVICE
	beq	v7_dma_inv_range
	b	v7_dma_clean_range
ENDPROC(v7_dma_map_area)

/*
 *	v7_dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_unmap_area)
	add	r1, r1, r0
	teq	r2, #DMA_TO_DEVICE
	bne	v7_dma_inv_range
	ret	lr
ENDPROC(v7_dma_unmap_area)
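
/*
 * A C sketch of the direction policy encoded above: before a device
 * writes into a buffer (DMA_FROM_DEVICE), stale lines are invalidated;
 * before a device reads one, dirty lines are cleaned out to memory. On
 * unmap, only buffers the device may have written need invalidating
 * again. The model functions here are hypothetical.
 *
 *	#include <stdint.h>
 *
 *	enum dma_dir { DMA_BIDIRECTIONAL, DMA_TO_DEVICE, DMA_FROM_DEVICE };
 *	extern void dma_inv_range(uint32_t start, uint32_t end);
 *	extern void dma_clean_range(uint32_t start, uint32_t end);
 *
 *	static void dma_map_model(uint32_t start, uint32_t size, enum dma_dir dir)
 *	{
 *		if (dir == DMA_FROM_DEVICE)
 *			dma_inv_range(start, start + size);	// device will overwrite
 *		else
 *			dma_clean_range(start, start + size);	// device will read
 *	}
 *
 *	static void dma_unmap_model(uint32_t start, uint32_t size, enum dma_dir dir)
 *	{
 *		if (dir != DMA_TO_DEVICE)
 *			dma_inv_range(start, start + size);	// drop stale lines
 *	}
 */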

	__INITDATA

	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	define_cache_functions v7

	/*
	 * The Broadcom Brahma-B15 read-ahead cache requires some modifications
	 * to the v7_cache_fns; we only override the ones we need.
	 */
#ifndef CONFIG_CACHE_B15_RAC
	globl_equ	b15_flush_kern_cache_all,	v7_flush_kern_cache_all
#endif
	globl_equ	b15_flush_icache_all,		v7_flush_icache_all
	globl_equ	b15_flush_kern_cache_louis,	v7_flush_kern_cache_louis
	globl_equ	b15_flush_user_cache_all,	v7_flush_user_cache_all
	globl_equ	b15_flush_user_cache_range,	v7_flush_user_cache_range
	globl_equ	b15_coherent_kern_range,	v7_coherent_kern_range
	globl_equ	b15_coherent_user_range,	v7_coherent_user_range
	globl_equ	b15_flush_kern_dcache_area,	v7_flush_kern_dcache_area

	globl_equ	b15_dma_map_area,		v7_dma_map_area
	globl_equ	b15_dma_unmap_area,		v7_dma_unmap_area
	globl_equ	b15_dma_flush_range,		v7_dma_flush_range

	define_cache_functions b15
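
/*
 * Conceptually, define_cache_functions emits a table of entry points.
 * As a rough, abridged C mirror of struct cpu_cache_fns (see
 * <asm/cacheflush.h>; the exact members differ, names here are
 * illustrative), the b15 variant simply reuses the v7 entries it does
 * not override:
 *
 *	struct cache_fns_model {
 *		void (*flush_icache_all)(void);
 *		void (*flush_kern_all)(void);
 *		void (*flush_kern_louis)(void);
 *	};
 *
 *	extern void v7_flush_icache_all(void);
 *	extern void v7_flush_kern_cache_all(void);
 *	extern void v7_flush_kern_cache_louis(void);
 *
 *	static const struct cache_fns_model v7_fns_model = {
 *		.flush_icache_all = v7_flush_icache_all,
 *		.flush_kern_all   = v7_flush_kern_cache_all,
 *		.flush_kern_louis = v7_flush_kern_cache_louis,
 *	};
 */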