cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

copy_32.S (9813B)


/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)
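
/*
 * A rough C sketch of one COPY_16_BYTES expansion (an illustration, not
 * from the source): r4 and r6 are pre-biased to 4 bytes before the next
 * source/destination word, so the update-form lwzu/stwu leave both
 * pointers advanced by 16 bytes:
 *
 *	u32 a = src[1], b = src[2], c = src[3], d = src[4];
 *	src += 4;				// lwzu side effect
 *	dst[1] = a; dst[2] = b; dst[3] = c; dst[4] = d;
 *	dst += 4;				// stwu side effect
 */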

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
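
/*
 * How the two macros above pair up (a summary, inferred from the code):
 * COPY_16_BYTES_WITHEX(n) labels every load 8n0-8n3 and every store
 * 8n4-8n7.  COPY_16_BYTES_EXCODE(n) registers those labels in the
 * exception table so that a faulting load lands on 9n0 and a faulting
 * store on 9n1; both fixups first credit the 16 * n bytes already
 * copied by earlier chunks of this cache line against r5, then branch
 * to the common read (104f) or write (105f) fault handler.
 */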

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0, r5, 31, 1, 31
	addi	r6, r3, -4
	beq-	2f
	rlwimi	r4, r4, 16, 0, 15
	mtctr	r0
1:	stwu	r4, 4(r6)
	bdnz	1b
2:	andi.	r0, r5, 1
	beqlr
	sth	r4, 4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif
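
/*
 * Roughly, memset16 above does the following (a C sketch, not taken
 * from the source):
 *
 *	void *memset16(u16 *p, u16 v, __kernel_size_t len)
 *	{
 *		u32 w = ((u32)v << 16) | v;	// rlwimi replicates v
 *		u16 *q = p;
 *		for (; len >= 2; len -= 2, q += 2)
 *			*(u32 *)q = w;		// one stwu per pair
 *		if (len & 1)
 *			*q = v;			// sth for the odd tail
 *		return p;
 *	}
 */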

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore skip the optimised block that uses dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init().
 */
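/*
 * In outline (a summary of the flow below): sets of fewer than 4 bytes
 * take the pure byte loop at 7:, otherwise the fill byte is replicated
 * across a word, the start is word-aligned, whole cache lines are
 * cleared with dcbz when the value is zero and the cache is usable, and
 * the remainder is finished with word (1:/2:) and byte (8:) loops.
 */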
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled. This branch
	 * will be replaced by 'bne' during boot so the normal procedure is
	 * used when r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache
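
/*
 * (Inferred from the comments above: patch_site records the address of
 * the 'b 2f' at 5: so machine_init() can rewrite that instruction once
 * the data cache is usable; after patching to 'bne', the dcbz path is
 * entered only when the fill value in r4 is zero.)
 */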

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init().
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

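/*
 * Overlap test below (a restatement of the logic): the regions overlap
 * iff src < dst + len and dst < src + len.  cr0.lt holds the first
 * comparison, cr1.lt the second, and crand folds them so the blt is
 * taken only when both are true.
 */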
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	generic_memcpy		/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
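/*
 * Cache-line loop: dcbz establishes (and zeroes) the destination line
 * in the cache without reading it from memory first, then the unrolled
 * COPY_16_BYTES chunks fill it.  The unroll factor is matched to
 * L1_CACHE_BYTES by the #if ladder below.
 */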
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

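/*
 * Fallback forward copy, used while the cache is off or when the
 * regions overlap: eight bytes per iteration in the main loop (two
 * load/store pairs), then a word and a byte tail, with a byte-at-a-time
 * prologue at 5:/6: to word-align the destination first.
 */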
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

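/*
 * Backward variant for memmove when dst > src: pointers start at the
 * end of each region and the loops mirror generic_memcpy with negative
 * update offsets, so overlapping regions are copied safely.
 */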
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

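/*
 * User copy: the same cache-line strategy as memcpy, but every load and
 * store is covered by an exception-table entry.  On a fault the fixup
 * code below works out how much was left and returns the number of
 * bytes not copied in r3; r3 is 0 on success.
 */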
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */
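
/*
 * (Reading the code above: r7 lines of source have now been touched
 * with dcbt and r3 is the prefetch offset, one line past them.  The
 * loop below copies r0 - r7 lines, issuing one dcbt a line ahead each
 * iteration, then loops back through 114: with the prefetch count
 * cleared to copy the lines that were prefetched but not yet copied.)
 */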

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f
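
/*
 * (In the handlers above and below, r3 is loaded with the log2 of the
 * copy granule: 0 for byte loops, 2 for word loops, LG_CACHELINE_BYTES
 * for the cache-line loop.  The common code at 99:/106: shifts the
 * remaining loop count left by r3 to convert it back to bytes.)
 */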

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
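/*
 * Worked example: a read fault in the final word loop leaves r3 = 2
 * and masks r5 down to the byte tail; with 5 words still in ctr and a
 * 3-byte tail, the amount not copied is 3 + (5 << 2) = 23 bytes.
 */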
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* return the bytes still uncopied: ctr is 0 if the byte retry completed */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)