cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

copyuser_64.S (11151B)


/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif
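/*
 * For illustration: with a source k bytes past an 8-byte boundary,
 * the unaligned path below sets r10 = 8*k and r11 = 64 - r10 and
 * merges each aligned destination doubleword from two adjacent
 * source doublewords a and b (illustrative register names):
 *
 *	sLd	t1,a,r10	- bits kept from the earlier doubleword
 *	sHd	t2,b,r11	- bits brought in from the later one
 *	or	dst,t2,t1
 *
 * The endian-dependent definitions above make this one sequence
 * correct for both byte orders.
 */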

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
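/*
 * Worked example (illustrative): at a point where r3_offset = 16,
 * "lex" emits EX_TABLE(100b, .Lld_exc - 16).  A fault there enters
 * 16 bytes before .Lld_exc, i.e. at the "adjust by 16" addi below,
 * falls through the "adjust by 8" addi as well, and so adds 16 to
 * r3 before reaching .Lld_exc proper.  r3 then points at the first
 * unmodified destination byte, which is what the handler expects.
 */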
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm

	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
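/*
 * For reference: with ALT_FTR_SECTION_END(mask, value), the first
 * section (here the nop) is kept when (CPU features & mask) == value
 * at boot, and the FTR_SECTION_ELSE body (the bne) is patched in
 * otherwise.  That keeps the nop exactly when CPU_FTR_UNALIGNED_LD_STD
 * is set and CPU_FTR_CP_USE_DCBTZ is clear, i.e. on POWER6 as the
 * comment above notes.
 */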
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */
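/*
 * .Lld_exc_r7 below is the variant used by the .Ldst_unaligned fixup
 * code, which tracks its progress in r7 (via indexed loads and
 * stores) rather than advancing r3, so it folds r7 into r3 first and
 * then shares .Lld_exc.
 */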

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
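/*
 * Illustrative trace: for a copy of 200 bytes whose load faults with
 * 72 bytes already stored, r3 minus the saved destination is 72, so
 * the source is advanced by 72 and the retry loop below attempts the
 * remaining 128 bytes one at a time; if the very next load still
 * faults, .Ldone returns 128 as the number of bytes not copied.
 */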
.Lld_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	subf	r6,r6,r3	/* #bytes copied */
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
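/*
 * The store-side adjusters end with two plain addi instructions
 * rather than addi/nop pairs, giving entry points 8 and 4 bytes
 * before .Lst_exc that add 8 and 4 to r3 respectively; as on the
 * load side, each entry point's code size equals the adjustment it
 * applies.
 */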
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
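	/*
	 * Concretely (illustrative): if an 8-byte std straddles a
	 * page boundary and the second page is unmapped, the whole
	 * store faults even though the bytes in the first page are
	 * writable, so the byte loop below can still make a few
	 * bytes of progress before the uncopied count is computed
	 * at 19f.
	 */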
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
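/*
 * Structure sketch: the loop below is software-pipelined, with loads
 * issued well ahead of the corresponding stores across six streams
 * spaced 128 bytes apart (offsets 0, 128, ..., 640), so several
 * cache lines are in flight at once.  The exc macro routes any fault
 * in this code to .Labort.
 */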
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
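/*
 * Rather than trying to work out how far the pipelined loop got,
 * this path restores the callee-saved registers and the original
 * arguments, then re-runs the copy through .Ldst_aligned, whose
 * fine-grained exception tables compute the correct return value.
 */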
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)