cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

copy_mc_64.S (4168B)


/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <asm/ppc_asm.h>
#include <asm/errno.h>
#include <asm/export.h>

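/*
 * The err1/err2/err3 macros tag the memory access that follows them with an
 * exception table entry, so a machine check on that access branches to the
 * matching fixup: .Ldo_err1 (no non-volatiles saved), .Ldo_err2 (non-volatiles
 * saved on the stack) or .Ldone (already inside the byte-by-byte recovery loop).
 */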
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm

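/*
 * Machine check fixup paths.  .Ldo_err2 restores the non-volatile registers
 * saved by the main loop and falls through to .Ldo_err1, which retries the
 * remaining r7 bytes one at a time; if the faulting byte is hit again, .Ldone
 * returns the exact count of bytes left uncopied from the CTR.
 */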
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0
	blr

.Ldone:
	mfctr	r3
	blr

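/*
 * copy_mc_generic(to=r3, from=r4, size=r5)
 *
 * Memory copy that recovers from machine checks on the accessed memory.
 * Returns 0 on success, otherwise the number of bytes not copied.
 */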
_GLOBAL(copy_mc_generic)
	mr	r7,r5
	cmpldi	r5,16
	blt	.Lshort_copy

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6
	cmpldi	r5,128

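	/*
	 * Save the non-volatile registers so the unrolled loops below can
	 * use r14-r22; the err2 fixup path restores them.
	 */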
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

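	/* Fewer than 128 bytes left (from the cmpldi above): skip the cacheline loop */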
	blt	5f
	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

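	/* Keep only the low 7 bits of the length: at most 127 bytes remain */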
	clrldi	r5,r5,(64-7)

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

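	/*
	 * Restore the non-volatile registers and pop the stack frame; the
	 * remaining tail copies only use volatile registers.
	 */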
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);
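The C-level contract can be read off the code above: r3/r4/r5 carry the first three arguments (destination, source, size) per the PowerPC ABI, and the return value in r3 is 0 on success or the number of bytes left uncopied after a machine check. Below is a minimal sketch of a caller; mainline Linux reaches this routine through the copy_mc_to_kernel() wrapper when CONFIG_ARCH_HAS_COPY_MC is set, but the <linux/uaccess.h> include, the helper name read_possibly_poisoned() and the -EFAULT policy are illustrative assumptions, not taken from this file.

#include <linux/uaccess.h>	/* copy_mc_to_kernel(); assumed entry point */
#include <linux/errno.h>
#include <linux/types.h>

/*
 * Sketch: copy from a buffer that may contain poisoned memory.
 * copy_mc_to_kernel() (backed by copy_mc_generic() here) returns 0 on
 * success or the number of bytes it could not copy.
 */
static int read_possibly_poisoned(void *dst, const void *src, size_t len)
{
	unsigned long left = copy_mc_to_kernel(dst, src, len);

	if (left)		/* a machine check stopped the copy early */
		return -EFAULT;	/* only len - left bytes were copied */
	return 0;
}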