cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

memcpy_64.S (4272B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
      4 */
      5#include <asm/processor.h>
      6#include <asm/ppc_asm.h>
      7#include <asm/export.h>
      8#include <asm/asm-compat.h>
      9#include <asm/feature-fixups.h>
     10#include <asm/kasan.h>
     11
     12#ifndef SELFTEST_CASE
     13/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
     14#define SELFTEST_CASE	0
     15#endif
     16
/*
 * void *memcpy(void *dest, const void *src, size_t n)  -- ppc64 kernel memcpy
 *
 * In:  r3 = dest, r4 = src, r5 = n.   Out: r3 = dest.
 * The feature-fixup sections below are patched at boot: when
 * CPU_FTR_VMX_COPY is set (Book3S only), the entry is replaced with a
 * branch to memcpy_power7; otherwise the generic code here runs.
 */
      17	.align	7
      18_GLOBAL_TOC_KASAN(memcpy)
      19BEGIN_FTR_SECTION
      20#ifdef __LITTLE_ENDIAN__
	/* LE path only needs "n == 0?" (cr7) for the byte loop below */
      21	cmpdi	cr7,r5,0
      22#else
	/*
	 * BE path clobbers r3 as the store cursor, so stash the original
	 * dest in a slot below the stack pointer for the final reload.
	 * NOTE(review): assumes -STACKFRAMESIZE+STK_REG(R31)(r1) is a
	 * safe scratch slot per ppc_asm.h conventions -- confirm.
	 */
      23	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
      24#endif
      25FTR_SECTION_ELSE
      26#ifdef CONFIG_PPC_BOOK3S_64
      27	b	memcpy_power7
      28#endif
      29ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
      30#ifdef __LITTLE_ENDIAN__
      31	/* dumb little-endian memcpy that will get replaced at runtime */
	/* pre-bias both cursors so lbzu/stbu auto-increment by 1 each pass */
      32	addi r9,r3,-1
      33	addi r4,r4,-1
	/* n == 0: return now; r3 still holds dest */
      34	beqlr cr7
      35	mtctr r5
      361:	lbzu r10,1(r4)
      37	stbu r10,1(r9)
      38	bdnz 1b
      39	blr
      40#else
	/*
	 * Big-endian path.  Copy the low 4 bits of n into cr7 so the tail
	 * code can test the 8/4/2/1-byte remainders with bt/bf; keep
	 * "n < 16" in cr1 for the short-copy cutoff.
	 */
      41	PPC_MTOCRF(0x01,r5)
      42	cmpldi	cr1,r5,16
      43	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
      44	andi.	r6,r6,7
	/* prefetch the first source cache line */
      45	dcbt	0,r4
      46	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
	/*
	 * Dest is 8-byte aligned (or this CPU handles unaligned std well).
	 * Main loop moves 16 bytes/iteration, software-pipelined through
	 * r8/r9; r3 is pre-biased by -16 to suit the stdu 16(r3) updates.
	 */
.Ldst_aligned:
	addi	r3,r3,-16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	/* src not 8-byte aligned: take the shift-and-merge path (r0 = skew) */
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4		# r7 = number of 16-byte chunks
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7		# r5 = leftover bytes (< 8) for cr1 test later
	bf	cr7*4+0,2f	# skip if the 8-byte remainder bit of n is clear
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beq	3f
	addi	r3,r3,16
	/*
	 * Tail: cr7 still holds the low 4 bits of the original n; store
	 * the remaining 4-, 2- and 1-byte pieces in turn.
	 */
.Ldo_tail:
	bf	cr7*4+1,1f
	lwz	r9,8(r4)
	addi	r4,r4,4
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	lhz	r9,8(r4)
	addi	r4,r4,2
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	lbz	r9,8(r4)
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

	/*
	 * Source is not 8-byte aligned (r0 = misalignment, 1..7).  Round
	 * r4 down to an aligned address and rebuild each output
	 * doubleword from two aligned loads via sld/srd shift-and-merge:
	 * r10 = r0*8 (left-shift count), r11 = 64 - r10 (right-shift).
	 */
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4	# align src downward by the skew
	srdi	r7,r5,4		# ctr = 16-byte iterations
	sldi	r10,r0,3
	cmpdi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0

	bt	cr7*4+0,0f	# odd 8-byte chunk? use the other preamble

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

	/* drain the pipeline: up to three pending merged doublewords */
3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beq	4f
	cmpwi	cr1,r5,8	# does the tail need one more aligned load?
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,6f
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
6:
	/* tail bytes sit left-justified in r9; rotate each piece into place */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr

	/*
	 * Dest not 8-byte aligned: copy 1/2/4 bytes (r6 = bytes to the
	 * boundary, its bits loaded into cr7; r7 = running offset) until
	 * dest is aligned, then rejoin the aligned path with the reduced
	 * length's low bits refreshed into cr7.
	 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

	/* n < 16: copy 8/4/2/1-byte pieces selected by the bits of n in cr7 */
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* return dest pointer */
	blr
#endif
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL_KASAN(memcpy)