cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

memcpy-archs.S (4405B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
      4 */
      5
      6#include <linux/linkage.h>
      7
      8#ifdef __LITTLE_ENDIAN__
       ;;; Endian-specific helpers used by the unaligned-source paths of memcpy
       ;;; (CASE 1 to CASE 3 further down). Each helper is invoked with a
       ;;; destination register, a source register and an immediate.
       ;;;
       ;;; Little-endian flavour:
       ;;;   SHIFT_1   - shift left by the immediate
       ;;;   SHIFT_2   - shift right by the immediate
       ;;;   MERGE_1   - shift left by the immediate
       ;;;   MERGE_2   - expands to nothing, so no instruction is emitted
       ;;;   EXTRACT_1 - keep the low 16 bits; the immediate is not used
       ;;;   EXTRACT_2 - shift right by the immediate
      9# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
     10# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
     11# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
     12# define MERGE_2(RX,RY,IMM)
     13# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
     14# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
     15#else
       ;;; Big-endian flavour: the two SHIFT directions are swapped with respect
       ;;; to the little-endian definitions, both MERGE helpers shift left,
       ;;; EXTRACT_1 shifts right by the immediate and EXTRACT_2 always shifts
       ;;; right by 8 regardless of the immediate it is given.
     16# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
     17# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
     18# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
     19# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
     20# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
     21# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
     22#endif
     23
     24#ifdef CONFIG_ARC_HAS_LL64
       ;;; Building blocks of the aligned bulk copy loop (CASE 0 of memcpy).
       ;;; The loop issues four LOADX and four STOREX per iteration, so the
       ;;; bytes moved per iteration are four times the step used here.
       ;;;
       ;;; With CONFIG_ARC_HAS_LL64: ldd.ab / std.ab with a step of 8, i.e.
       ;;; 32 bytes per iteration. ZOLSHFT (5) turns the byte count into the
       ;;; iteration count and ZOLAND (0x1F) masks out the bytes left over.
     25# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
     26# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
     27# define ZOLSHFT		5
     28# define ZOLAND			0x1F
     29#else
       ;;; Without CONFIG_ARC_HAS_LL64: ld.ab / st.ab with a step of 4, i.e.
       ;;; 16 bytes per iteration, hence a shift of 4 and a mask of 0xF.
     30# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
     31# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
     32# define ZOLSHFT		4
     33# define ZOLAND			0xF
     34#endif
     35
     36ENTRY_CFI(memcpy)
       ;;; memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0.
       ;;;
       ;;; Register use, as seen in the code below:
       ;;;   r0        destination on entry; never written here, so the caller
       ;;;             gets it back unchanged as the return value
       ;;;   r1        source pointer, advanced by every load
       ;;;   r2        number of bytes still to be copied
       ;;;   r3        working copy of the destination pointer
       ;;;   r4 - r10  scratch for alignment tests and data in flight
       ;;;             NOTE(review): with the ldd/std flavour of LOADX/STOREX the
       ;;;             odd partner registers are presumably used too - confirm
       ;;;   lp_count  trip count of each lpnz loop
       ;;;   blink     return address (every exit is a jump through blink)
       ;;;
       ;;; Outline:
       ;;;   1. size == 0         -> return at once
       ;;;   2. size <= 8         -> plain byte loop at .Lsmallchunk
       ;;;   3. otherwise copy single bytes until the destination is 32bit
       ;;;      aligned, then dispatch on the alignment of the source:
       ;;;        CASE 0  source aligned      -> unrolled word copy
       ;;;        CASE 1  source off by 1     -> shift-and-merge copy
       ;;;        CASE 2  source off by 2     -> shift-and-merge copy
       ;;;        CASE 3  source off by 3     -> shift-and-merge copy
       ;;;
       ;;; Branches and jumps with the .d suffix have a delay slot: the
       ;;; instruction written right after them is executed as well.
       ;;; Instructions with the .f suffix update the flags; the lpnz loops and
       ;;; the conditional shifts below consume those flags.
     37	mov.f	0, r2
     38;;; if size is zero
     39	jz.d	[blink]
     40	mov	r3, r0		; delay slot: r3 = working dst, r0 stays intact as ret val
     41
     42;;; if size <= 8
     43	cmp	r2, 8
     44	bls.d	@.Lsmallchunk
     45	mov.f	lp_count, r2
       	;; The mov.f above sits in the delay slot: lp_count = size, and its
       	;; flags are what the lpnz at .Lsmallchunk tests on the short path.
     46
       ;;; Align the destination: r4 = dst & 3. When r4 is zero the lpnz skips
       ;;; the loop, otherwise the loop copies 4 - r4 single bytes.
     47	and.f	r4, r0, 0x03
     48	rsub	lp_count, r4, 4
     49	lpnz	@.Laligndestination
     50	;; LOOP BEGIN
     51	ldb.ab	r5, [r1,1]
     52	sub	r2, r2, 1
     53	stb.ab	r5, [r3,1]
     54.Laligndestination:
     55
     56;;; Check the alignment of the source
     57	and.f	r4, r1, 0x03
     58	bnz.d	@.Lsourceunaligned
       	;; The lsr.f below is the delay slot of the bnz.d, so it also runs when
       	;; the branch is taken; the unaligned paths set lp_count again.
     59
     60;;; CASE 0: Both source and destination are 32bit aligned
     61;;; Convert len to Dwords, unfold x4
     62	lsr.f	lp_count, r2, ZOLSHFT
     63	lpnz	@.Lcopy32_64bytes
     64	;; LOOP START
     65	LOADX (r6, r1)
     66	LOADX (r8, r1)
     67	LOADX (r10, r1)
     68	LOADX (r4, r1)
     69	STOREX (r6, r3)
     70	STOREX (r8, r3)
     71	STOREX (r10, r3)
     72	STOREX (r4, r3)
     73.Lcopy32_64bytes:
     74
     75	and.f	lp_count, r2, ZOLAND ; bytes left after the unrolled loop (mask is 0x1F or 0xF)
     76.Lsmallchunk:
       	;; Byte-wise tail. Reached by fall-through with the remainder computed
       	;; above, or from the size <= 8 test with lp_count = size.
     77	lpnz	@.Lcopyremainingbytes
     78	;; LOOP START
     79	ldb.ab	r5, [r1,1]
     80	stb.ab	r5, [r3,1]
     81.Lcopyremainingbytes:
     82
     83	j	[blink]
     84;;; END CASE 0
     85
     86.Lsourceunaligned:
       	;; r4 = src & 3 is 1, 2 or 3 here. The compare result is used by both
       	;; branches below; the sub in the first delay slot has no .f suffix.
     87	cmp	r4, 2
     88	beq.d	@.LunalignedOffby2
     89	sub	r2, r2, 1
     90
     91	bhi.d	@.LunalignedOffby3
     92	ldb.ab	r5, [r1, 1]
       	;; Delay slot of the bhi.d: this byte load runs for CASE 1 and CASE 3
       	;; and is paid for by the sub in the previous delay slot.
     93
     94;;; CASE 1: The source is unaligned, off by 1
     95	;; Hence I need to read 1 byte for a 16bit alignment
     96	;; and 2bytes to reach 32bit alignment
     97	ldh.ab	r6, [r1, 2]
     98	sub	r2, r2, 2
     99	;; Convert to words, unfold x2
    100	lsr.f	lp_count, r2, 3
       	;; Combine the byte in r5 and the halfword in r6 into r5, which then
       	;; carries the 3 bytes that have been read but not yet stored.
    101	MERGE_1 (r6, r6, 8)
    102	MERGE_2 (r5, r5, 24)
    103	or	r5, r5, r6
    104
    105	;; Both src and dst are aligned
    106	lpnz	@.Lcopy8bytes_1
    107	;; LOOP START
    108	ld.ab	r6, [r1, 4]
    109	ld.ab	r8, [r1,4]
    110
       	;; Each output word is the carry in r5 merged with one shifted part of
       	;; the new word; the other shifted part becomes the next carry.
    111	SHIFT_1	(r7, r6, 24)
    112	or	r7, r7, r5
    113	SHIFT_2	(r5, r6, 8)
    114
    115	SHIFT_1	(r9, r8, 24)
    116	or	r9, r9, r5
    117	SHIFT_2	(r5, r8, 8)
    118
    119	st.ab	r7, [r3, 4]
    120	st.ab	r9, [r3, 4]
    121.Lcopy8bytes_1:
    122
    123	;; Write back the remaining 16bits
    124	EXTRACT_1 (r6, r5, 16)
    125	sth.ab	r6, [r3, 2]
    126	;; Write back the remaining 8bits
    127	EXTRACT_2 (r5, r5, 16)
    128	stb.ab	r5, [r3, 1]
    129
    130	and.f	lp_count, r2, 0x07 ; up to 7 bytes left after the 8-byte loop
    131	lpnz	@.Lcopybytewise_1
    132	;; LOOP START
    133	ldb.ab	r6, [r1,1]
    134	stb.ab	r6, [r3,1]
    135.Lcopybytewise_1:
    136	j	[blink]
    137
    138.LunalignedOffby2:
    139;;; CASE 2: The source is unaligned, off by 2
       	;; One halfword brings the source to 32bit alignment. r2 was already
       	;; decremented once in the delay slot of the beq.d; the sub below
       	;; accounts for the second byte of the halfword.
    140	ldh.ab	r5, [r1, 2]
    141	sub	r2, r2, 1
    142
    143	;; Both src and dst are aligned
    144	;; Convert to words, unfold x2
    145	lsr.f	lp_count, r2, 3
    146#ifdef __BIG_ENDIAN__
       	;; Executed only when the lsr.f above left a non-zero loop count.
    147	asl.nz	r5, r5, 16
    148#endif
    149	lpnz	@.Lcopy8bytes_2
    150	;; LOOP START
    151	ld.ab	r6, [r1, 4]
    152	ld.ab	r8, [r1,4]
    153
       	;; Same carry scheme as CASE 1, with a 16/16 bit split.
    154	SHIFT_1	(r7, r6, 16)
    155	or	r7, r7, r5
    156	SHIFT_2	(r5, r6, 16)
    157
    158	SHIFT_1	(r9, r8, 16)
    159	or	r9, r9, r5
    160	SHIFT_2	(r5, r8, 16)
    161
    162	st.ab	r7, [r3, 4]
    163	st.ab	r9, [r3, 4]
    164.Lcopy8bytes_2:
    165
    166#ifdef __BIG_ENDIAN__
       	;; NOTE(review): relies on the flags of the lsr.f before the loop still
       	;; being valid here - nothing in between carries a .f suffix.
    167	lsr.nz	r5, r5, 16
    168#endif
       	;; Store the halfword still carried in r5.
    169	sth.ab	r5, [r3, 2]
    170
    171	and.f	lp_count, r2, 0x07 ; up to 7 bytes left after the 8-byte loop
    172	lpnz	@.Lcopybytewise_2
    173	;; LOOP START
    174	ldb.ab	r6, [r1,1]
    175	stb.ab	r6, [r3,1]
    176.Lcopybytewise_2:
    177	j	[blink]
    178
    179.LunalignedOffby3:
    180;;; CASE 3: The source is unaligned, off by 3
    181;;; Hence only 1 byte has to be read to reach 32bit alignment. That byte
       ;;; is already in r5 and already subtracted from r2: both happened in the
       ;;; delay slots of the two branches at .Lsourceunaligned.
    182
    183	;; Both src and dst are aligned
    184	;; Convert to words, unfold x2
    185	lsr.f	lp_count, r2, 3
    186#ifdef __BIG_ENDIAN__
       	;; NOTE(review): .ne is presumably the same condition as the .nz used
       	;; in CASE 2 - confirm against the ISA manual.
    187	asl.ne	r5, r5, 24
    188#endif
    189	lpnz	@.Lcopy8bytes_3
    190	;; LOOP START
    191	ld.ab	r6, [r1, 4]
    192	ld.ab	r8, [r1,4]
    193
       	;; Same carry scheme as CASE 1, with the shift amounts swapped (8/24).
    194	SHIFT_1	(r7, r6, 8)
    195	or	r7, r7, r5
    196	SHIFT_2	(r5, r6, 24)
    197
    198	SHIFT_1	(r9, r8, 8)
    199	or	r9, r9, r5
    200	SHIFT_2	(r5, r8, 24)
    201
    202	st.ab	r7, [r3, 4]
    203	st.ab	r9, [r3, 4]
    204.Lcopy8bytes_3:
    205
    206#ifdef __BIG_ENDIAN__
    207	lsr.nz	r5, r5, 24
    208#endif
       	;; Store the single byte still carried in r5.
    209	stb.ab	r5, [r3, 1]
    210
    211	and.f	lp_count, r2, 0x07 ; up to 7 bytes left after the 8-byte loop
    212	lpnz	@.Lcopybytewise_3
    213	;; LOOP START
    214	ldb.ab	r6, [r1,1]
    215	stb.ab	r6, [r3,1]
    216.Lcopybytewise_3:
    217	j	[blink]
    218
    219END_CFI(memcpy)