cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

csum_copy.S (7141B)


/* SPDX-License-Identifier: GPL-2.0 */
/* csum_copy.S: Checksum+copy code for sparc64
 *
 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
 */

#include <asm/export.h>

#ifdef __KERNEL__
#define GLOBAL_SPARE	%g7
#else
#define GLOBAL_SPARE	%g5
#endif
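
/* Pick a scratch global: the userland SPARC ABI reserves %g7 as the
 * thread pointer, so only the kernel build may clobber it; builds
 * outside the kernel fall back to %g5.
 */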

#ifndef EX_LD
#define EX_LD(x)	x
#endif

#ifndef EX_ST
#define EX_ST(x)	x
#endif

#ifndef EX_RETVAL
#define EX_RETVAL(x)	x
#endif

#ifndef LOAD
#define LOAD(type,addr,dest)	type [addr], dest
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

#ifndef FUNC_NAME
#define FUNC_NAME	csum_partial_copy_nocheck
#endif
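
/* The EX_*(), LOAD/STORE, and FUNC_NAME macros above are overridden by
 * csum_copy_from_user.S and csum_copy_to_user.S, which wrap the memory
 * accesses in exception tables and #include this file; the defaults
 * here build the plain kernel-to-kernel csum_partial_copy_nocheck.
 */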

	.register	%g2, #scratch
	.register	%g3, #scratch

	.text

90:
	/* We checked for zero length already, so there must be
	 * at least one byte.
	 */
	be,pt		%icc, 1f
	 nop
	EX_LD(LOAD(ldub, %o0 + 0x00, %o4))
	add		%o0, 1, %o0
	sub		%o2, 1, %o2
	EX_ST(STORE(stb, %o4, %o1 + 0x00))
	add		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0
	be,pn		%icc, 80f
	 cmp		%o2, 2
	blu,pn		%icc, 60f
	 nop
	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
	add		%o0, 2, %o0
	sub		%o2, 2, %o2
	EX_ST(STORE(sth, %o5, %o1 + 0x00))
	add		%o1, 2, %o1
	ba,pt		%xcc, 80f
	 add		%o5, %o4, %o4

	.globl		FUNC_NAME
	.type		FUNC_NAME,#function
	EXPORT_SYMBOL(FUNC_NAME)
FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len */
	LOAD(prefetch, %o0 + 0x000, #n_reads)
	xor		%o0, %o1, %g1
	mov		-1, %o3
	clr		%o4
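	/* %o3 is seeded with all ones so that the folded sum returned
	 * below can never be zero; the user-copy variants rely on this
	 * and report faults by returning 0.
	 */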
	andcc		%g1, 0x3, %g0
	bne,pn		%icc, 95f
	 LOAD(prefetch, %o0 + 0x040, #n_reads)

	brz,pn		%o2, 70f
	 andcc		%o0, 0x3, %g0

	/* We "remember" in GLOBAL_SPARE whether the lowest bit of the
	 * source address was set, because if it was we must swap the
	 * upper and lower 8-bit halves of the sum we calculate.
	 */
	bne,pn		%icc, 90b
	 andcc		%o0, 0x1, GLOBAL_SPARE

80:
	LOAD(prefetch, %o0 + 0x080, #n_reads)
	andncc		%o2, 0x3f, %g3

	LOAD(prefetch, %o0 + 0x0c0, #n_reads)
	sub		%o2, %g3, %o2
	brz,pn		%g3, 2f
	 LOAD(prefetch, %o0 + 0x100, #n_reads)

	/* So that we don't need to use the non-pairing
	 * add-with-carry instructions, we accumulate 32-bit
	 * values into a 64-bit register.  At the end of the
	 * loop we fold it down to 32 bits, and then to 16.
	 */
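	/* Overflow is impossible: even a maximal length is at most
	 * 2^30 words of at most 2^32 - 1 each, keeping the 64-bit
	 * accumulator below 2^62.
	 */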
	ba,pt		%xcc, 1f
	LOAD(prefetch, %o0 + 0x140, #n_reads)

	.align		32
1:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
	EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
	EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x00))
	EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x04))
	EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x08))
	EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x0c))
	EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x10))
	EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x14))
	EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x18))
	EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x1c))
	EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x20))
	EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x24))
	EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x28))
	EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x2c))
	EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x30))
	EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
	add		%o4, %g1, %o4
	EX_ST(STORE(stw, %g1, %o1 + 0x34))
	LOAD(prefetch, %o0 + 0x180, #n_reads)
	add		%o4, %g2, %o4
	EX_ST(STORE(stw, %g2, %o1 + 0x38))
	subcc		%g3, 0x40, %g3
	add		%o0, 0x40, %o0
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x3c))
	bne,pt		%icc, 1b
	 add		%o1, 0x40, %o1

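	/* Main loop done; copy and sum the remaining whole 32-bit
	 * words (len & 0x3c), one per iteration.
	 */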
2:	and		%o2, 0x3c, %g3
	brz,pn		%g3, 2f
	 sub		%o2, %g3, %o2
1:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
	subcc		%g3, 0x4, %g3
	add		%o0, 0x4, %o0
	add		%o4, %o5, %o4
	EX_ST(STORE(stw, %o5, %o1 + 0x00))
	bne,pt		%icc, 1b
	 add		%o1, 0x4, %o1

2:
	/* fold 64-->32 */
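	/* Two rounds are needed because the first add may itself carry
	 * into the upper half: 0xffffffffffffffff folds to 0x1fffffffe,
	 * and a second round gives 0xffffffff.
	 */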
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4

	/* fold 32-->16 */
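	/* Again two rounds, e.g. 0xffffffff folds to 0x1fffe and then
	 * to 0xffff.
	 */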
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

60:
	/* %o4 has the 16-bit sum we have calculated so far.  */
	cmp		%o2, 2
	blu,pt		%icc, 1f
	 nop
	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
	sub		%o2, 2, %o2
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
	EX_ST(STORE(sth, %o5, %o1 + 0x00))
	add		%o1, 0x2, %o1
1:	brz,pt		%o2, 1f
	 nop
	EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
	sub		%o2, 1, %o2
	add		%o0, 1, %o0
	EX_ST(STORE(stb, %o5, %o1 + 0x00))
	sllx		%o5, 8, %o5
	add		%o1, 1, %o1
	add		%o4, %o5, %o4
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		GLOBAL_SPARE, 1f
	 nop

	/* We started with an odd byte; byte-swap the result.  */
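	/* A 16-bit ones'-complement sum is byte-order independent:
	 * summing one byte out of phase yields the byte-swapped result
	 * (RFC 1071, section 2(B)), so a single swap corrects it.
	 */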
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4

1:	addcc		%o3, %o4, %o3
	addc		%g0, %o3, %o3

70:
	retl
	 srl		%o3, 0, %o0

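	/* Source and destination disagree in their 32-bit alignment:
	 * copy via byte stores while summing halfwords and words from
	 * the (aligned) source, accumulating into GLOBAL_SPARE.  %o5
	 * remembers whether the source started on an odd byte.
	 */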
95:	mov		0, GLOBAL_SPARE
	brlez,pn	%o2, 4f
	 andcc		%o0, 1, %o5
	be,a,pt		%icc, 1f
	 srl		%o2, 1, %g1
	sub		%o2, 1, %o2
	EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
	add		%o0, 1, %o0
	EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
	srl		%o2, 1, %g1
	add		%o1, 1, %o1
1:	brz,a,pn	%g1, 3f
	 andcc		%o2, 1, %g0
	andcc		%o0, 2, %g0
	be,a,pt		%icc, 1f
	 srl		%g1, 1, %g1
	EX_LD(LOAD(lduh, %o0, %o4))
	sub		%o2, 2, %o2
	srl		%o4, 8, %g2
	sub		%g1, 1, %g1
	EX_ST(STORE(stb, %g2, %o1))
	add		%o4, GLOBAL_SPARE, GLOBAL_SPARE
	EX_ST(STORE(stb, %o4, %o1 + 1))
	add		%o0, 2, %o0
	srl		%g1, 1, %g1
	add		%o1, 2, %o1
1:	brz,a,pn	%g1, 2f
	 andcc		%o2, 2, %g0
	EX_LD(LOAD(lduw, %o0, %o4))
5:	srl		%o4, 24, %g2
	srl		%o4, 16, %g3
	EX_ST(STORE(stb, %g2, %o1))
	srl		%o4, 8, %g2
	EX_ST(STORE(stb, %g3, %o1 + 1))
	add		%o0, 4, %o0
	EX_ST(STORE(stb, %g2, %o1 + 2))
	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE
	EX_ST(STORE(stb, %o4, %o1 + 3))
	addc		GLOBAL_SPARE, %g0, GLOBAL_SPARE
	add		%o1, 4, %o1
	subcc		%g1, 1, %g1
	bne,a,pt	%icc, 5b
	 EX_LD(LOAD(lduw, %o0, %o4))
	sll		GLOBAL_SPARE, 16, %g2
	srl		GLOBAL_SPARE, 16, GLOBAL_SPARE
	srl		%g2, 16, %g2
	andcc		%o2, 2, %g0
	add		%g2, GLOBAL_SPARE, GLOBAL_SPARE
2:	be,a,pt		%icc, 3f
	 andcc		%o2, 1, %g0
	EX_LD(LOAD(lduh, %o0, %o4))
	andcc		%o2, 1, %g0
	srl		%o4, 8, %g2
	add		%o0, 2, %o0
	EX_ST(STORE(stb, %g2, %o1))
	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	EX_ST(STORE(stb, %o4, %o1 + 1))
	add		%o1, 2, %o1
3:	be,a,pt		%icc, 1f
	 sll		GLOBAL_SPARE, 16, %o4
	EX_LD(LOAD(ldub, %o0, %g2))
	sll		%g2, 8, %o4
	EX_ST(STORE(stb, %g2, %o1))
	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
	sll		GLOBAL_SPARE, 16, %o4
1:	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE
	srl		GLOBAL_SPARE, 16, %o4
	addc		%g0, %o4, GLOBAL_SPARE
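	/* %o5 is non-zero iff the source started on an odd byte; swap
	 * the two bytes of the 16-bit sum in that case, as above.
	 */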
	brz,pt		%o5, 4f
	 srl		GLOBAL_SPARE, 8, %o4
	and		GLOBAL_SPARE, 0xff, %g2
	and		%o4, 0xff, %o4
	sll		%g2, 8, %g2
	or		%g2, %o4, GLOBAL_SPARE
4:	addcc		%o3, GLOBAL_SPARE, %o3
	addc		%g0, %o3, %o0
	retl
	 srl		%o0, 0, %o0
	.size		FUNC_NAME, .-FUNC_NAME
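
The fold-and-swap arithmetic above can be sketched in C. This is an
illustrative model only (the helper name and types are assumptions, not
part of the kernel API), for an accumulator built from 32-bit additions
as in the main loop:

	#include <stdint.h>
	#include <stdio.h>

	/* Fold a 64-bit ones'-complement accumulator to 16 bits, then
	 * byte-swap if summing started on an odd source address. */
	static uint16_t fold_sum(uint64_t sum, int started_odd)
	{
		/* fold 64 --> 32: two rounds, since the first add can carry */
		sum = (sum >> 32) + (uint32_t)sum;
		sum = (sum >> 32) + (uint32_t)sum;

		/* fold 32 --> 16: likewise two rounds */
		sum = (sum >> 16) + (sum & 0xffff);
		sum = (sum >> 16) + (sum & 0xffff);

		if (started_odd)	/* RFC 1071 byte-order independence */
			sum = ((sum >> 8) | (sum << 8)) & 0xffff;

		return (uint16_t)sum;
	}

	int main(void)
	{
		/* worst case: an all-ones accumulator folds to 0xffff */
		printf("%#lx\n", (unsigned long)fold_sum(~0ULL, 0));
		return 0;
	}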