checksum_32.S - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
checksum_32.S (9059B)
      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * INET		An implementation of the TCP/IP protocol suite for the LINUX
      4 *		operating system.  INET is implemented using the  BSD Socket
      5 *		interface as the means of communication with the user level.
      6 *
      7 *		IP/TCP/UDP checksumming routines
      8 *
      9 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
     10 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
     11 *		Tom May, <ftom@netcom.com>
     12 *              Pentium Pro/II routines:
     13 *              Alexander Kjeldaas <astor@guardian.no>
     14 *              Finn Arne Gangstad <finnag@guardian.no>
     15 *		Lots of code moved from tcp.c and ip.c; see those files
     16 *		for more names.
     17 *
     18 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
     19 *			     handling.
     20 *		Andi Kleen,  add zeroing on error
     21 *                   converted to pure assembler
     22 */
     23
     24#include <linux/linkage.h>
     25#include <asm/errno.h>
     26#include <asm/asm.h>
     27#include <asm/export.h>
     28#include <asm/nospec-branch.h>
     29
     30/*
     31 * computes a partial checksum, e.g. for TCP/UDP fragments
     32 */
     33
     34/*	
     35unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
     36 */
     37		
     38.text
     39		
     40#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
     41
     42	  /*		
     43	   * Experiments with Ethernet and SLIP connections show that buff
     44	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
     45	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
     46	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
     47	   * alignment for the unrolled loop.
     48	   */		
     49SYM_FUNC_START(csum_partial)
       /*
        * unsigned int csum_partial(const unsigned char *buff, int len,
        *                           unsigned int sum)
        *
        * Adds the 'len' bytes at 'buff' into the 32-bit accumulator 'sum'
        * using end-around-carry addition and returns the result in %eax.
        * All three arguments are read from the stack.
        *
        * Register roles:
        *   %eax  running sum (return value)
        *   %ecx  bytes remaining, then 32-byte block counter, then tail size
        *   %edx  saved byte count, then dword counter
        *   %esi  read pointer
        *   %ebx  scratch for loaded data
        * %esi and %ebx are saved and restored; %ecx and %edx are clobbered.
        */
     50	pushl %esi
     51	pushl %ebx
     52	movl 20(%esp),%eax	# Function arg: unsigned int sum
     53	movl 16(%esp),%ecx	# Function arg: int len
     54	movl 12(%esp),%esi	# Function arg: unsigned char *buff
     55	testl $3, %esi		# Check alignment.
     56	jz 2f			# Jump if alignment is ok.
     57	testl $1, %esi		# Check alignment.
     58	jz 10f			# Jump if alignment is boundary of 2 bytes.
     59
     60	# buf is odd
     61	dec %ecx		# consume the leading odd byte
     62	jl 8f			# len was 0: return sum untouched
     63	movzbl (%esi), %ebx
     64	addl %ebx, %eax
       	adcl $0, %eax		# fold the carry back in before roll overwrites CF
     65	roll $8, %eax		# rotate so later words line up; undone at 7:
     66	inc %esi
     67	testl $2, %esi
     68	jz 2f			# now 4-byte aligned
     6910:
     70	subl $2, %ecx		# Alignment uses up two bytes.
     71	jae 1f			# Jump if we had at least two bytes.
     72	addl $2, %ecx		# ecx was < 2.  Deal with it.
     73	jmp 4f
     741:	movw (%esi), %bx
     75	addl $2, %esi
     76	addw %bx, %ax
     77	adcl $0, %eax
     782:
     79	movl %ecx, %edx		# keep the byte count for the tail handling
     80	shrl $5, %ecx		# ecx = number of 32-byte blocks
     81	jz 2f
     82	testl %esi, %esi	# clears CF before the adcl chain
     831:	movl (%esi), %ebx
     84	adcl %ebx, %eax
     85	movl 4(%esi), %ebx
     86	adcl %ebx, %eax
     87	movl 8(%esi), %ebx
     88	adcl %ebx, %eax
     89	movl 12(%esi), %ebx
     90	adcl %ebx, %eax
     91	movl 16(%esi), %ebx
     92	adcl %ebx, %eax
     93	movl 20(%esi), %ebx
     94	adcl %ebx, %eax
     95	movl 24(%esi), %ebx
     96	adcl %ebx, %eax
     97	movl 28(%esi), %ebx
     98	adcl %ebx, %eax
     99	lea 32(%esi), %esi	# lea and dec leave CF alone, so the
    100	dec %ecx		# carry chain continues across iterations
    101	jne 1b
    102	adcl $0, %eax		# fold in the last carry
    1032:	movl %edx, %ecx
    104	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
    105	je 4f
    106	shrl $2, %edx		# This clears CF
    1073:	adcl (%esi), %eax
    108	lea 4(%esi), %esi
    109	dec %edx
    110	jne 3b
    111	adcl $0, %eax
    1124:	andl $3, %ecx		# 0..3 trailing bytes
    113	jz 7f
    114	cmpl $2, %ecx
    115	jb 5f			# exactly one byte
    116	movw (%esi),%cx
    117	leal 2(%esi),%esi
    118	je 6f			# flags still from cmpl: exactly two bytes
    119	shll $16,%ecx		# three bytes: word goes high, byte goes low
    1205:	movb (%esi),%cl
    1216:	addl %ecx,%eax
    122	adcl $0, %eax 
    1237:	
    124	testb $1, 12(%esp)	# was the original buff address odd?
    125	jz 8f
    126	roll $8, %eax		# undo the rotation from the odd-byte prologue
    1278:
    128	popl %ebx
    129	popl %esi
    130	RET
    131SYM_FUNC_END(csum_partial)
    132
    133#else
    134
    135/* Version for PentiumII/PPro */
    136
    137SYM_FUNC_START(csum_partial)
       /*
        * PentiumII/PPro variant of
        *   unsigned int csum_partial(const unsigned char *buf, int len,
        *                             unsigned int sum)
        *
        * Arguments are read from the stack; the result is returned in %eax.
        * The bulk of the data is summed by a 32-entry unrolled adcl chain
        * (labels 40..45) that is entered part-way through via a computed
        * jump, so the first pass handles the (len & 0x7c) leading bytes and
        * every further pass handles a full 128 bytes.
        *
        * %esi and %ebx are saved and restored; %ecx and %edx are clobbered.
        */
    138	pushl %esi
    139	pushl %ebx
    140	movl 20(%esp),%eax	# Function arg: unsigned int sum
    141	movl 16(%esp),%ecx	# Function arg: int len
    142	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
    143
    144	testl $3, %esi         	# 4-byte aligned?
    145	jnz 25f                 # no: fix up alignment first
    14610:
    147	movl %ecx, %edx		# keep len for the 1-3 byte tail
    148	movl %ecx, %ebx
    149	andl $0x7c, %ebx	# bytes of whole dwords in the partial block
    150	shrl $7, %ecx		# number of full 128-byte blocks
    151	addl %ebx,%esi		# chain uses negative offsets from %esi
    152	shrl $2, %ebx  
    153	negl %ebx		# ebx = -(dwords in the partial block)
    154	lea 45f(%ebx,%ebx,2), %ebx	# 45f - 3*dwords; NOTE(review): relies on each chain entry encoding to 3 bytes
    155	testl %esi, %esi	# clears CF before entering the chain
    156	JMP_NOSPEC ebx
    157
    158	# Handle 2-byte-aligned regions
    15920:	addw (%esi), %ax
    160	lea 2(%esi), %esi	# lea preserves the carry from addw
    161	adcl $0, %eax
    162	jmp 10b
    16325:
    164	testl $1, %esi         
    165	jz 30f                 
    166	# buf is odd
    167	dec %ecx
    168	jl 90f			# len was 0: return sum untouched
    169	movzbl (%esi), %ebx
    170	addl %ebx, %eax
    171	adcl $0, %eax
    172	roll $8, %eax		# rotate so later words line up; undone at 80:
    173	inc %esi
    174	testl $2, %esi
    175	jz 10b
    176
    17730:	subl $2, %ecx          
    178	ja 20b                 	# more than 2 bytes: sum a word, then main path
    179	je 32f			# exactly 2 bytes left
    180	addl $2, %ecx		# fewer than 2: restore the count
    181	jz 80f
    182	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
    183	addl %ebx, %eax
    184	adcl $0, %eax
    185	jmp 80f
    18632:
    187	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
    188	adcl $0, %eax
    189	jmp 80f
    190
    19140: 
    192	addl -128(%esi), %eax
    193	adcl -124(%esi), %eax
    194	adcl -120(%esi), %eax
    195	adcl -116(%esi), %eax   
    196	adcl -112(%esi), %eax   
    197	adcl -108(%esi), %eax
    198	adcl -104(%esi), %eax
    199	adcl -100(%esi), %eax
    200	adcl -96(%esi), %eax
    201	adcl -92(%esi), %eax
    202	adcl -88(%esi), %eax
    203	adcl -84(%esi), %eax
    204	adcl -80(%esi), %eax
    205	adcl -76(%esi), %eax
    206	adcl -72(%esi), %eax
    207	adcl -68(%esi), %eax
    208	adcl -64(%esi), %eax     
    209	adcl -60(%esi), %eax     
    210	adcl -56(%esi), %eax     
    211	adcl -52(%esi), %eax   
    212	adcl -48(%esi), %eax   
    213	adcl -44(%esi), %eax
    214	adcl -40(%esi), %eax
    215	adcl -36(%esi), %eax
    216	adcl -32(%esi), %eax
    217	adcl -28(%esi), %eax
    218	adcl -24(%esi), %eax
    219	adcl -20(%esi), %eax
    220	adcl -16(%esi), %eax
    221	adcl -12(%esi), %eax
    222	adcl -8(%esi), %eax
    223	adcl -4(%esi), %eax
    22445:
    225	lea 128(%esi), %esi	# advance one block; lea preserves CF
    226	adcl $0, %eax		# fold in the last carry of this pass
    227	dec %ecx
    228	jge 40b			# another full 128-byte block to sum
    229	movl %edx, %ecx
    23050:	andl $3, %ecx
    231	jz 80f
    232
    233	# Handle the last 1-3 bytes without jumping
    234	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
    235	movl $0xffffff,%ebx	# by the shll and shrl instructions
    236	shll $3,%ecx
    237	shrl %cl,%ebx		# mask becomes 0xff, 0xffff or 0xffffff
    238	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
    239	addl %ebx,%eax
    240	adcl $0,%eax
    24180: 
    242	testb $1, 12(%esp)	# was the original buf address odd?
    243	jz 90f
    244	roll $8, %eax		# undo the rotation from the odd-byte prologue
    24590: 
    246	popl %ebx
    247	popl %esi
    248	RET
    249SYM_FUNC_END(csum_partial)
    250				
    251#endif
    252EXPORT_SYMBOL(csum_partial)
    253
    254/*
    255unsigned int csum_partial_copy_generic (const char *src, char *dst,
    256				  int len)
    257 */ 
    258
    259/*
    260 * Copy from ds while checksumming, otherwise like csum_partial
    261 */
    262
    263#define EXC(y...)	/* wrap one memory access that is allowed to fault */	\
    264	9999: y;		/* 9999 marks the instruction for the exception table */	\
    265	_ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX) /* on a fault, resume at the next local label 7; presumably with %eax zeroed - confirm against the extable type definitions */
    266
    267#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
    268
    269#define ARGBASE 16		/* offset of the return address: 4-byte local plus three pushed registers */
    270#define FP		12	/* offset of the 4-byte local that holds the saved length */
    271		
    272SYM_FUNC_START(csum_partial_copy_generic)
       /*
        * unsigned int csum_partial_copy_generic(const char *src, char *dst,
        *                                        int len)
        *
        * Copies 'len' bytes from 'src' to 'dst' while accumulating their
        * checksum, starting from an initial sum of -1, and returns the sum
        * in %eax.  Arguments are read from the stack.
        *
        * Every load and store of the copy is wrapped in EXC(), whose fixup
        * target is label 7 (the common exit path below).
        *
        * %edi, %esi and %ebx are saved and restored; %ecx and %edx are
        * clobbered.
        */
    273	subl  $4,%esp		# reserve the FP(%esp) local
    274	pushl %edi
    275	pushl %esi
    276	pushl %ebx
    277	movl ARGBASE+12(%esp),%ecx	# len
    278	movl ARGBASE+4(%esp),%esi	# src
    279	movl ARGBASE+8(%esp),%edi	# dst
    280
    281	movl $-1, %eax			# sum
    282	testl $2, %edi			# Check alignment. 
    283	jz 2f				# Jump if alignment is ok.
    284	subl $2, %ecx			# Alignment uses up two bytes.
    285	jae 1f				# Jump if we had at least two bytes.
    286	addl $2, %ecx			# ecx was < 2.  Deal with it.
    287	jmp 4f
    288EXC(1:	movw (%esi), %bx	)
    289	addl $2, %esi
    290EXC(	movw %bx, (%edi)	)
    291	addl $2, %edi
    292	addw %bx, %ax	
    293	adcl $0, %eax
    2942:
    295	movl %ecx, FP(%esp)		# keep the byte count for the tail handling
    296	shrl $5, %ecx			# ecx = number of 32-byte blocks
    297	jz 2f
    298	testl %esi, %esi		# what's wrong with clc?
    299EXC(1:	movl (%esi), %ebx	)
    300EXC(	movl 4(%esi), %edx	)
    301	adcl %ebx, %eax
    302EXC(	movl %ebx, (%edi)	)
    303	adcl %edx, %eax
    304EXC(	movl %edx, 4(%edi)	)
    305
    306EXC(	movl 8(%esi), %ebx	)
    307EXC(	movl 12(%esi), %edx	)
    308	adcl %ebx, %eax
    309EXC(	movl %ebx, 8(%edi)	)
    310	adcl %edx, %eax
    311EXC(	movl %edx, 12(%edi)	)
    312
    313EXC(	movl 16(%esi), %ebx 	)
    314EXC(	movl 20(%esi), %edx	)
    315	adcl %ebx, %eax
    316EXC(	movl %ebx, 16(%edi)	)
    317	adcl %edx, %eax
    318EXC(	movl %edx, 20(%edi)	)
    319
    320EXC(	movl 24(%esi), %ebx	)
    321EXC(	movl 28(%esi), %edx	)
    322	adcl %ebx, %eax
    323EXC(	movl %ebx, 24(%edi)	)
    324	adcl %edx, %eax
    325EXC(	movl %edx, 28(%edi)	)
    326
    327	lea 32(%esi), %esi		# lea and dec leave CF alone, so the
    328	lea 32(%edi), %edi		# carry chain continues across iterations
    329	dec %ecx
    330	jne 1b
    331	adcl $0, %eax			# fold in the last carry
    3322:	movl FP(%esp), %edx
    333	movl %edx, %ecx
    334	andl $0x1c, %edx		# bytes left in whole dwords (0..28)
    335	je 4f
    336	shrl $2, %edx			# This clears CF
    337EXC(3:	movl (%esi), %ebx	)
    338	adcl %ebx, %eax
    339EXC(	movl %ebx, (%edi)	)
    340	lea 4(%esi), %esi
    341	lea 4(%edi), %edi
    342	dec %edx
    343	jne 3b
    344	adcl $0, %eax
    3454:	andl $3, %ecx			# 0..3 trailing bytes
    346	jz 7f
    347	cmpl $2, %ecx
    348	jb 5f				# exactly one byte
    349EXC(	movw (%esi), %cx	)
    350	leal 2(%esi), %esi
    351EXC(	movw %cx, (%edi)	)
    352	leal 2(%edi), %edi
    353	je 6f				# flags still from cmpl: exactly two bytes
    354	shll $16,%ecx			# three bytes: word goes high, byte goes low
    355EXC(5:	movb (%esi), %cl	)
    356EXC(	movb %cl, (%edi)	)
    3576:	addl %ecx, %eax
    358	adcl $0, %eax
    3597:
    360
    361	popl %ebx
    362	popl %esi
    363	popl %edi
    364	popl %ecx			# equivalent to addl $4,%esp
    365	RET
    366SYM_FUNC_END(csum_partial_copy_generic)
    367
    368#else
    369
    370/* Version for PentiumII/PPro */
    371
    372#define ROUND1(x) /* copy the dword at offset x and START a carry chain (addl ignores incoming CF) */ \
    373	EXC(movl x(%esi), %ebx	)	;	\
    374	addl %ebx, %eax			;	\
    375	EXC(movl %ebx, x(%edi)	)	;
    376
    377#define ROUND(x) /* copy the dword at offset x and CONTINUE the carry chain (adcl adds incoming CF) */ \
    378	EXC(movl x(%esi), %ebx	)	;	\
    379	adcl %ebx, %eax			;	\
    380	EXC(movl %ebx, x(%edi)	)	;
    381
    382#define ARGBASE 12	/* offset of the return address: three pushed registers */
    383		
    384SYM_FUNC_START(csum_partial_copy_generic)
       /*
        * PentiumII/PPro variant of
        *   unsigned int csum_partial_copy_generic(const char *src, char *dst,
        *                                          int len)
        *
        * Copies 'len' bytes from 'src' to 'dst' while accumulating their
        * checksum, starting from an initial sum of -1, and returns the sum
        * in %eax.  Arguments are read from the stack.
        *
        * The 16 ROUND expansions between labels 1 and 3 are entered part-way
        * through via a computed jump, so the first pass copies the
        * (len & 0x3c) leading bytes and every further pass copies 64 bytes.
        * Accesses wrapped in EXC() use label 7 (the exit path) as fixup.
        *
        * %ebx, %edi and %esi are saved and restored; %ecx and %edx are
        * clobbered.
        */
    385	pushl %ebx
    386	pushl %edi
    387	pushl %esi
    388	movl ARGBASE+4(%esp),%esi	#src
    389	movl ARGBASE+8(%esp),%edi	#dst	
    390	movl ARGBASE+12(%esp),%ecx	#len
    391	movl $-1, %eax			#sum
    392#	movl %ecx, %edx  
    393	movl %ecx, %ebx  
    394	movl %esi, %edx		# NOTE(review): looks dead, edx is overwritten by the lea below before any read
    395	shrl $6, %ecx     	# number of full 64-byte blocks
    396	andl $0x3c, %ebx  	# bytes of whole dwords in the partial block
    397	negl %ebx
    398	subl %ebx, %esi  	# advance src past the partial block
    399	subl %ebx, %edi  	# advance dst past the partial block
    400	lea  -1(%esi),%edx
    401	andl $-32,%edx		# edx = (esi - 1) rounded down to a 32-byte boundary
    402	lea 3f(%ebx,%ebx), %ebx	# 3f - 2*bytes; NOTE(review): relies on each ROUND encoding to 8 bytes
    403	testl %esi, %esi 	# clears CF before entering the chain
    404	JMP_NOSPEC ebx
    4051:	addl $64,%esi
    406	addl $64,%edi 
    407	EXC(movb -32(%edx),%bl)	; EXC(movb (%edx),%bl)	/* loaded values are discarded (ROUND1 reloads ebx); presumably touches upcoming source lines early - confirm */
    408	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)	
    409	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)	
    410	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)	
    411	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)	
    4123:	adcl $0,%eax		# fold in the last carry of this pass
    413	addl $64, %edx
    414	dec %ecx
    415	jge 1b			# another full 64-byte block to copy
    4164:	movl ARGBASE+12(%esp),%edx	#len
    417	andl $3, %edx		# 0..3 trailing bytes
    418	jz 7f
    419	cmpl $2, %edx
    420	jb 5f			# exactly one byte
    421EXC(	movw (%esi), %dx         )
    422	leal 2(%esi), %esi
    423EXC(	movw %dx, (%edi)         )
    424	leal 2(%edi), %edi
    425	je 6f			# flags still from cmpl: exactly two bytes
    426	shll $16,%edx		# three bytes: word goes high, byte goes low
    4275:
    428EXC(	movb (%esi), %dl         )
    429EXC(	movb %dl, (%edi)         )
    4306:	addl %edx, %eax
    431	adcl $0, %eax
    4327:
    433
    434	popl %esi
    435	popl %edi
    436	popl %ebx
    437	RET
    438SYM_FUNC_END(csum_partial_copy_generic)
    439				
    440#undef ROUND
    441#undef ROUND1		
    442		
    443#endif
    444EXPORT_SYMBOL(csum_partial_copy_generic)