cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

checksum_32.S (4692B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * INET		An implementation of the TCP/IP protocol suite for the LINUX
      4 *		operating system.  INET is implemented using the  BSD Socket
      5 *		interface as the means of communication with the user level.
      6 *
      7 *		IP/TCP/UDP checksumming routines
      8 *
      9 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
     10 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
     11 *		Tom May, <ftom@netcom.com>
     12 *              Pentium Pro/II routines:
     13 *              Alexander Kjeldaas <astor@guardian.no>
     14 *              Finn Arne Gangstad <finnag@guardian.no>
     15 *		Lots of code moved from tcp.c and ip.c; see those files
     16 *		for more names.
     17 *
     18 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
     19 *			     handling.
     20 *		Andi Kleen,  add zeroing on error
     21 *                   converted to pure assembler
     22 */
     23
     24#include <asm/errno.h>
     25#include <asm/asm.h>
     26#include <asm/export.h>
     27				
     28/*
     29 * computes a partial checksum, e.g. for TCP/UDP fragments
     30 */
     31
     32/*	
     33unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
     34 */
     35		
     36.text
     37.align 4
     38.globl csum_partial
     39		
     40#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
     41
     42	  /*		
     43	   * Experiments with Ethernet and SLIP connections show that buff
     44	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
     45	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
     46	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
     47	   * alignment for the unrolled loop.
     48	   */		
     49csum_partial:
     50	pushl %esi
     51	pushl %ebx
     52	movl 20(%esp),%eax	# Function arg: unsigned int sum
     53	movl 16(%esp),%ecx	# Function arg: int len
     54	movl 12(%esp),%esi	# Function arg: unsigned char *buff
     55	testl $2, %esi		# Check alignment.
     56	jz 2f			# Jump if alignment is ok.
     57	subl $2, %ecx		# Alignment uses up two bytes.
     58	jae 1f			# Jump if we had at least two bytes.
     59	addl $2, %ecx		# ecx was < 2.  Deal with it.
     60	jmp 4f
     611:	movw (%esi), %bx
     62	addl $2, %esi
     63	addw %bx, %ax
     64	adcl $0, %eax
     652:
     66	movl %ecx, %edx
     67	shrl $5, %ecx
     68	jz 2f
     69	testl %esi, %esi
     701:	movl (%esi), %ebx
     71	adcl %ebx, %eax
     72	movl 4(%esi), %ebx
     73	adcl %ebx, %eax
     74	movl 8(%esi), %ebx
     75	adcl %ebx, %eax
     76	movl 12(%esi), %ebx
     77	adcl %ebx, %eax
     78	movl 16(%esi), %ebx
     79	adcl %ebx, %eax
     80	movl 20(%esi), %ebx
     81	adcl %ebx, %eax
     82	movl 24(%esi), %ebx
     83	adcl %ebx, %eax
     84	movl 28(%esi), %ebx
     85	adcl %ebx, %eax
     86	lea 32(%esi), %esi
     87	dec %ecx
     88	jne 1b
     89	adcl $0, %eax
     902:	movl %edx, %ecx
     91	andl $0x1c, %edx
     92	je 4f
     93	shrl $2, %edx		# This clears CF
     943:	adcl (%esi), %eax
     95	lea 4(%esi), %esi
     96	dec %edx
     97	jne 3b
     98	adcl $0, %eax
     994:	andl $3, %ecx
    100	jz 7f
    101	cmpl $2, %ecx
    102	jb 5f
    103	movw (%esi),%cx
    104	leal 2(%esi),%esi
    105	je 6f
    106	shll $16,%ecx
    1075:	movb (%esi),%cl
    1086:	addl %ecx,%eax
    109	adcl $0, %eax 
    1107:	
    111	popl %ebx
    112	popl %esi
    113	RET
    114
    115#else
    116
    117/* Version for PentiumII/PPro */
    118
    119csum_partial:
    120	pushl %esi
    121	pushl %ebx
    122	movl 20(%esp),%eax	# Function arg: unsigned int sum
    123	movl 16(%esp),%ecx	# Function arg: int len
    124	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf
    125
    126	testl $2, %esi         
    127	jnz 30f                 
    12810:
    129	movl %ecx, %edx
    130	movl %ecx, %ebx
    131	andl $0x7c, %ebx
    132	shrl $7, %ecx
    133	addl %ebx,%esi
    134	shrl $2, %ebx  
    135	negl %ebx
    136	lea 45f(%ebx,%ebx,2), %ebx
    137	testl %esi, %esi
    138	jmp *%ebx
    139
    140	# Handle 2-byte-aligned regions
    14120:	addw (%esi), %ax
    142	lea 2(%esi), %esi
    143	adcl $0, %eax
    144	jmp 10b
    145
    14630:	subl $2, %ecx          
    147	ja 20b                 
    148	je 32f
    149	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
    150	addl %ebx, %eax
    151	adcl $0, %eax
    152	jmp 80f
    15332:
    154	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
    155	adcl $0, %eax
    156	jmp 80f
    157
    15840: 
    159	addl -128(%esi), %eax
    160	adcl -124(%esi), %eax
    161	adcl -120(%esi), %eax
    162	adcl -116(%esi), %eax   
    163	adcl -112(%esi), %eax   
    164	adcl -108(%esi), %eax
    165	adcl -104(%esi), %eax
    166	adcl -100(%esi), %eax
    167	adcl -96(%esi), %eax
    168	adcl -92(%esi), %eax
    169	adcl -88(%esi), %eax
    170	adcl -84(%esi), %eax
    171	adcl -80(%esi), %eax
    172	adcl -76(%esi), %eax
    173	adcl -72(%esi), %eax
    174	adcl -68(%esi), %eax
    175	adcl -64(%esi), %eax     
    176	adcl -60(%esi), %eax     
    177	adcl -56(%esi), %eax     
    178	adcl -52(%esi), %eax   
    179	adcl -48(%esi), %eax   
    180	adcl -44(%esi), %eax
    181	adcl -40(%esi), %eax
    182	adcl -36(%esi), %eax
    183	adcl -32(%esi), %eax
    184	adcl -28(%esi), %eax
    185	adcl -24(%esi), %eax
    186	adcl -20(%esi), %eax
    187	adcl -16(%esi), %eax
    188	adcl -12(%esi), %eax
    189	adcl -8(%esi), %eax
    190	adcl -4(%esi), %eax
    19145:
    192	lea 128(%esi), %esi
    193	adcl $0, %eax
    194	dec %ecx
    195	jge 40b
    196	movl %edx, %ecx
    19750:	andl $3, %ecx
    198	jz 80f
    199
    200	# Handle the last 1-3 bytes without jumping
    201	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
    202	movl $0xffffff,%ebx	# by the shll and shrl instructions
    203	shll $3,%ecx
    204	shrl %cl,%ebx
    205	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
    206	addl %ebx,%eax
    207	adcl $0,%eax
    20880: 
    209	popl %ebx
    210	popl %esi
    211	RET
    212				
    213#endif
    214	EXPORT_SYMBOL(csum_partial)