cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

checksum_32.S (6420B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * This file contains assembly-language implementations
      4 * of IP-style 1's complement checksum routines.
      5 *	
      6 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
      7 *
      8 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
      9 */
     10
     11#include <linux/sys.h>
     12#include <asm/processor.h>
     13#include <asm/cache.h>
     14#include <asm/errno.h>
     15#include <asm/ppc_asm.h>
     16#include <asm/export.h>
     17
     18	.text
     19
     20/*
     21 * computes the checksum of a memory block at buff, length len,
     22 * and adds in "sum" (32-bit)
     23 *
     24 * __csum_partial(buff, len, sum)
     25 */
     26_GLOBAL(__csum_partial)
	/*
	 * Register use, following the argument order in the prototype
	 * comment above (assumes the usual PowerPC argument registers):
	 *   r3 = buff, kept biased by -4 so that the data is always at
	 *        4(r3) and lwzu can advance the pointer as it loads
	 *   r4 = len; reduced by 2 if an alignment halfword is consumed
	 *   r5 = running sum; each adde also adds in the carry (CA) left
	 *        by the previous add
	 *   r0, r6-r8 = scratch, CTR = loop counter
	 * The result is returned in r3.
	 */
     27	subi	r3,r3,4		/* bias pointer: data is now at 4(r3) */
     28	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
     29	beq	3f		/* if we're doing < 4 bytes */
     30	andi.	r0,r3,2		/* Align buffer to longword boundary */
     31	beq+	1f		/* bit 1 clear: no alignment halfword needed */
     32	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
     33	subi	r4,r4,2
     34	addi	r3,r3,2
     35	srwi.	r6,r4,2		/* # words to do */
     36	adde	r5,r5,r0	/* add the halfword; CR0 is left untouched */
     37	beq	3f		/* tests the srwi. result: no whole word left */
     381:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
     39	beq	21f		/* word count is already a multiple of 4 */
     40	mtctr	r6		/* CTR = 1..3 leading words */
     412:	lwzu	r0,4(r3)	/* load next word, advancing r3 by 4 */
     42	adde	r5,r5,r0
     43	bdnz	2b
     4421:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
     45	beq	3f
	/*
	 * 16-byte blocks. The fourth word of each block (r8) is loaded
	 * with lwzu but only added at the top of the next iteration, or
	 * at 23: once the last block has been loaded.
	 */
     46	lwz	r0,4(r3)
     47	mtctr	r6
     48	lwz	r6,8(r3)	/* r6 is reused as data now that CTR holds the count */
     49	adde	r5,r5,r0
     50	lwz	r7,12(r3)
     51	adde	r5,r5,r6
     52	lwzu	r8,16(r3)	/* fourth word; r3 advances by 16 */
     53	adde	r5,r5,r7
     54	bdz	23f		/* only one block: skip the loop */
     5522:	lwz	r0,4(r3)
     56	adde	r5,r5,r8	/* fourth word of the previous block */
     57	lwz	r6,8(r3)
     58	adde	r5,r5,r0
     59	lwz	r7,12(r3)
     60	adde	r5,r5,r6
     61	lwzu	r8,16(r3)
     62	adde	r5,r5,r7
     63	bdnz	22b
     6423:	adde	r5,r5,r8	/* fourth word of the final block */
     653:	andi.	r0,r4,2		/* trailing halfword? */
     66	beq+	4f
     67	lhz	r0,4(r3)
     68	addi	r3,r3,2
     69	adde	r5,r5,r0
     704:	andi.	r0,r4,1		/* trailing byte? */
     71	beq+	5f
     72	lbz	r0,4(r3)
     73	slwi	r0,r0,8		/* Upper byte of word */
     74	adde	r5,r5,r0
     755:	addze	r3,r5		/* add in final carry */
     76	blr
     77EXPORT_SYMBOL(__csum_partial)
     78
     79/*
     80 * Computes the checksum of a memory block at src, length len,
     81 * and adds in 0xffffffff, while copying the block to dst.
     82 * If an access exception occurs it returns zero.
     83 *
     84 * csum_partial_copy_generic(src, dst, len)
     85 */
     86#define CSUM_COPY_16_BYTES_WITHEX(n)	\
     878 ## n ## 0:			\
     88	lwz	r7,4(r4);	\
     898 ## n ## 1:			\
     90	lwz	r8,8(r4);	\
     918 ## n ## 2:			\
     92	lwz	r9,12(r4);	\
     938 ## n ## 3:			\
     94	lwzu	r10,16(r4);	\
     958 ## n ## 4:			\
     96	stw	r7,4(r6);	\
     97	adde	r12,r12,r7;	\
     988 ## n ## 5:			\
     99	stw	r8,8(r6);	\
    100	adde	r12,r12,r8;	\
    1018 ## n ## 6:			\
    102	stw	r9,12(r6);	\
    103	adde	r12,r12,r9;	\
    1048 ## n ## 7:			\
    105	stwu	r10,16(r6);	\
    106	adde	r12,r12,r10
    107
/*
 * CSUM_COPY_16_BYTES_WITHEX(n), defined above, copies 16 bytes and
 * checksums them: four word loads from 4..16(r4) into r7-r10 (the
 * last one, lwzu, advances r4 by 16), then four word stores to
 * 4..16(r6) (the last one, stwu, advances r6 by 16), each followed by
 * an adde of that word into r12. Every one of the eight memory
 * accesses gets its own numeric label, 8<n>0 through 8<n>7.
 *
 * CSUM_COPY_16_BYTES_EXCODE(n), defined below, emits one EX_TABLE
 * entry per label of the matching WITHEX(n) expansion, all pointing at
 * the label fault. It refers to the labels backwards (b suffix), so it
 * must be expanded after the corresponding WITHEX(n).
 */
    108#define CSUM_COPY_16_BYTES_EXCODE(n)		\
    109	EX_TABLE(8 ## n ## 0b, fault);	\
    110	EX_TABLE(8 ## n ## 1b, fault);	\
    111	EX_TABLE(8 ## n ## 2b, fault);	\
    112	EX_TABLE(8 ## n ## 3b, fault);	\
    113	EX_TABLE(8 ## n ## 4b, fault);	\
    114	EX_TABLE(8 ## n ## 5b, fault);	\
    115	EX_TABLE(8 ## n ## 6b, fault);	\
    116	EX_TABLE(8 ## n ## 7b, fault);
    117
    118	.text
    119
/*
 * Local aliases for the cache geometry. L1_CACHE_BYTES and
 * L1_CACHE_SHIFT are not defined in this file (presumably they come
 * from <asm/cache.h>, included above - confirm).
 */
    120CACHELINE_BYTES = L1_CACHE_BYTES	/* step between prefetched lines */
    121LG_CACHELINE_BYTES = L1_CACHE_SHIFT	/* shift count: bytes -> cache lines */
    122CACHELINE_MASK = (L1_CACHE_BYTES-1)	/* mask for the offset within a line */
    123
    124_GLOBAL(csum_partial_copy_generic)
	/*
	 * Register use (argument order per the prototype comment above;
	 * assumes the usual PowerPC argument registers):
	 *   r3  = src on entry; afterwards reused as the leading-byte
	 *         collector and as the dcbt offset
	 *   r4  = dst on entry; becomes the load pointer (src - 4)
	 *   r5  = len, reduced as each phase consumes bytes
	 *   r6  = store pointer (dst - 4)
	 *   r12 = checksum accumulator, seeded with -1
	 *   r11 = constant 4, the dcbz offset that undoes the -4 bias on r6
	 *   cr7 = eq when the result must not be rotated before returning
	 * The checksum is returned in r3; the fault path returns 0.
	 */
    125	li	r12,-1
    126	addic	r0,r0,0			/* clear carry */
    127	addi	r6,r4,-4		/* r6 = dst - 4 */
    128	neg	r0,r4			/* r0 = -dst */
    129	addi	r4,r3,-4		/* r4 = src - 4 */
    130	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
    131	crset	4*cr7+eq		/* default: return without rotating */
    132	beq	58f			/* dst is already cache-line aligned */
    133
    134	cmplw	0,r5,r0			/* is this more than total to do? */
    135	blt	63f			/* if not much to do */
    136	rlwinm	r7,r6,3,0x8	/* r7 = 8 if r6 is odd, else 0 */
    137	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
    138	cmplwi	cr7,r7,0	/* is destination address even ? */
    139	andi.	r8,r0,3			/* get it word-aligned first */
    140	mtctr	r8
    141	beq+	61f
    142	li	r3,0			/* r3 collects the 1-3 leading bytes */
    14370:	lbz	r9,4(r4)		/* do some bytes */
    144	addi	r4,r4,1
    145	slwi	r3,r3,8
    146	rlwimi	r3,r9,0,24,31		/* insert the byte into the low 8 bits of r3 */
    14771:	stb	r9,4(r6)
    148	addi	r6,r6,1
    149	bdnz	70b
    150	adde	r12,r12,r3		/* add the collected bytes */
    15161:	subf	r5,r0,r5		/* len -= bytes up to the cache line */
    152	srwi.	r0,r0,2			/* whole words up to the cache line */
    153	mtctr	r0
    154	beq	58f
    15572:	lwzu	r9,4(r4)		/* do some words */
    156	adde	r12,r12,r9
    15773:	stwu	r9,4(r6)
    158	bdnz	72b
    159
    16058:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
    161	clrlwi	r5,r5,32-LG_CACHELINE_BYTES	/* r5 = bytes left after them */
    162	li	r11,4
    163	beq	63f			/* tests the srwi. result: no complete line */
    164
    165	/* Here we decide how far ahead to prefetch the source */
    166	li	r3,4			/* dcbt offset; 4 undoes the bias on r4 */
    167	cmpwi	r0,1
    168	li	r7,0			/* r7 = # cache lines prefetched ahead */
    169	ble	114f			/* at most one line: no prefetch ahead */
    170	li	r7,1
    171#if MAX_COPY_PREFETCH > 1
    172	/* Heuristically, for large transfers we prefetch
    173	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
    174	   we prefetch 1 cacheline ahead. */
    175	cmpwi	r0,MAX_COPY_PREFETCH
    176	ble	112f
    177	li	r7,MAX_COPY_PREFETCH
    178112:	mtctr	r7
    179111:	dcbt	r3,r4
    180	addi	r3,r3,CACHELINE_BYTES
    181	bdnz	111b
    182#else
    183	dcbt	r3,r4
    184	addi	r3,r3,CACHELINE_BYTES
    185#endif /* MAX_COPY_PREFETCH > 1 */
    186
	/*
	 * The cache-line loop runs in up to two passes: first r0 - r7
	 * lines with the dcbt offset r3 pointing r7 lines ahead, then
	 * (via the bne 114b below) the last r7 lines with r3 reset to 4
	 * and r7 = 0.
	 */
    187114:	subf	r8,r7,r0		/* r8 = lines to copy in this pass */
    188	mr	r0,r7			/* r0 = lines left for the next pass */
    189	mtctr	r8
    190
    19153:	dcbt	r3,r4			/* touch the source at offset r3 from r4 */
    19254:	dcbz	r11,r6			/* zero the destination cache line */
    193/* the main body of the cacheline loop */
    194	CSUM_COPY_16_BYTES_WITHEX(0)
    195#if L1_CACHE_BYTES >= 32
    196	CSUM_COPY_16_BYTES_WITHEX(1)
    197#if L1_CACHE_BYTES >= 64
    198	CSUM_COPY_16_BYTES_WITHEX(2)
    199	CSUM_COPY_16_BYTES_WITHEX(3)
    200#if L1_CACHE_BYTES >= 128
    201	CSUM_COPY_16_BYTES_WITHEX(4)
    202	CSUM_COPY_16_BYTES_WITHEX(5)
    203	CSUM_COPY_16_BYTES_WITHEX(6)
    204	CSUM_COPY_16_BYTES_WITHEX(7)
    205#endif
    206#endif
    207#endif
    208	bdnz	53b
    209	cmpwi	r0,0			/* lines held back for a second pass? */
    210	li	r3,4
    211	li	r7,0
    212	bne	114b			/* tests the cmpwi above */
    213
    21463:	srwi.	r0,r5,2			/* remaining whole words */
    215	mtctr	r0
    216	beq	64f
    21730:	lwzu	r0,4(r4)
    218	adde	r12,r12,r0
    21931:	stwu	r0,4(r6)
    220	bdnz	30b
    221
    22264:	andi.	r0,r5,2			/* trailing halfword? */
    223	beq+	65f
    22440:	lhz	r0,4(r4)
    225	addi	r4,r4,2
    22641:	sth	r0,4(r6)
    227	adde	r12,r12,r0
    228	addi	r6,r6,2
    22965:	andi.	r0,r5,1			/* trailing byte? */
    230	beq+	66f
    23150:	lbz	r0,4(r4)
    23251:	stb	r0,4(r6)
    233	slwi	r0,r0,8
    234	adde	r12,r12,r0
    23566:	addze	r3,r12			/* add in the final carry; result in r3 */
    236	beqlr+	cr7			/* return now unless a rotation is pending */
    237	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
    238	blr
    239
    240fault:
    241	li	r3,0			/* return 0 */
    242	blr
    243
	/*
	 * EX_TABLE is defined outside this file; presumably each entry
	 * makes a faulting access at the first label resume execution at
	 * the second one (fault) - TODO confirm against its definition.
	 */
    244	EX_TABLE(70b, fault);
    245	EX_TABLE(71b, fault);
    246	EX_TABLE(72b, fault);
    247	EX_TABLE(73b, fault);
    248	EX_TABLE(54b, fault);
    249
    250/*
    251 * this stuff handles faults in the cacheline loop; both the read part
    252 * and the write part branch to the same handler, fault
    253 */
    254	CSUM_COPY_16_BYTES_EXCODE(0)
    255#if L1_CACHE_BYTES >= 32
    256	CSUM_COPY_16_BYTES_EXCODE(1)
    257#if L1_CACHE_BYTES >= 64
    258	CSUM_COPY_16_BYTES_EXCODE(2)
    259	CSUM_COPY_16_BYTES_EXCODE(3)
    260#if L1_CACHE_BYTES >= 128
    261	CSUM_COPY_16_BYTES_EXCODE(4)
    262	CSUM_COPY_16_BYTES_EXCODE(5)
    263	CSUM_COPY_16_BYTES_EXCODE(6)
    264	CSUM_COPY_16_BYTES_EXCODE(7)
    265#endif
    266#endif
    267#endif
    268
    269	EX_TABLE(30b, fault);
    270	EX_TABLE(31b, fault);
    271	EX_TABLE(40b, fault);
    272	EX_TABLE(41b, fault);
    273	EX_TABLE(50b, fault);
    274	EX_TABLE(51b, fault);
    275
    276EXPORT_SYMBOL(csum_partial_copy_generic)
    277
    278/*
    279 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
    280 *			   const struct in6_addr *daddr,
    281 *			   __u32 len, __u8 proto, __wsum sum)
    282 */
    283
    284_GLOBAL(csum_ipv6_magic)
	/*
	 * Register use, following the argument order in the prototype
	 * comment above (assumes the usual PowerPC argument registers):
	 *   r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum
	 *   r0 = accumulator, r8-r11 = loaded address words
	 * The four words at r3 and the four words at r4 are added to sum
	 * in one addc/adde carry chain, with the loads interleaved between
	 * the adds. The 16-bit result is returned in r3.
	 */
    285	lwz	r8, 0(r3)
    286	lwz	r9, 4(r3)
    287	addc	r0, r7, r8	/* start the carry chain: sum + first word */
    288	lwz	r10, 8(r3)
    289	adde	r0, r0, r9
    290	lwz	r11, 12(r3)
    291	adde	r0, r0, r10
    292	lwz	r8, 0(r4)
    293	adde	r0, r0, r11
    294	lwz	r9, 4(r4)
    295	adde	r0, r0, r8
    296	lwz	r10, 8(r4)
    297	adde	r0, r0, r9
    298	lwz	r11, 12(r4)
    299	adde	r0, r0, r10
    300	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
    301	adde	r0, r0, r11
    302	adde	r0, r0, r5	/* add len + proto into the chain */
    303	addze	r0, r0		/* add in the final carry */
    304	rotlwi	r3, r0, 16	/* r3 = r0 with its halfwords swapped */
    305	add	r3, r0, r3	/* upper halfword = hi + lo plus the carry out of lo + hi */
    306	not	r3, r3		/* one's complement */
    307	rlwinm	r3, r3, 16, 16, 31	/* move the upper halfword to the low 16 bits */
    308	blr
    309EXPORT_SYMBOL(csum_ipv6_magic)