cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

csumpartialcopygeneric.S (6834B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 *  linux/arch/arm/lib/csumpartialcopygeneric.S
      4 *
      5 *  Copyright (C) 1995-2001 Russell King
      6 */
      7#include <asm/assembler.h>
      8
      9/*
     10 * unsigned int
     11 * csum_partial_copy_xxx(const char *src, char *dst, int len)
     12 *  r0 = src, r1 = dst, r2 = len (r3 = sum is seeded with -1 at entry, not passed in)
     13 *  Returns : r0 = checksum
     14 *
     15 * Note that 'tst' and 'teq' preserve the carry flag.
     16 */
     17
     18src	.req	r0		/* source pointer */
     19dst	.req	r1		/* destination pointer, post-incremented by every store */
     20len	.req	r2		/* byte count */
     21sum	.req	r3		/* running checksum accumulator */
     22
     23.Lzero:		mov	r0, sum
       		/*
       		 * Zero-length exit, reached from .Lless8 when len == 0:
       		 * nothing is copied and the current sum is returned in r0.
       		 * load_regs is a macro defined outside this listing;
       		 * presumably it restores what save_regs stored and returns
       		 * to the caller -- TODO confirm.
       		 */
     24		load_regs
     25
     26		/*
     27		 * Align an unaligned destination pointer.  We know that
     28		 * we have >= 8 bytes here, so we don't need to check
     29		 * the length.  Note that the source pointer hasn't been
     30		 * aligned yet.
       		 *
       		 * Entered with 'blne' from the main entry path, so lr holds
       		 * the return address and both exits return through lr.
       		 * Copies 1 byte when dst is odd, then 2 more bytes when bit 1
       		 * of dst is still set (1, 2 or 3 bytes in total).
       		 *
       		 * The checksum carry lives in the C flag between the 'adcs'
       		 * instructions: 'sub' (no S suffix) and 'tst' leave it alone.
       		 *
       		 * load1b/load2b and put_byte_0/put_byte_1 are macros defined
       		 * outside this listing.  Presumably the former fetch one/two
       		 * source bytes and advance src, and the latter position a
       		 * byte in its lane of a 16-bit word -- TODO confirm.
     31		 */
     32.Ldst_unaligned:
     33		tst	dst, #1
     34		beq	.Ldst_16bit
     35
       		/* dst is odd: move a single byte so dst becomes 16-bit aligned */
     36		load1b	ip
     37		sub	len, len, #1
     38		adcs	sum, sum, ip, put_byte_1	@ update checksum
     39		strb	ip, [dst], #1
     40		tst	dst, #2
     41		reteq	lr			@ dst is now 32bit aligned
     42
       		/* dst is 16-bit but not 32-bit aligned: move two more bytes */
     43.Ldst_16bit:	load2b	r8, ip
     44		sub	len, len, #2
     45		adcs	sum, sum, r8, put_byte_0
     46		strb	r8, [dst], #1
     47		adcs	sum, sum, ip, put_byte_1
     48		strb	ip, [dst], #1
     49		ret	lr			@ dst is now 32bit aligned
     50
     51		/*
     52		 * Handle 0 to 7 bytes, with any alignment of source and
     53		 * destination pointers.  Note that when we get here, C = 0
       		 * (the only branch to this label in this listing is the 'blo'
       		 * that follows 'cmp len, #8', which is taken with C clear).
       		 *
       		 * Everything is moved with byte stores: an optional leading
       		 * byte when dst is odd, then byte pairs while len has bit 1
       		 * or bit 2 set, then an optional trailing byte.  All paths
       		 * finish at .Ldone, except len == 0 which goes to .Lzero.
     54		 */
     55.Lless8:	teq	len, #0			@ check for zero count
     56		beq	.Lzero
     57
     58		/* we must have at least one byte. */
     59		tst	dst, #1			@ dst 16-bit aligned
     60		beq	.Lless8_aligned
     61
     62		/* Align dst */
     63		load1b	ip
     64		sub	len, len, #1
     65		adcs	sum, sum, ip, put_byte_1	@ update checksum
     66		strb	ip, [dst], #1
     67		tst	len, #6
     68		beq	.Lless8_byteonly
     69
       		/* pair loop: two bytes per pass, repeated while (len & 6) != 0 */
     701:		load2b	r8, ip
     71		sub	len, len, #2
     72		adcs	sum, sum, r8, put_byte_0
     73		strb	r8, [dst], #1
     74		adcs	sum, sum, ip, put_byte_1
     75		strb	ip, [dst], #1
     76.Lless8_aligned:
     77		tst	len, #6
     78		bne	1b
     79.Lless8_byteonly:
       		/* at most one byte is left at this point */
     80		tst	len, #1
     81		beq	.Ldone
     82		load1b	r8
     83		adcs	sum, sum, r8, put_byte_0	@ update checksum
     84		strb	r8, [dst], #1
     85		b	.Ldone
     86
     87FN_ENTRY
       		/*
       		 * Main entry: copy len bytes from src to dst while summing
       		 * them, and return the checksum in r0.
       		 *
       		 * FN_ENTRY/FN_EXIT, save_regs/load_regs and the loadNb/loadNl
       		 * accessors are macros defined outside this listing
       		 * (presumably by the file that includes this one -- TODO
       		 * confirm).
       		 *
       		 * sum is seeded with -1 below, so whatever the caller left
       		 * in r3 is ignored.
       		 */
     88		save_regs
     89		mov	sum, #-1
     90
     91		cmp	len, #8			@ Ensure that we have at least
     92		blo	.Lless8			@ 8 bytes to copy.
     93
     94		adds	sum, sum, #0		@ C = 0
     95		tst	dst, #3			@ Test destination alignment
     96		blne	.Ldst_unaligned		@ align destination, return here
     97
     98		/*
     99		 * Ok, the dst pointer is now 32bit aligned, and we know
    100		 * that we must have more than 4 bytes to copy.  Note
    101		 * that C contains the carry from the dst alignment above.
    102		 */
    103
    104		tst	src, #3			@ Test source alignment
    105		bne	.Lsrc_not_aligned
    106
    107		/* Routine for src & dst aligned */
    108
       		/*
       		 * ip = len with the low four bits cleared: the number of
       		 * bytes handled by the 16-byte loop.  The loop is counted
       		 * with 'sub' + 'teq' so that C keeps carrying the checksum
       		 * from one 'adcs' to the next.
       		 */
    109		bics	ip, len, #15
    110		beq	2f
    111
    1121:		load4l	r4, r5, r6, r7
    113		stmia	dst!, {r4, r5, r6, r7}
    114		adcs	sum, sum, r4
    115		adcs	sum, sum, r5
    116		adcs	sum, sum, r6
    117		adcs	sum, sum, r7
    118		sub	ip, ip, #16
    119		teq	ip, #0
    120		bne	1b
    121
       		/* ip = len & 12: one optional 8-byte step, one optional 4-byte step */
    1222:		ands	ip, len, #12
    123		beq	4f
    124		tst	ip, #8
    125		beq	3f
    126		load2l	r4, r5
    127		stmia	dst!, {r4, r5}
    128		adcs	sum, sum, r4
    129		adcs	sum, sum, r5
    130		tst	ip, #4
    131		beq	4f
    132
    1333:		load1l	r4
    134		str	r4, [dst], #4
    135		adcs	sum, sum, r4
    136
       		/*
       		 * len & 3 trailing bytes.  One more load1l is issued and the
       		 * bytes are peeled off r4 with get_byte_N (operand macros
       		 * defined elsewhere).  When two or more bytes remain, the
       		 * first two are summed together via 'r4, lspush #16' and
       		 * stored; r5 is left holding the candidate last byte.
       		 */
    1374:		ands	len, len, #3
    138		beq	.Ldone
    139		load1l	r4
    140		tst	len, #2
    141		mov	r5, r4, get_byte_0
    142		beq	.Lexit
    143		adcs	sum, sum, r4, lspush #16
    144		strb	r5, [dst], #1
    145		mov	r5, r4, get_byte_1
    146		strb	r5, [dst], #1
    147		mov	r5, r4, get_byte_2
       		/*
       		 * Shared tail for all source alignments: when len is odd,
       		 * store the byte in r5, mask it to 8 bits and add it to sum.
       		 */
    148.Lexit:		tst	len, #1
    149		strbne	r5, [dst], #1
    150		andne	r5, r5, #255
    151		adcsne	sum, sum, r5, put_byte_0
    152
    153		/*
    154		 * If the dst pointer was not 16-bit aligned, we
    155		 * need to rotate the checksum here to get around
    156		 * the inefficient byte manipulations in the
    157		 * architecture independent code.
       		 *
       		 * 'adc' folds the last pending carry into the result.  The
       		 * original dst is re-read from [sp, #0] (a slot presumably
       		 * written by save_regs -- TODO confirm), because the dst
       		 * register itself has been advanced by the copy.
    158		 */
    159.Ldone:		adc	r0, sum, #0
    160		ldr	sum, [sp, #0]		@ dst
    161		tst	sum, #1
    162		movne	r0, r0, ror #8
    163		load_regs
    164
    165.Lsrc_not_aligned:
       		/*
       		 * dst is word aligned but src is not.  src is rounded down
       		 * to a word boundary and read with the loadNl macros; each
       		 * output word is assembled from the remainder of the previous
       		 * source word (kept in r4) and the start of the next one.
       		 * lspull/lspush are shift macros defined elsewhere
       		 * (presumably endian-dependent lsr/lsl -- TODO confirm).
       		 *
       		 * ip = src & 3 selects the variant: 1 falls through below,
       		 * 2 goes to .Lsrc2_aligned, 3 to .Lsrc3_aligned.  The carry
       		 * from dst alignment is folded into sum first, because
       		 * 'cmp ip, #2' overwrites C.  That compare leaves C clear
       		 * only for ip == 1, so this variant needs no explicit carry
       		 * reset, while the other two start with 'adds sum, sum, #0'.
       		 */
    166		adc	sum, sum, #0		@ include C from dst alignment
    167		and	ip, src, #3
    168		bic	src, src, #3
    169		load1l	r5
    170		cmp	ip, #2
    171		beq	.Lsrc2_aligned
    172		bhi	.Lsrc3_aligned
       		/* src offset 1: r4 carries 24 bits over, 8 bits come from the next word */
    173		mov	r4, r5, lspull #8		@ C = 0
    174		bics	ip, len, #15
    175		beq	2f
       		/* 16 bytes per pass; 'sub' + 'teq' keep C intact for the adcs chain */
    1761:		load4l	r5, r6, r7, r8
    177		orr	r4, r4, r5, lspush #24
    178		mov	r5, r5, lspull #8
    179		orr	r5, r5, r6, lspush #24
    180		mov	r6, r6, lspull #8
    181		orr	r6, r6, r7, lspush #24
    182		mov	r7, r7, lspull #8
    183		orr	r7, r7, r8, lspush #24
    184		stmia	dst!, {r4, r5, r6, r7}
    185		adcs	sum, sum, r4
    186		adcs	sum, sum, r5
    187		adcs	sum, sum, r6
    188		adcs	sum, sum, r7
    189		mov	r4, r8, lspull #8
    190		sub	ip, ip, #16
    191		teq	ip, #0
    192		bne	1b
       		/* ip = len & 12: optional 8-byte step, then optional 4-byte step */
    1932:		ands	ip, len, #12
    194		beq	4f
    195		tst	ip, #8
    196		beq	3f
    197		load2l	r5, r6
    198		orr	r4, r4, r5, lspush #24
    199		mov	r5, r5, lspull #8
    200		orr	r5, r5, r6, lspush #24
    201		stmia	dst!, {r4, r5}
    202		adcs	sum, sum, r4
    203		adcs	sum, sum, r5
    204		mov	r4, r6, lspull #8
    205		tst	ip, #4
    206		beq	4f
    2073:		load1l	r5
    208		orr	r4, r4, r5, lspush #24
    209		str	r4, [dst], #4
    210		adcs	sum, sum, r4
    211		mov	r4, r5, lspull #8
       		/*
       		 * len & 3 trailing bytes, all taken from the carry-over in
       		 * r4 (no further load).  With two or more bytes left, the
       		 * first two are summed via 'r4, lspush #16' and stored; the
       		 * possible last byte is handed to .Lexit in r5.
       		 */
    2124:		ands	len, len, #3
    213		beq	.Ldone
    214		mov	r5, r4, get_byte_0
    215		tst	len, #2
    216		beq	.Lexit
    217		adcs	sum, sum, r4, lspush #16
    218		strb	r5, [dst], #1
    219		mov	r5, r4, get_byte_1
    220		strb	r5, [dst], #1
    221		mov	r5, r4, get_byte_2
    222		b	.Lexit
    223
    224.Lsrc2_aligned:	mov	r4, r5, lspull #16
       		/*
       		 * src offset 2 variant of the unaligned-source copy: r4
       		 * carries 16 bits over from the previous source word and
       		 * 16 bits come from the next one.  Reached with 'beq' after
       		 * 'cmp ip, #2', which leaves C set, so the carry is cleared
       		 * explicitly before the first 'adcs'.
       		 * lspull/lspush are shift macros defined elsewhere
       		 * (presumably endian-dependent -- TODO confirm).
       		 */
    225		adds	sum, sum, #0
    226		bics	ip, len, #15
    227		beq	2f
       		/* 16 bytes per pass; 'sub' + 'teq' keep C intact for the adcs chain */
    2281:		load4l	r5, r6, r7, r8
    229		orr	r4, r4, r5, lspush #16
    230		mov	r5, r5, lspull #16
    231		orr	r5, r5, r6, lspush #16
    232		mov	r6, r6, lspull #16
    233		orr	r6, r6, r7, lspush #16
    234		mov	r7, r7, lspull #16
    235		orr	r7, r7, r8, lspush #16
    236		stmia	dst!, {r4, r5, r6, r7}
    237		adcs	sum, sum, r4
    238		adcs	sum, sum, r5
    239		adcs	sum, sum, r6
    240		adcs	sum, sum, r7
    241		mov	r4, r8, lspull #16
    242		sub	ip, ip, #16
    243		teq	ip, #0
    244		bne	1b
       		/* ip = len & 12: optional 8-byte step, then optional 4-byte step */
    2452:		ands	ip, len, #12
    246		beq	4f
    247		tst	ip, #8
    248		beq	3f
    249		load2l	r5, r6
    250		orr	r4, r4, r5, lspush #16
    251		mov	r5, r5, lspull #16
    252		orr	r5, r5, r6, lspush #16
    253		stmia	dst!, {r4, r5}
    254		adcs	sum, sum, r4
    255		adcs	sum, sum, r5
    256		mov	r4, r6, lspull #16
    257		tst	ip, #4
    258		beq	4f
    2593:		load1l	r5
    260		orr	r4, r4, r5, lspush #16
    261		str	r4, [dst], #4
    262		adcs	sum, sum, r4
    263		mov	r4, r5, lspull #16
       		/*
       		 * len & 3 trailing bytes.  The carry-over in r4 covers at
       		 * most two of them: with two or more left, r4 is summed
       		 * as-is and its two bytes stored.  A third byte, if any, is
       		 * fetched with load1b and handed to .Lexit in r5.
       		 */
    2644:		ands	len, len, #3
    265		beq	.Ldone
    266		mov	r5, r4, get_byte_0
    267		tst	len, #2
    268		beq	.Lexit
    269		adcs	sum, sum, r4
    270		strb	r5, [dst], #1
    271		mov	r5, r4, get_byte_1
    272		strb	r5, [dst], #1
    273		tst	len, #1
    274		beq	.Ldone
    275		load1b	r5
    276		b	.Lexit
    277
    278.Lsrc3_aligned:	mov	r4, r5, lspull #24
       		/*
       		 * src offset 3 variant of the unaligned-source copy: r4
       		 * carries 8 bits over from the previous source word and
       		 * 24 bits come from the next one.  Reached with 'bhi' after
       		 * 'cmp ip, #2', which leaves C set, so the carry is cleared
       		 * explicitly before the first 'adcs'.
       		 * lspull/lspush are shift macros defined elsewhere
       		 * (presumably endian-dependent -- TODO confirm).
       		 */
    279		adds	sum, sum, #0
    280		bics	ip, len, #15
    281		beq	2f
       		/* 16 bytes per pass; 'sub' + 'teq' keep C intact for the adcs chain */
    2821:		load4l	r5, r6, r7, r8
    283		orr	r4, r4, r5, lspush #8
    284		mov	r5, r5, lspull #24
    285		orr	r5, r5, r6, lspush #8
    286		mov	r6, r6, lspull #24
    287		orr	r6, r6, r7, lspush #8
    288		mov	r7, r7, lspull #24
    289		orr	r7, r7, r8, lspush #8
    290		stmia	dst!, {r4, r5, r6, r7}
    291		adcs	sum, sum, r4
    292		adcs	sum, sum, r5
    293		adcs	sum, sum, r6
    294		adcs	sum, sum, r7
    295		mov	r4, r8, lspull #24
    296		sub	ip, ip, #16
    297		teq	ip, #0
    298		bne	1b
       		/* ip = len & 12: optional 8-byte step, then optional 4-byte step */
    2992:		ands	ip, len, #12
    300		beq	4f
    301		tst	ip, #8
    302		beq	3f
    303		load2l	r5, r6
    304		orr	r4, r4, r5, lspush #8
    305		mov	r5, r5, lspull #24
    306		orr	r5, r5, r6, lspush #8
    307		stmia	dst!, {r4, r5}
    308		adcs	sum, sum, r4
    309		adcs	sum, sum, r5
    310		mov	r4, r6, lspull #24
    311		tst	ip, #4
    312		beq	4f
    3133:		load1l	r5
    314		orr	r4, r4, r5, lspush #8
    315		str	r4, [dst], #4
    316		adcs	sum, sum, r4
    317		mov	r4, r5, lspull #24
       		/*
       		 * len & 3 trailing bytes.  The carry-over in r4 covers only
       		 * one of them.  With two or more left, that byte is stored
       		 * and summed, then load1l refills r4 to supply the second
       		 * byte (summed via 'r4, lspush #24') and the candidate third
       		 * byte, which is handed to .Lexit in r5.
       		 */
    3184:		ands	len, len, #3
    319		beq	.Ldone
    320		mov	r5, r4, get_byte_0
    321		tst	len, #2
    322		beq	.Lexit
    323		strb	r5, [dst], #1
    324		adcs	sum, sum, r4
    325		load1l	r4
    326		mov	r5, r4, get_byte_0
    327		strb	r5, [dst], #1
    328		adcs	sum, sum, r4, lspush #24
    329		mov	r5, r4, get_byte_1
    330		b	.Lexit
    331FN_EXIT