cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

csum-copy_64.S (4335B)


/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On an exception the function returns 0 (the all-ones seed below makes
 * a successful sum always nonzero); the wrapper zeroes the destination.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  32bit folded sum, or 0 if an exception occurred.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
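
/*
 * Rough C model of the computation (a sketch for orientation, not part
 * of the kernel sources): a ones'-complement style accumulation over
 * the buffer, done in 64-bit chunks while copying and folded down to
 * 32 bits at .Lfold:
 *
 *	u64 sum = 0xffffffff;		// seed, see movl $-1, %eax below
 *	while (len >= 8) {
 *		u64 w = *src++;
 *		*dst++ = w;
 *		u64 t = sum + w;
 *		sum = t + (t < sum);	// end-around carry, like the adcq chain
 *		len -= 8;
 *	}
 *	// ... sub-8-byte tail, then fold to 32 bits ...
 */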

	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm
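
/*
 * How the fixups work (descriptive note): each use of source/dest drops
 * a local label (10:/20:) on the load or store that follows and records
 * it in the exception table via _ASM_EXTABLE_UA. If that instruction
 * faults on a user address, control is redirected to .Lfault, which
 * makes the whole function return 0 instead of oopsing.
 */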

SYM_FUNC_START(csum_partial_copy_generic)
	subq  $5*8, %rsp
	movq  %rbx, 0*8(%rsp)
	movq  %r12, 1*8(%rsp)
	movq  %r14, 2*8(%rsp)
	movq  %r13, 3*8(%rsp)
	movq  %r15, 4*8(%rsp)

	movl  $-1, %eax		/* seed: a successful sum can never be 0 */
	xorl  %r9d, %r9d	/* r9 stays 0; used to fold in the last carry */
	movl  %edx, %ecx	/* rcx: working copy of the length */
	cmpl  $8, %ecx
	jb    .Lshort
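
	/*
	 * Dispatch (descriptive note): buffers shorter than 8 bytes go
	 * straight to the byte/word tail via .Lshort; otherwise the
	 * destination is aligned to 8 bytes in .Lunaligned first, and the
	 * 64-byte main loop plus the tail handlers below run on the
	 * remainder.
	 */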

	testb  $7, %sil			/* destination 8-byte aligned? */
	jne   .Lunaligned
.Laligned:
	movl  %ecx, %r12d

	shrq  $6, %r12
	jz	.Lhandle_tail       /* < 64 */

	clc

	/* main loop: checksum and copy in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %r15
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %r15, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax
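
	/*
	 * Sketch of the arithmetic above (not from the original comments):
	 * with CF cleared on entry, each adcq adds one 64-bit word plus the
	 * carry out of the previous addition, and the final adcq of r9
	 * (always zero) folds the last carry back in. Each step behaves
	 * like:
	 *
	 *	u64 add_ec(u64 sum, u64 w)	// end-around-carry add
	 *	{
	 *		u64 t = sum + w;
	 *		return t + (t < sum);	// t < sum <=> carry out
	 *	}
	 *
	 * decl and leaq do the loop bookkeeping precisely because neither
	 * writes CF, so the carry chain survives the whole iteration.
	 */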

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count, rcx.63: the end result needs to be rol8 */
	movq %rcx, %r10
	andl $63, %ecx
	shrl $3, %ecx
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax
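
	/*
	 * C equivalent of the fold above (a sketch): add the two 32-bit
	 * halves and fold the carry back in, preserving the checksum value
	 * of the 64-bit accumulator:
	 *
	 *	u32 fold(u64 sum)
	 *	{
	 *		u32 lo = sum, hi = sum >> 32;
	 *		u32 t = lo + hi;
	 *		return t + (t < lo);	// the adcl of r9d (zero)
	 *	}
	 */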

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl $1, %ecx
	jz   .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
	testq %r10, %r10
	js  .Lwas_odd
.Lout:
	movq 0*8(%rsp), %rbx
	movq 1*8(%rsp), %r12
	movq 2*8(%rsp), %r14
	movq 3*8(%rsp), %r13
	movq 4*8(%rsp), %r15
	addq $5*8, %rsp
	RET
.Lshort:
	movl %ecx, %r10d
	jmp  .L1
.Lunaligned:
	xorl %ebx, %ebx
	testb $1, %sil
	jne  .Lodd
1:	testb $2, %sil
	je   2f
	source
	movw (%rdi), %bx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	subq $2, %rcx
	leaq 2(%rsi), %rsi
	addq %rbx, %rax
2:	testb $4, %sil
	je .Laligned
	source
	movl (%rdi), %ebx
	dest
	movl %ebx, (%rsi)
	leaq 4(%rdi), %rdi
	subq $4, %rcx
	leaq 4(%rsi), %rsi
	addq %rbx, %rax
	jmp .Laligned
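
	/*
	 * Note (an observation, not in the original comments): plain addq
	 * is enough in this alignment prologue. At this point rax holds at
	 * most the 0xffffffff seed plus a few sub-8-byte chunks, so these
	 * additions cannot carry out of 64 bits and no adc chain is needed.
	 */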

.Lodd:
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	leaq 1(%rdi), %rdi
	leaq 1(%rsi), %rsi
	/* decrement, set MSB */
	leaq -1(%rcx, %rcx), %rcx
	rorq $1, %rcx
	shll $8, %ebx
	addq %rbx, %rax
	jmp 1b
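
	/*
	 * The leaq/rorq pair above does two jobs at once: rotating
	 * 2*len - 1 right by one bit yields len - 1 with bit 63 set, so the
	 * length is decremented for the byte just consumed while the sign
	 * bit records "started odd" for the .Lwas_odd fixup below.
	 */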

.Lwas_odd:
	roll $8, %eax
	jmp .Lout
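
	/*
	 * Why the rotate (descriptive note): after consuming one leading
	 * byte, every 16-bit quantity is accumulated byte-swapped relative
	 * to what the caller expects. A ones'-complement sum commutes with
	 * byte rotation, so a single rol $8 at the end undoes the swap; the
	 * first byte was pre-shifted by shll $8 in .Lodd to match.
	 */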

	/* Exception: just return 0 */
.Lfault:
	xorl %eax, %eax
	jmp  .Lout
SYM_FUNC_END(csum_partial_copy_generic)