csum-copy_64.S (4335B)
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm

SYM_FUNC_START(csum_partial_copy_generic)
	subq $5*8, %rsp
	movq %rbx, 0*8(%rsp)
	movq %r12, 1*8(%rsp)
	movq %r14, 2*8(%rsp)
	movq %r13, 3*8(%rsp)
	movq %r15, 4*8(%rsp)

	movl $-1, %eax
	xorl %r9d, %r9d
	movl %edx, %ecx
	cmpl $8, %ecx
	jb .Lshort

	testb $7, %sil
	jne .Lunaligned
.Laligned:
	movl %ecx, %r12d

	shrq $6, %r12
	jz .Lhandle_tail	/* < 64 */

	clc

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq (%rdi), %rbx
	source
	movq 8(%rdi), %r8
	source
	movq 16(%rdi), %r11
	source
	movq 24(%rdi), %rdx

	source
	movq 32(%rdi), %r10
	source
	movq 40(%rdi), %r15
	source
	movq 48(%rdi), %r14
	source
	movq 56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	adcq %rbx, %rax
	adcq %r8, %rax
	adcq %r11, %rax
	adcq %rdx, %rax
	adcq %r10, %rax
	adcq %r15, %rax
	adcq %r14, %rax
	adcq %r13, %rax

	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz .Lloop

	adcq %r9, %rax

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count, rcx.63: the end result needs to be rol8 */
	movq %rcx, %r10
	andl $63, %ecx
	shrl $3, %ecx
	jz .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi	/* preserve carry */
	leaq 8(%rdi), %rdi
	jnz .Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
.L1:	/* .Lshort rejoins the common path here */
	shrl $1, %ecx
	jz .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz .Lende
	xorl %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
	testq %r10, %r10
	js .Lwas_odd
.Lout:
	movq 0*8(%rsp), %rbx
	movq 1*8(%rsp), %r12
	movq 2*8(%rsp), %r14
	movq 3*8(%rsp), %r13
	movq 4*8(%rsp), %r15
	addq $5*8, %rsp
	RET
.Lshort:
	movl %ecx, %r10d
	jmp .L1
.Lunaligned:
	xorl %ebx, %ebx
	testb $1, %sil
	jne .Lodd
1:	testb $2, %sil
	je 2f
	source
	movw (%rdi), %bx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	subq $2, %rcx
	leaq 2(%rsi), %rsi
	addq %rbx, %rax
2:	testb $4, %sil
	je .Laligned
	source
	movl (%rdi), %ebx
	dest
	movl %ebx, (%rsi)
	leaq 4(%rdi), %rdi
	subq $4, %rcx
	leaq 4(%rsi), %rsi
	addq %rbx, %rax
	jmp .Laligned

.Lodd:
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	leaq 1(%rdi), %rdi
	leaq 1(%rsi), %rsi
	/* decrement, set MSB */
	leaq -1(%rcx, %rcx), %rcx
	rorq $1, %rcx
	shll $8, %ebx
	addq %rbx, %rax
	jmp 1b

.Lwas_odd:
	roll $8, %eax
	jmp .Lout

	/* Exception: just return 0 */
.Lfault:
	xorl %eax, %eax
	jmp .Lout
SYM_FUNC_END(csum_partial_copy_generic)
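
For readers following the arithmetic rather than the register allocation, the sketch below is a minimal C model of what the routine computes: copy len bytes and return a 32-bit partial ones'-complement sum of the data. It is an illustration only, not kernel code. The name csum_copy_model() is made up here, and the model omits the exception handling, the ~0 seed loaded into %eax, and the .Lodd/.Lwas_odd byte-rotate correction for an odd source address; a little-endian host is assumed.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical reference model; not the kernel's interface. */
static uint32_t csum_copy_model(void *dst, const void *src, size_t len)
{
	const uint8_t *s = src;
	uint64_t sum = 0;
	size_t i;

	memcpy(dst, src, len);

	/* Accumulate the data as little-endian 16-bit words. */
	for (i = 0; i + 1 < len; i += 2)
		sum += (uint64_t)s[i] | ((uint64_t)s[i + 1] << 8);
	if (len & 1)
		sum += s[len - 1];	/* trailing odd byte */

	/* Fold the wide accumulator to 32 bits with end-around carry. */
	while (sum >> 32)
		sum = (sum & 0xffffffff) + (sum >> 32);
	return (uint32_t)sum;
}

The wide accumulator and the fold loop stand in for what the assembly gets from hardware: the unrolled adcq chain in .Lloop feeds every carry back through the flags, and the addl/adcl pair at .Lfold performs the same end-around reduction to 32 bits.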