checksum_32.S (9059B)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * Syntax: AT&T (GAS), 32-bit x86.  All arguments are read from the stack
 * and results are returned in %eax.  %ebx, %esi and %edi are saved and
 * restored by every routine that uses them.
 */

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buff, int len,
 *			     unsigned int sum)
 *
 * In:	 4(%esp) = buff, 8(%esp) = len, 12(%esp) = sum (offsets at entry,
 *	 i.e. before the two pushes done by the prologue)
 * Out:	 %eax = 32-bit ones-complement partial sum of sum and buff[0..len)
 * Clobb: %ecx, %edx, flags
 *
 * Register roles inside both variants:
 *	%eax	running sum
 *	%esi	read cursor into buff
 *	%ecx	remaining length / block counter
 *	%edx	copy of the length kept for the tail handling
 *	%ebx	scratch
 *
 * If buff starts on an odd address, one byte is summed first and the
 * running sum is rotated by 8 bits so that the remaining data can be
 * summed as aligned words; the result is rotated by 8 bits again before
 * returning (see the "testb $1, 12(%esp)" at the end of each variant).
 */

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	/* buf is odd: consume one byte, then work on the rotated sum */
	dec %ecx
	jl 8f			# len was 0: return sum untouched
	movzbl (%esi), %ebx
	/*
	 * Add the byte and fold the carry back in *before* rotating:
	 * roll overwrites CF, so a carry left pending here would be lost.
	 * This matches the PPro variant below.
	 */
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f			# now 4-byte aligned
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# keep the length for the tail code
	shrl $5, %ecx		# ecx = number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clears CF for the adcl chain below
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi	# lea and dec both leave CF alone
	dec %ecx
	jne 1b
	adcl $0, %eax		# fold the last carry of the block loop
2:	movl %edx, %ecx
	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx		# flags are consumed by jb and by je below
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f			# exactly two bytes left
	shll $16,%ecx		# three bytes: word goes to the high half
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)	# was buff odd?
	jz 8f
	roll $8, %eax		# undo the byte rotation done in the prologue
8:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx		# keep the length for the tail code
	movl %ecx, %ebx
	andl $0x7c, %ebx	# ebx = bytes in the leading partial block
	shrl $7, %ecx		# ecx = number of full 128-byte blocks
	addl %ebx,%esi		# point past the partial block
	shrl $2, %ebx
	negl %ebx
	/*
	 * Computed entry into the unrolled chain: step back 3 bytes from
	 * label 45 for every dword of the partial block, i.e. this relies
	 * on each "adcl disp8(%esi), %eax" below assembling to 3 bytes.
	 */
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clears CF for the adcl chain
	JMP_NOSPEC ebx

	/* Handle 2-byte-aligned regions */
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	/* buf is odd */
	dec %ecx
	jl 90f			# len was 0: return sum untouched
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b			# more than two bytes left
	je 32f			# exactly two bytes left
	addl $2, %ecx
	jz 80f			# nothing left
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# lea leaves CF alone
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testb $1, 12(%esp)	# was buf odd?
	jz 90f
	roll $8, %eax		# undo the byte rotation done at label 25
90:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst,
 *					  int len)
 *
 * Copy from src to dst while checksumming, otherwise like csum_partial.
 *
 * In:	 4(%esp) = src, 8(%esp) = dst, 12(%esp) = len (offsets at entry)
 * Out:	 %eax = 32-bit ones-complement sum of the copied data, starting
 *	 from an initial value of 0xffffffff
 * Clobb: %ecx, %edx, flags
 *
 * Every memory access is wrapped in EXC(), which registers an exception
 * table entry of type EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX that resumes at
 * the local label 7 (the common exit).
 * NOTE(review): presumably the fixup clears %eax so that a fault makes
 * the routine return 0 -- confirm against the extable handler.
 */

#define EXC(y...)						\
	9999: y;						\
	_ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Stack layout after the prologue: three saved registers, one scratch
 * dword at FP(%esp) holding the length, then the return address.
 * The arguments start at ARGBASE+4(%esp).
 */
#define ARGBASE 16
#define FP 12

SYM_FUNC_START(csum_partial_copy_generic)
	subl  $4,%esp		# scratch slot, addressed as FP(%esp)
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	movl $-1, %eax			# sum
	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
EXC(1:	movw (%esi), %bx	)
	addl $2, %esi
EXC(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)	# keep the length for the tail code
	shrl $5, %ecx		# ecx = number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clears CF for the adcl chain below
EXC(1:	movl (%esi), %ebx	)
EXC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 4(%edi)	)

EXC(	movl 8(%esi), %ebx	)
EXC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 12(%edi)	)

EXC(	movl 16(%esi), %ebx	)
EXC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 20(%edi)	)

EXC(	movl 24(%esi), %ebx	)
EXC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi	# lea and dec both leave CF alone
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax		# fold the last carry of the block loop
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx		# This clears CF
EXC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx		# flags are consumed by jb and by je below
	jb 5f
EXC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
EXC(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f			# exactly two bytes left
	shll $16,%ecx		# three bytes: word goes to the high half
EXC(5:	movb (%esi), %cl	)
EXC(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

/* ROUND1 starts a carry chain (addl); ROUND continues it (adcl). */
#define ROUND1(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ARGBASE 12

SYM_FUNC_START(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl $-1, %eax			#sum
	movl %ecx, %ebx
	shrl $6, %ecx		# ecx = number of full 64-byte blocks
	andl $0x3c, %ebx	# ebx = bytes in the leading partial block
	negl %ebx
	subl %ebx, %esi		# point src/dst past the partial block
	subl %ebx, %edi
	lea  -1(%esi),%edx
	andl $-32,%edx		# edx = 32-byte aligned address used by the
				# two read-only movb probes at label 1
	/*
	 * Computed entry into the unrolled chain: step back 2 bytes from
	 * label 3 for every byte of the partial block, i.e. this relies on
	 * each ROUND() assembling to 8 bytes per dword.
	 */
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi	# clears CF for the adcl chain
	JMP_NOSPEC ebx
1:	addl $64,%esi
	addl $64,%edi
	EXC(movb -32(%edx),%bl)	; EXC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx		# 0..3 trailing bytes
	jz 7f
	cmpl $2, %edx		# flags are consumed by jb and by je below
	jb 5f
EXC(	movw (%esi), %dx         )
	leal 2(%esi), %esi
EXC(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f			# exactly two bytes left
	shll $16,%edx		# three bytes: word goes to the high half
5:
EXC(	movb (%esi), %dl         )
EXC(	movb %dl, (%edi)         )
6:	addl %edx, %eax
	adcl $0, %eax
7:

	popl %esi
	popl %edi
	popl %ebx
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)