csum_partial.S (16274B)
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\
	.set	pop

#define ADDC32(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1;					\
	.set	pop

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(_t0, _t1);						\
	ADDC(_t2, _t3);						\
	ADDC(sum, _t0);						\
	ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0

	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
	move	t2, a1

	andi	t7, src, 0x1			/* odd buffer? */

.Lhword_align:
	beqz	t7, .Lword_align
	andi	t8, src, 0x2

	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

.Lword_align:
	beqz	t8, .Ldword_align
	sltiu	t8, a1, 56

	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

.Ldword_align:
	bnez	t8, .Ldo_end_words
	move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, .Lqword_align
	andi	t8, src, 0x8

	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

.Lqword_align:
	beqz	t8, .Loword_align
	andi	t8, src, 0x10

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

.Loword_align:
	beqz	t8, .Lbegin_movement
	LONG_SRL	t8, a1, 0x7

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

.Lbegin_movement:
	beqz	t8, 1f
	andi	t2, a1, 0x40

.Lmove_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, .Lmove_128bytes
	.set	noreorder

1:
	beqz	t2, 1f
	andi	t2, a1, 0x20

.Lmove_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, .Ldo_end_words
	andi	t8, a1, 0x1c

.Lmove_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

.Ldo_end_words:
	beqz	t8, .Lsmall_csumcpy
	andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

.Lend_words:
	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x4
	bnez	t8, .Lend_words
	.set	noreorder

/* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	andi	t0, a1, 2

	/* Still a full word to go */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
#ifdef USE_DOUBLE
	dsll	t1, t1, 32			/* clear lower 32bit */
#endif
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	andi	t0, a1, 1

	/* Still a halfword to go */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	sll	t1, t1, 16

	lbu	t2, (src)
	nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

	/* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, t7
	.set	pop
#else
	beqz	t7, 1f			/* odd buffer alignment? */
	lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	reorder
	/* Add the passed partial csum. */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len)
 *	__csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details. Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 *	insn    : Load/store instruction
 *	type    : Instruction type
 *	reg     : Register
 *	addr    : Address
 *	handler : Exception handler
 */
#define EXC(insn, type, reg, addr)				\
	.if \mode == LEGACY_MODE;				\
9:		insn reg, addr;					\
		.section __ex_table,"a";			\
		PTR_WD	9b, .L_exc;				\
		.previous;					\
	/* This is enabled in EVA mode */			\
	.else;							\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR_WD	9b, .L_exc;			\
			.previous;				\
		.else;						\
			/* EVA without exception */		\
			insn reg, addr;				\
		.endif;						\
	.endif

#undef LOAD

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr)		EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sd, ST_INSN, reg, addr)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr)		EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sw, ST_INSN, reg, addr)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

	li	sum, -1
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, .Lcopy_bytes_checklen\@
	and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, .Ldst_unaligned\@
	nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
	nop
	SUB	len, 8*NBYTES			# subtract here for bgez loop
	.align	4
1:
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	LOAD(t4, UNIT(4)(src))
	LOAD(t5, UNIT(5)(src))
	LOAD(t6, UNIT(6)(src))
	LOAD(t7, UNIT(7)(src))
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst))
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst))
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst))
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst))
	ADDC(sum, t6)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES			# revert len (see above)

	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	and	rem, len, (NBYTES-1)		# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	LOAD(t0, 0(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because we can't assume read access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	ADD	t1, dst, len		# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3		# rem = number of bits to keep
	LOAD(t0, 0(src))
	SUB	bits, bits, rem		# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1))
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set	noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src))
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src))
	SUB	t2, t2, t1		# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst))
	SLL	t4, t1, 3		# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	LDFIRST(t0, FIRST(0)(src))
	LDFIRST(t1, FIRST(1)(src))
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src))
	LDREST(t1, REST(1)(src))
	LDFIRST(t2, FIRST(2)(src))
	LDFIRST(t3, FIRST(3)(src))
	LDREST(t2, REST(2)(src))
	LDREST(t3, REST(3)(src))
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	nop
1:
	LDFIRST(t0, FIRST(0)(src))
	LDREST(t0, REST(0)(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero		# partial word
	li	t3, SHIFT_START		# shift
#define COPY_BYTE(N)			\
	LOADBU(t0, N(src));		\
	SUB	len, len, 1;		\
	STOREB(t0, N(dst));		\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, .Lcopy_bytes_done\@; \
	or	t2, t0

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADBU(t0, NBYTES-2(src))
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst))
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* fold checksum */
	.set	push
	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, odd
	.set	pop
#else
	beqz	odd, 1f			/* odd buffer alignment? */
	lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	pop
	.set	reorder
	jr	ra
	.set	noreorder
	.endm

	.set	noreorder
.L_exc:
	jr	ra
	li	v0, 0

FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif
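
Three idioms recur throughout the file: the ADDC/ADDC32 macros perform an end-around-carry add (the sltu picks up the carry, the following addu folds it back in), the dsll32/daddu/sltu/dsra32/addu sequence folds the 64-bit running sum into 32 bits on 64-bit kernels, and wsbh (or the lui/addu 0x00ff00ff mask-and-shift fallback) swaps the bytes within each 16-bit half when the buffer started at an odd address. The C sketch below only illustrates that arithmetic; it is not kernel code, and the helper names addc32, csum_fold64 and csum_swab16 are invented here for the sketch.

	#include <stdint.h>

	/* End-around-carry add, mirroring ADDC/ADDC32:
	 *   addu sum, reg; sltu v1, sum, reg; addu sum, v1 */
	static uint32_t addc32(uint32_t sum, uint32_t x)
	{
		sum += x;
		return sum + (sum < x);		/* fold the carry back in */
	}

	/* Fold a 64-bit running sum to 32 bits, mirroring
	 *   dsll32 v1, sum, 0; daddu sum, v1; sltu v1, sum, v1;
	 *   dsra32 sum, sum, 0; addu sum, v1
	 * i.e. add the two 32-bit halves with end-around carry. */
	static uint32_t csum_fold64(uint64_t sum)
	{
		return addc32((uint32_t)sum, (uint32_t)(sum >> 32));
	}

	/* Swap the bytes within each 16-bit half, mirroring wsbh (or the
	 * 0x00ff00ff mask-and-shift fallback); applied when the buffer
	 * started at an odd address. */
	static uint32_t csum_swab16(uint32_t sum)
	{
		return ((sum & 0x00ff00ffu) << 8) | ((sum >> 8) & 0x00ff00ffu);
	}

On 64-bit builds the tail of csum_partial is then roughly: fold the running sum with csum_fold64, apply csum_swab16 if the odd-alignment flag (t7 here, odd in the copy variants) is set, and combine the result with the caller's partial checksum from a2 via addc32, returning it in v0.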