checksum_64.S (3984B)
/* SPDX-License-Identifier: GPL-2.0 */
/* checksum.S: Sparc V9 optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996, 2000 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <asm/export.h>
	.text

csum_partial_fix_alignment:
	/* We checked for zero length already, so there must be
	 * at least one byte.
	 */
	be,pt		%icc, 1f
	 nop
	ldub		[%o0 + 0x00], %o4
	add		%o0, 1, %o0
	sub		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0
	be,pn		%icc, csum_partial_post_align
	 cmp		%o1, 2
	blu,pn		%icc, csum_partial_end_cruft
	 nop
	lduh		[%o0 + 0x00], %o5
	add		%o0, 2, %o0
	sub		%o1, 2, %o1
	ba,pt		%xcc, csum_partial_post_align
	 add		%o5, %o4, %o4

	.align		32
	.globl		csum_partial
	.type		csum_partial,#function
	EXPORT_SYMBOL(csum_partial)
csum_partial:		/* %o0=buff, %o1=len, %o2=sum */
	prefetch	[%o0 + 0x000], #n_reads
	clr		%o4
	prefetch	[%o0 + 0x040], #n_reads
	brz,pn		%o1, csum_partial_finish
	 andcc		%o0, 0x3, %g0

	/* We remember in %g7 whether the lowest bit of the
	 * address was set, because if it was we have to swap
	 * the upper and lower 8-bit fields of the sum we
	 * calculate.
	 */
	bne,pn		%icc, csum_partial_fix_alignment
	 andcc		%o0, 0x1, %g7

csum_partial_post_align:
	prefetch	[%o0 + 0x080], #n_reads
	andncc		%o1, 0x3f, %o3

	prefetch	[%o0 + 0x0c0], #n_reads
	sub		%o1, %o3, %o1
	brz,pn		%o3, 2f
	 prefetch	[%o0 + 0x100], #n_reads

	/* So that we don't need the non-pairing add-with-carry
	 * instructions, we accumulate 32-bit values into a
	 * 64-bit register.  At the end of the loop we fold it
	 * down to 32 bits, then to 16.
	 */
	prefetch	[%o0 + 0x140], #n_reads
1:	lduw		[%o0 + 0x00], %o5
	lduw		[%o0 + 0x04], %g1
	lduw		[%o0 + 0x08], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x0c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x10], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x14], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x18], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x1c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x20], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x24], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x28], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x2c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x30], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x34], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x38], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x3c], %g3
	add		%o4, %g1, %o4
	prefetch	[%o0 + 0x180], #n_reads
	add		%o4, %g2, %o4
	subcc		%o3, 0x40, %o3
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 add		%o4, %g3, %o4

2:	and		%o1, 0x3c, %o3
	brz,pn		%o3, 2f
	 sub		%o1, %o3, %o1
1:	lduw		[%o0 + 0x00], %o5
	subcc		%o3, 0x4, %o3
	add		%o0, 0x4, %o0
	bne,pt		%icc, 1b
	 add		%o4, %o5, %o4

2:
	/* fold 64-->32 */
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4

	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

csum_partial_end_cruft:
	/* %o4 has the 16-bit sum we have calculated so far. */
	cmp		%o1, 2
	blu,pt		%icc, 1f
	 nop
	lduh		[%o0 + 0x00], %o5
	sub		%o1, 2, %o1
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
1:	brz,pt		%o1, 1f
	 nop
	ldub		[%o0 + 0x00], %o5
	sub		%o1, 1, %o1
	add		%o0, 1, %o0
	sllx		%o5, 8, %o5
	add		%o4, %o5, %o4
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		%g7, 1f
	 nop

	/* We started with an odd byte, byte-swap the result. */
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4

1:	addcc		%o2, %o4, %o2
	addc		%g0, %o2, %o2

csum_partial_finish:
	retl
	 srl		%o2, 0, %o0
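
For readers following the assembly, here is a minimal C sketch of the same technique: 32-bit words are accumulated into a 64-bit accumulator (so the inner loop needs no add-with-carry), the accumulator is folded 64-->32 and then 32-->16, trailing halfword/byte cruft is absorbed, and the 16-bit result is byte-swapped when the buffer started at an odd address. The function name and prototype below are illustrative only, not the kernel's C interface; the 64-byte unrolling and prefetching are omitted, and big-endian loads are assumed, as on SPARC.

#include <stdint.h>
#include <stddef.h>

/* Illustrative reimplementation of the csum_partial logic above;
 * name and prototype are hypothetical, not the kernel's.
 */
static uint32_t csum_partial_sketch(const unsigned char *buf, size_t len,
				    uint32_t sum)
{
	uint64_t acc = 0;			/* mirrors %o4 */
	int odd = (uintptr_t)buf & 1;		/* mirrors %g7 */

	/* Alignment fixup: eat a leading byte and/or halfword so the
	 * main loop sees 4-byte-aligned addresses.
	 */
	if (odd && len) {
		acc += *buf++;		/* raw byte; the final swap fixes its position */
		len--;
	}
	if (((uintptr_t)buf & 2) && len >= 2) {
		acc += *(const uint16_t *)buf;
		buf += 2;
		len -= 2;
	}

	/* Main loop: plain 64-bit adds of 32-bit words; carries pile
	 * up in the upper half of acc instead of needing add-with-carry.
	 */
	while (len >= 4) {
		acc += *(const uint32_t *)buf;
		buf += 4;
		len -= 4;
	}

	/* Trailing halfword and byte (csum_partial_end_cruft). */
	if (len >= 2) {
		acc += *(const uint16_t *)buf;
		buf += 2;
		len -= 2;
	}
	if (len)
		acc += (uint64_t)*buf << 8;	/* big-endian: lone byte is the high half */

	/* fold 64-->32, twice, because the first add can carry out. */
	acc = (acc >> 32) + (uint32_t)acc;
	acc = (acc >> 32) + (uint32_t)acc;

	/* fold 32-->16, likewise twice. */
	uint32_t s = (uint32_t)acc;
	s = (s >> 16) + (s & 0xffff);
	s = (s >> 16) + (s & 0xffff);

	/* Started on an odd byte: swap the halves of the 16-bit sum. */
	if (odd)
		s = ((s >> 8) & 0xff) | ((s & 0xff) << 8);

	/* Add the incoming sum with end-around carry (the addcc/addc pair). */
	sum += s;
	if (sum < s)
		sum++;
	return sum;
}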
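
One detail worth calling out: each fold runs twice because a single round of (x >> 16) + (x & 0xffff) can itself carry into bit 16. Folding 0x0001ffff once yields 0x1 + 0xffff = 0x10000; only the second round reduces that to 0x1 + 0x0000 = 0x1. The same reasoning applies to the two 64-->32 folds of %o4, and the addcc/addc pair before csum_partial_finish performs the analogous end-around carry for the final 32-bit add into the incoming sum.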