checksum_32.S (6420B)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 *
 * Register use:
 *   r3   in:  buff.  Kept biased by -4 so that every access is made at
 *             offset 4 and lwzu can step through the words.
 *        out: the accumulated 32-bit sum (r5 plus the final carry).
 *   r4   in:  len in bytes; reduced by 2 when a leading halfword is
 *             consumed to align the buffer.
 *   r5   in:  sum; running accumulator.
 *   r6   loop counts, then data inside the unrolled loop.
 *   r0, r7, r8   data.
 *   ctr  loop counter.
 *
 * The end-around carry travels in XER[CA] from one adde to the next and is
 * folded in by the final addze, so nothing placed between the adde's may
 * modify CA.
 */
_GLOBAL(__csum_partial)
	subi	r3,r3,4		/* bias pointer: all accesses below use 4(r3) */
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f		/* CR0 is still from srwi.: no whole word left */
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)	/* (# words mod 4) single words */
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	/*
	 * Unrolled loop, 16 bytes per iteration.  The fourth word of each
	 * block is loaded into r8 and only added at the top of the next
	 * iteration (22:) or, for the last block, at 23:.
	 */
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8
3:	andi.	r0,r4,2		/* trailing halfword ? */
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1		/* trailing byte ? */
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
83 * 84 * csum_partial_copy_generic(src, dst, len) 85 */ 86#define CSUM_COPY_16_BYTES_WITHEX(n) \ 878 ## n ## 0: \ 88 lwz r7,4(r4); \ 898 ## n ## 1: \ 90 lwz r8,8(r4); \ 918 ## n ## 2: \ 92 lwz r9,12(r4); \ 938 ## n ## 3: \ 94 lwzu r10,16(r4); \ 958 ## n ## 4: \ 96 stw r7,4(r6); \ 97 adde r12,r12,r7; \ 988 ## n ## 5: \ 99 stw r8,8(r6); \ 100 adde r12,r12,r8; \ 1018 ## n ## 6: \ 102 stw r9,12(r6); \ 103 adde r12,r12,r9; \ 1048 ## n ## 7: \ 105 stwu r10,16(r6); \ 106 adde r12,r12,r10 107 108#define CSUM_COPY_16_BYTES_EXCODE(n) \ 109 EX_TABLE(8 ## n ## 0b, fault); \ 110 EX_TABLE(8 ## n ## 1b, fault); \ 111 EX_TABLE(8 ## n ## 2b, fault); \ 112 EX_TABLE(8 ## n ## 3b, fault); \ 113 EX_TABLE(8 ## n ## 4b, fault); \ 114 EX_TABLE(8 ## n ## 5b, fault); \ 115 EX_TABLE(8 ## n ## 6b, fault); \ 116 EX_TABLE(8 ## n ## 7b, fault); 117 118 .text 119 120CACHELINE_BYTES = L1_CACHE_BYTES 121LG_CACHELINE_BYTES = L1_CACHE_SHIFT 122CACHELINE_MASK = (L1_CACHE_BYTES-1) 123 124_GLOBAL(csum_partial_copy_generic) 125 li r12,-1 126 addic r0,r0,0 /* clear carry */ 127 addi r6,r4,-4 128 neg r0,r4 129 addi r4,r3,-4 130 andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ 131 crset 4*cr7+eq 132 beq 58f 133 134 cmplw 0,r5,r0 /* is this more than total to do? */ 135 blt 63f /* if not much to do */ 136 rlwinm r7,r6,3,0x8 137 rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ 138 cmplwi cr7,r7,0 /* is destination address even ? */ 139 andi. r8,r0,3 /* get it word-aligned first */ 140 mtctr r8 141 beq+ 61f 142 li r3,0 14370: lbz r9,4(r4) /* do some bytes */ 144 addi r4,r4,1 145 slwi r3,r3,8 146 rlwimi r3,r9,0,24,31 14771: stb r9,4(r6) 148 addi r6,r6,1 149 bdnz 70b 150 adde r12,r12,r3 15161: subf r5,r0,r5 152 srwi. r0,r0,2 153 mtctr r0 154 beq 58f 15572: lwzu r9,4(r4) /* do some words */ 156 adde r12,r12,r9 15773: stwu r9,4(r6) 158 bdnz 72b 159 16058: srwi. 
r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ 161 clrlwi r5,r5,32-LG_CACHELINE_BYTES 162 li r11,4 163 beq 63f 164 165 /* Here we decide how far ahead to prefetch the source */ 166 li r3,4 167 cmpwi r0,1 168 li r7,0 169 ble 114f 170 li r7,1 171#if MAX_COPY_PREFETCH > 1 172 /* Heuristically, for large transfers we prefetch 173 MAX_COPY_PREFETCH cachelines ahead. For small transfers 174 we prefetch 1 cacheline ahead. */ 175 cmpwi r0,MAX_COPY_PREFETCH 176 ble 112f 177 li r7,MAX_COPY_PREFETCH 178112: mtctr r7 179111: dcbt r3,r4 180 addi r3,r3,CACHELINE_BYTES 181 bdnz 111b 182#else 183 dcbt r3,r4 184 addi r3,r3,CACHELINE_BYTES 185#endif /* MAX_COPY_PREFETCH > 1 */ 186 187114: subf r8,r7,r0 188 mr r0,r7 189 mtctr r8 190 19153: dcbt r3,r4 19254: dcbz r11,r6 193/* the main body of the cacheline loop */ 194 CSUM_COPY_16_BYTES_WITHEX(0) 195#if L1_CACHE_BYTES >= 32 196 CSUM_COPY_16_BYTES_WITHEX(1) 197#if L1_CACHE_BYTES >= 64 198 CSUM_COPY_16_BYTES_WITHEX(2) 199 CSUM_COPY_16_BYTES_WITHEX(3) 200#if L1_CACHE_BYTES >= 128 201 CSUM_COPY_16_BYTES_WITHEX(4) 202 CSUM_COPY_16_BYTES_WITHEX(5) 203 CSUM_COPY_16_BYTES_WITHEX(6) 204 CSUM_COPY_16_BYTES_WITHEX(7) 205#endif 206#endif 207#endif 208 bdnz 53b 209 cmpwi r0,0 210 li r3,4 211 li r7,0 212 bne 114b 213 21463: srwi. r0,r5,2 215 mtctr r0 216 beq 64f 21730: lwzu r0,4(r4) 218 adde r12,r12,r0 21931: stwu r0,4(r6) 220 bdnz 30b 221 22264: andi. r0,r5,2 223 beq+ 65f 22440: lhz r0,4(r4) 225 addi r4,r4,2 22641: sth r0,4(r6) 227 adde r12,r12,r0 228 addi r6,r6,2 22965: andi. 
r0,r5,1 230 beq+ 66f 23150: lbz r0,4(r4) 23251: stb r0,4(r6) 233 slwi r0,r0,8 234 adde r12,r12,r0 23566: addze r3,r12 236 beqlr+ cr7 237 rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ 238 blr 239 240fault: 241 li r3,0 242 blr 243 244 EX_TABLE(70b, fault); 245 EX_TABLE(71b, fault); 246 EX_TABLE(72b, fault); 247 EX_TABLE(73b, fault); 248 EX_TABLE(54b, fault); 249 250/* 251 * this stuff handles faults in the cacheline loop and branches to either 252 * fault (if in read part) or fault (if in write part) 253 */ 254 CSUM_COPY_16_BYTES_EXCODE(0) 255#if L1_CACHE_BYTES >= 32 256 CSUM_COPY_16_BYTES_EXCODE(1) 257#if L1_CACHE_BYTES >= 64 258 CSUM_COPY_16_BYTES_EXCODE(2) 259 CSUM_COPY_16_BYTES_EXCODE(3) 260#if L1_CACHE_BYTES >= 128 261 CSUM_COPY_16_BYTES_EXCODE(4) 262 CSUM_COPY_16_BYTES_EXCODE(5) 263 CSUM_COPY_16_BYTES_EXCODE(6) 264 CSUM_COPY_16_BYTES_EXCODE(7) 265#endif 266#endif 267#endif 268 269 EX_TABLE(30b, fault); 270 EX_TABLE(31b, fault); 271 EX_TABLE(40b, fault); 272 EX_TABLE(41b, fault); 273 EX_TABLE(50b, fault); 274 EX_TABLE(51b, fault); 275 276EXPORT_SYMBOL(csum_partial_copy_generic) 277 278/* 279 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr, 280 * const struct in6_addr *daddr, 281 * __u32 len, __u8 proto, __wsum sum) 282 */ 283 284_GLOBAL(csum_ipv6_magic) 285 lwz r8, 0(r3) 286 lwz r9, 4(r3) 287 addc r0, r7, r8 288 lwz r10, 8(r3) 289 adde r0, r0, r9 290 lwz r11, 12(r3) 291 adde r0, r0, r10 292 lwz r8, 0(r4) 293 adde r0, r0, r11 294 lwz r9, 4(r4) 295 adde r0, r0, r8 296 lwz r10, 8(r4) 297 adde r0, r0, r9 298 lwz r11, 12(r4) 299 adde r0, r0, r10 300 add r5, r5, r6 /* assumption: len + proto doesn't carry */ 301 adde r0, r0, r11 302 adde r0, r0, r5 303 addze r0, r0 304 rotlwi r3, r0, 16 305 add r3, r0, r3 306 not r3, r3 307 rlwinm r3, r3, 16, 16, 31 308 blr 309EXPORT_SYMBOL(csum_ipv6_magic)