csumpartialcopygeneric.S (6834B)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len)
 *
 *  In  : r0 = src, r1 = dst, r2 = len
 *  Out : r0 = checksum
 *
 * r3 ("sum") is the running checksum accumulator.  The entry code
 * loads it with -1 before any data is summed, so a value left in r3
 * by the caller does not seed the result.
 * NOTE(review): earlier wording of this header listed a fourth
 * "int sum" argument in r3; confirm against the C prototype.
 *
 * This file is a template: FN_ENTRY, FN_EXIT, save_regs, load_regs
 * and the load1b/load2b/load1l/load2l/load4l macros are not defined
 * here and must be supplied by whatever includes it.  The shift and
 * byte-lane helpers (put_byte_N, get_byte_N, lspush, lspull) are
 * presumably the endian-aware ones from <asm/assembler.h>.
 *
 * The carry flag is kept live from one adcs to the next almost
 * everywhere below, so instruction order matters.  Note that 'tst'
 * and 'teq' preserve the carry flag.
 */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

		/*
		 * len == 0: nothing was copied or summed, so return the
		 * accumulator as it stands.  load_regs is expected to
		 * undo save_regs and return to the caller.
		 */
.Lzero:		mov	r0, sum
		load_regs

		/*
		 * Subroutine (entered with bl, leaves through lr): copy
		 * one, two or three leading bytes so that dst becomes
		 * 32-bit aligned.  Only called with len >= 8, hence no
		 * length check.  src has not been aligned at this point.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit		@ dst even: skip the single byte

		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ fold into checksum
		strb	ip, [dst], #1
		tst	dst, #2
		reteq	lr			@ bit 1 clear: dst 32-bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		ret	lr			@ dst 32-bit aligned

		/*
		 * Short path for 0..7 bytes with arbitrary src and dst
		 * alignment.  Reached only through the blo that follows
		 * "cmp len, #8" in the entry code, so C = 0 on arrival.
		 */
.Lless8:	teq	len, #0			@ empty request?
		beq	.Lzero

		/* At least one byte remains from here on. */
		tst	dst, #1			@ is dst 16-bit aligned?
		beq	.Lless8_aligned

		/* Odd dst: move a single byte to make it even. */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ fold into checksum
		strb	ip, [dst], #1
		tst	len, #6			@ any byte pairs left?
		beq	.Lless8_byteonly

		/* Copy two bytes per pass while bits 1..2 of len are set. */
.Lless8_pair:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	.Lless8_pair
.Lless8_byteonly:
		tst	len, #1			@ one odd byte at the end?
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ fold into checksum
		strb	r8, [dst], #1
		b	.Ldone

FN_ENTRY
		save_regs
		mov	sum, #-1		@ initial accumulator value

		cmp	len, #8			@ fewer than 8 bytes goes to
		blo	.Lless8			@ the short path (C = 0 there)

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ destination 32-bit aligned?
		blne	.Ldst_unaligned		@ no: align it, resume below

		/*
		 * dst is 32-bit aligned now.  At most three of the
		 * original >= 8 bytes were consumed, so more than four
		 * remain.  C still holds the carry out of the alignment
		 * step above.
		 */

		tst	src, #3			@ source 32-bit aligned?
		bne	.Lsrc_not_aligned

		/* ---- src and dst both 32-bit aligned ---- */

		bics	ip, len, #15		@ ip = len rounded down to 16
		beq	.Laligned_tail

		/* 16 bytes per pass; sub/teq keep the carry chain intact. */
.Laligned_loop16:
		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	.Laligned_loop16

		/* Remaining whole words: bit 3 of len = 8 bytes, bit 2 = 4. */
.Laligned_tail:	ands	ip, len, #12
		beq	.Laligned_lt4
		tst	ip, #8
		beq	.Laligned_word
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	.Laligned_lt4

.Laligned_word:	load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Final 0..3 bytes.  One more source word is fetched and
		 * its bytes are stored individually.
		 */
.Laligned_lt4:	ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0	@ mov leaves the tst flags alone
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2

		/*
		 * Shared tail: when len is odd, r5 carries the last byte
		 * in its low 8 bits; store it and fold it in.
		 */
.Lexit:		tst	len, #1
		strbne	r5, [dst], #1
		andne	r5, r5, #255
		adcsne	sum, sum, r5, put_byte_0

		/*
		 * Common exit.  Fold the outstanding carry into the
		 * result.  If the original dst had bit 0 set, rotate the
		 * checksum by 8 to get around the inefficient byte
		 * manipulations in the architecture independent code.
		 * The original dst is re-read from [sp, #0]; this relies
		 * on the stack layout produced by save_regs (defined by
		 * the including file).
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

		/*
		 * ---- dst aligned, src not 32-bit aligned ----
		 *
		 * src is rounded down to a word boundary and read a whole
		 * word at a time.  r4 holds the source bytes fetched but
		 * not yet stored; each output word is r4 merged with the
		 * start of the next source word.  The three variants
		 * below differ only in the shift amounts, selected by
		 * (src & 3).
		 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ absorb C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned

		/* (src & 3) == 1; the cmp above left C = 0 on this path. */
		mov	r4, r5, lspull #8
		bics	ip, len, #15
		beq	.Lsrc1_tail
.Lsrc1_loop16:	load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r6, r6, lspull #8
		orr	r6, r6, r7, lspush #24
		mov	r7, r7, lspull #8
		orr	r7, r7, r8, lspush #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #8	@ carry-over for next pass
		sub	ip, ip, #16
		teq	ip, #0
		bne	.Lsrc1_loop16
.Lsrc1_tail:	ands	ip, len, #12
		beq	.Lsrc1_lt4
		tst	ip, #8
		beq	.Lsrc1_word
		load2l	r5, r6
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #8
		tst	ip, #4
		beq	.Lsrc1_lt4
.Lsrc1_word:	load1l	r5
		orr	r4, r4, r5, lspush #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #8
		/* Up to three bytes left; all are taken from r4. */
.Lsrc1_lt4:	ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

		/* (src & 3) == 2 */
.Lsrc2_aligned:	mov	r4, r5, lspull #16
		adds	sum, sum, #0		@ C = 0 (the cmp left it set)
		bics	ip, len, #15
		beq	.Lsrc2_tail
.Lsrc2_loop16:	load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r6, r6, lspull #16
		orr	r6, r6, r7, lspush #16
		mov	r7, r7, lspull #16
		orr	r7, r7, r8, lspush #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #16	@ carry-over for next pass
		sub	ip, ip, #16
		teq	ip, #0
		bne	.Lsrc2_loop16
.Lsrc2_tail:	ands	ip, len, #12
		beq	.Lsrc2_lt4
		tst	ip, #8
		beq	.Lsrc2_word
		load2l	r5, r6
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #16
		tst	ip, #4
		beq	.Lsrc2_lt4
.Lsrc2_word:	load1l	r5
		orr	r4, r4, r5, lspush #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #16
		/*
		 * Up to three bytes left.  The first two come from r4; a
		 * third, if needed, is fetched with load1b.
		 */
.Lsrc2_lt4:	ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

		/* (src & 3) == 3 */
.Lsrc3_aligned:	mov	r4, r5, lspull #24
		adds	sum, sum, #0		@ C = 0 (the cmp left it set)
		bics	ip, len, #15
		beq	.Lsrc3_tail
.Lsrc3_loop16:	load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r6, r6, lspull #24
		orr	r6, r6, r7, lspush #8
		mov	r7, r7, lspull #24
		orr	r7, r7, r8, lspush #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #24	@ carry-over for next pass
		sub	ip, ip, #16
		teq	ip, #0
		bne	.Lsrc3_loop16
.Lsrc3_tail:	ands	ip, len, #12
		beq	.Lsrc3_lt4
		tst	ip, #8
		beq	.Lsrc3_word
		load2l	r5, r6
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #24
		tst	ip, #4
		beq	.Lsrc3_lt4
.Lsrc3_word:	load1l	r5
		orr	r4, r4, r5, lspush #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #24
		/*
		 * Up to three bytes left.  The first comes from r4; for
		 * two or more, another source word is loaded and supplies
		 * the rest.
		 */
.Lsrc3_lt4:	ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, lspush #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT