strlen_32.S (2691B)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * strlen() for PPC32
 *
 * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
 *
 * Inspired from glibc implementation
 */
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/cache.h>

	.text

/*
 * Algorithm:
 *
 * 1) Given a word 'x', we can test to see if it contains any 0 bytes
 *    by subtracting 0x01010101, and seeing if any of the high bits of each
 *    byte changed from 0 to 1. This works because the least significant
 *    0 byte must have had no incoming carry (otherwise it's not the least
 *    significant), so it is 0x00 - 0x01 == 0xff. For all other
 *    byte values, either they have the high bit set initially, or when
 *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
 *    have their high bit set. The expression here is
 *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
 *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
 *    match, but possibly false 0x80 matches in the next more significant
 *    byte to a true match due to carries.  For little-endian this is
 *    of no consequence since the least significant match is the one
 *    we're interested in, but big-endian needs method 2 to find which
 *    byte matches.
 * 2) Given a word 'x', we can test to see _which_ byte was zero by
 *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
 *    This produces 0x80 in each byte that was zero, and 0x00 in all
 *    the other bytes. The '| ~0x80808080' sets the low 7 bits in each
 *    byte so that the outer complement clears them, and the '| x' part
 *    ensures that bytes with the high bit set produce 0x00. Subtracting
 *    0x80808080 + 1 is the same as adding 0x7f7f7f7f (modulo 2^32), and
 *    that addition will carry into the high bit of each byte iff that
 *    byte had one of its low 7 bits set. We can then just see
 *    which was the most significant bit set and divide by 8 to find how
 *    many to add to the index.
42 * This is from the book 'The PowerPC Compiler Writer's Guide', 43 * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. 44 */ 45 46_GLOBAL(strlen) 47 andi. r0, r3, 3 48 lis r7, 0x0101 49 addi r10, r3, -4 50 addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */ 51 rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */ 52 bne- 3f 53 .balign IFETCH_ALIGN_BYTES 541: lwzu r9, 4(r10) 552: subf r8, r7, r9 56 and. r8, r8, r6 57 beq+ 1b 58 andc. r8, r8, r9 59 beq+ 1b 60 andc r8, r9, r6 61 orc r9, r9, r6 62 subfe r8, r6, r8 63 nor r8, r8, r9 64 cntlzw r8, r8 65 subf r3, r3, r10 66 srwi r8, r8, 3 67 add r3, r3, r8 68 blr 69 70 /* Missaligned string: make sure bytes before string are seen not 0 */ 713: xor r10, r10, r0 72 orc r8, r8, r8 73 lwzu r9, 4(r10) 74 slwi r0, r0, 3 75 srw r8, r8, r0 76 orc r9, r9, r8 77 b 2b 78EXPORT_SYMBOL(strlen)