strcmp.S (2511B)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   or one cycle / byte, respectively, by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */

#include <linux/linkage.h>

/*
 * strcmp - compare the zero-terminated byte strings at r0 and r1.
 *
 * In:    r0 = address of the first string
 *        r1 = address of the second string
 * Out:   r0 = 0 if the strings are equal,
 *             positive if the first differing byte of the first string is
 *             the larger one (unsigned), negative if it is the smaller one.
 *             The byte loop returns the byte difference; the word paths
 *             return 1 or a value with bit 31 set.
 * Uses:  r1, r2, r3, r4, r5, r12 and the status flags are overwritten.
 *
 * If both addresses are 4-byte aligned the strings are compared one word
 * at a time, otherwise one byte at a time.  The word loop always loads
 * whole aligned words, so it may read up to three bytes that lie behind
 * the terminating zero byte (within the same aligned word).
 *
 * Register roles in the word path:
 *   r2 / r3  current word of the first / second string
 *   r12      0x01010101
 *   r5       0x80808080
 *   r4       zero-byte indicator for r2: (r2 - r12) & ~r2 & r5
 *
 * Every return is "j_s.d [blink]"; the instruction that follows it sits in
 * the delay slot and still executes before control returns to the caller.
 */
ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1		; r2 = (r0 | r1) & 3
	brne	r2,0,.Lcharloop	; either pointer unaligned: compare bytewise
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 = 0x80808080 (r12 rotated right by one)
.Lwordloop:
	ld.ab	r2,[r0,4]	; load a word from each string, then advance
	ld.ab	r3,[r1,4]	; the pointer by 4
	nop_s			; filler, presumably covers the load latency
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; r4 != 0 iff r2 contains a zero byte
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
	/* The words differ and r2 holds no zero byte.  On big endian the
	   first byte in memory is the most significant one, so an unsigned
	   compare of the whole words decides.  On little endian everything
	   above the least significant differing byte is masked off first.  */
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1		; provisional result: first string is larger
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: r2 < r3 (unsigned) -> negative

	.balign	4
#ifdef __LITTLE_ENDIAN__
	/* r2 contains a zero byte.  Treat the zero indicator like a
	   difference so that nothing behind the terminator takes part in
	   the comparison.  */
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3	; r0 = 0 if equal up to the terminator
	mov.hi	r0,1		; r2 > r3 (unsigned) -> 1
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: r2 < r3 (unsigned) -> negative
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: r2 < r3 (unsigned) -> negative
#endif /* ENDIAN */

	/* Bytewise fallback, taken when at least one pointer is not
	   4-byte aligned.  */
	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]	; load a byte from each string, then advance
	ldb.ab	r3,[r1,1]	; the pointer by 1
	nop_s			; filler, presumably covers the load latency
	breq	r2,0,.Lcmpend	; end of first string
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; delay slot: result = byte difference
END_CFI(strcmp)