strnlen.S (4252B)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by
 * Linaro. The original code can be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Determine the length of a fixed-size string.
 *
 * Syntax:  GNU as (AArch64), run through the C preprocessor.
 *
 * Parameters:
 *	x0 - const string pointer
 *	x1 - maximal string length
 * Returns:
 *	x0 - the length of the string, capped at the value passed in x1
 *	     (i.e. MIN(offset of first NUL, x1)); 0 when x1 is 0.
 *
 * Clobbers: x2-x14 and the condition flags.  The stack is not used.
 *
 * The string is always read with 16-byte loads from an address rounded
 * down to a 16-byte boundary, so bytes before the start of the string
 * and after its terminator (within the same aligned 16-byte chunk) may
 * be loaded, but are never counted.
 */

/* Arguments and results.  */
srcin		.req	x0
len		.req	x0
limit		.req	x1

/* Locals and temporaries.  */
src		.req	x2
data1		.req	x3
data2		.req	x4
data2a		.req	x5
has_nul1	.req	x6
has_nul2	.req	x7
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
tmp4		.req	x11
zeroones	.req	x12
pos		.req	x13
limit_wd	.req	x14

/* Byte-replicated constants used by the parallel NUL detection.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

SYM_FUNC_START(__pi_strnlen)
	cbz	limit, .Lhit_limit		/* limit == 0: return 0.  */
	mov	zeroones, #REP8_01
	bic	src, srcin, #15			/* src = srcin rounded down to 16.  */
	ands	tmp1, srcin, #15		/* tmp1 = misalignment in bytes.  */
	b.ne	.Lmisaligned
	/* Calculate the number of full and partial words -1.  */
	sub	limit_wd, limit, #1		/* Limit != 0, so no underflow.  */
	lsr	limit_wd, limit_wd, #4		/* Convert to Qwords.  */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
	/*
	 * The inner loop deals with two Dwords at a time.  This has a
	 * slightly higher start-up cost, but we should win quite quickly,
	 * especially on cores with a high number of issue slots per
	 * cycle, as we get much better parallelism out of the operations.
	 */
.Lloop:
	ldp	data1, data2, [src], #16
.Lrealigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, #REP8_7f
	bic	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	subs	limit_wd, limit_wd, #1		/* One Qword consumed.  */
	orr	tmp1, has_nul1, has_nul2	/* Non-zero iff a NUL was seen.  */
	/*
	 * While Qwords remain (PL), test tmp1 against zero; once the count
	 * has gone negative the flags are forced to "not equal" instead, so
	 * the loop exits either on a NUL or when the limit is exhausted.
	 */
	ccmp	tmp1, #0, #0, pl		/* NZCV = 0000  */
	b.eq	.Lloop

	cbz	tmp1, .Lhit_limit		/* No null in final Qword.  */

	/*
	 * We know there's a null in the final Qword.  The easiest thing
	 * to do now is work out the length of the string and return
	 * MIN (len, limit).
	 */
	sub	len, src, srcin			/* src already points past the Qword.  */
	cbz	has_nul1, .Lnul_in_data2
CPU_BE( mov	data2, data1 )	/* Prepare data to re-calculate the syndrome.  */

	sub	len, len, #8			/* NUL is in the first Dword.  */
	mov	has_nul2, has_nul1
.Lnul_in_data2:
	/*
	 * For big-endian, carry propagation (if the final byte in the
	 * string is 0x01) means we cannot use has_nul directly.  The
	 * easiest way to get the correct byte is to byte-swap the data
	 * and calculate the syndrome a second time.
	 */
CPU_BE( rev	data2, data2 )
CPU_BE( sub	tmp1, data2, zeroones )
CPU_BE( orr	tmp2, data2, #REP8_7f )
CPU_BE( bic	has_nul2, tmp1, tmp2 )

	sub	len, len, #8			/* len = offset of the Dword holding the NUL.  */
	rev	has_nul2, has_nul2		/* Put the earliest string byte at the MSB.  */
	clz	pos, has_nul2			/* Bit index of the first NUL marker.  */
	add	len, len, pos, lsr #3		/* Bits to bytes.  */
	cmp	len, limit
	csel	len, len, limit, ls		/* Return the lower value.  */
	ret

.Lmisaligned:
	/*
	 * Deal with a partial first word.
	 * We're doing two things in parallel here;
	 * 1) Calculate the number of words (but avoiding overflow if
	 *    limit is near ULONG_MAX) - to do this we need to work out
	 *    limit + tmp1 - 1 as a 65-bit value before shifting it;
	 * 2) Load and mask the initial data words - we force the bytes
	 *    before the ones we are interested in to 0xff - this ensures
	 *    early bytes will not hit any zero detection.
	 */
	ldp	data1, data2, [src], #16

	sub	limit_wd, limit, #1
	and	tmp3, limit_wd, #15		/* Low 4 bits of (limit - 1).  */
	lsr	limit_wd, limit_wd, #4		/* High bits of (limit - 1), as Qwords.  */

	add	tmp3, tmp3, tmp1		/* Cannot overflow: both are < 16.  */
	add	limit_wd, limit_wd, tmp3, lsr #4 /* Add the carry into the Qword count.  */

	neg	tmp4, tmp1
	lsl	tmp4, tmp4, #3			/* Bytes beyond alignment -> bits.  */

	mov	tmp2, #~0
	/* Big-endian.  Early bytes are at MSB.  */
CPU_BE( lsl	tmp2, tmp2, tmp4 )	/* Shift by (tmp4 & 63).  */
	/* Little-endian.  Early bytes are at LSB.  */
CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift by (tmp4 & 63).  */

	cmp	tmp1, #8

	orr	data1, data1, tmp2
	orr	data2a, data2, tmp2

	/*
	 * Misalignment <= 8: the string starts in data1, so keep the
	 * partially masked data1 and the untouched data2.
	 * Misalignment > 8: the string starts in data2, so data1 becomes
	 * all ones and data2 takes the partially masked copy.
	 */
	csinv	data1, data1, xzr, le
	csel	data2, data2, data2a, le
	b	.Lrealigned

.Lhit_limit:
	mov	len, limit			/* No NUL within limit bytes.  */
	ret
SYM_FUNC_END(__pi_strnlen)

SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
EXPORT_SYMBOL_NOKASAN(strnlen)