cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack.
git clone https://git.sinitax.com/sinitax/cachepc-linux

strcmp.S (4345B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#define L(label) .L ## label

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian, early bytes are at the MSB end; on little-endian, at
   the LSB end.  LS_FW means shifting towards the early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */
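
/* Worked single-byte instance of the test (X - 1) & ~(X | 0x7f):
     X = 0x00: 0xff & ~0x7f = 0xff & 0x80 = 0x80  -> non-zero (NUL)
     X = 0x61: 0x60 & ~0x7f = 0x60 & 0x80 = 0x00  -> zero
     X = 0x80: 0x7f & ~0xff = 0x7f & 0x00 = 0x00  -> zero
   With REP8_01/REP8_7f the same test runs on all eight bytes of a
   word at once, subject to the carry caveat above.  */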


SYM_FUNC_START(__pi_strcmp)
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
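	/* Example ("abc" vs "abd"): the syndrome's top set bit comes
	   from 'c' ^ 'd' = 0x07.  The lsl below discards the equal
	   prefix bits of both words, so the two bytes extracted by the
	   final lsr-56 order exactly as the first differing characters
	   do, and the subtraction comes out negative here.  */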
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
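	/* For instance, with both sources 3 bytes past a boundary,
	   `neg shift, src2, lsl 3` leaves shift = 64 - 24 = 40 (mod 64)
	   and LS_FW turns tmp = -1 into a mask over the three unwanted
	   leading bytes.  OR-ing it into both words makes those bytes
	   equal and non-NUL, so they can never set a syndrome bit.  */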
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
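	/* The cmp/ccmp pair below folds both exit conditions into one
	   branch: when data1w is NUL the ccmp condition fails and NZCV
	   is forced to 0b0000 (so "ne" holds); otherwise the two bytes
	   are compared.  Either a NUL or a mismatch takes b.ne.  */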
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
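	/* The lsr/orr pair below plants a 0x01 in each byte of data3
	   that precedes the real start of src2, making those bytes
	   non-NUL; bytes from the string itself are OR-ed with zero
	   and left unchanged, so the NUL check cannot fire early.  */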
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	sub	result, data1, data2
	ret
SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)
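
For reference, the word-at-a-time NUL test that the assembly builds from
REP8_01 and REP8_7f can be sketched in portable C. This is an illustrative
stand-alone sketch for a little-endian host (matching the lsr path above),
not code from this repository; the helper name has_nul_word is made up.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_7f 0x7f7f7f7f7f7f7f7fULL

/* Non-zero iff some byte of x is 0x00: the (X - 1) & ~(X | 0x7f)
   test applied to all eight bytes at once.  Borrow propagation can
   only cause false positives in bytes above the first real NUL,
   which is harmless when searching for the first terminator.  */
static uint64_t has_nul_word(uint64_t x)
{
	return (x - REP8_01) & ~(x | REP8_7f);
}

int main(void)
{
	uint64_t w;

	memcpy(&w, "abc\0defg", 8);		/* word contains a NUL */
	printf("%d\n", has_nul_word(w) != 0);	/* prints 1 */

	memcpy(&w, "abcdefgh", 8);		/* no NUL */
	printf("%d\n", has_nul_word(w) != 0);	/* prints 0 */
	return 0;
}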