cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

memcmp.S (2951B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 * Copyright (c) 2013-2021, Arm Limited.
      4 *
      5 * Adapted from the original at:
      6 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
      7 */
      8
      9#include <linux/linkage.h>
     10#include <asm/assembler.h>
     11
     12/* Assumptions:
     13 *
     14 * ARMv8-a, AArch64, unaligned accesses.
     15 */
     16
     17#define L(label) .L ## label	/* L(x) pastes to .Lx, an assembler-local label name */
     18
     19/* Parameters and result.  */
     20#define src1		x0	/* pointer into the first buffer; advanced as data is read */
     21#define src2		x1	/* pointer into the second buffer; advanced in step with src1 */
     22#define limit		x2	/* byte count on entry, then a biased remaining-bytes counter */
     23#define result		w0	/* return value; shares register 0 with src1 */
     24
     25/* Internal variables.  */
     26#define data1		x3	/* word loaded from src1 */
     27#define data1w		w3	/* 32-bit view of data1, used by the 4-byte and byte loads */
     28#define data1h		x4	/* second word of a 16-byte ldp from src1 */
     29#define data2		x5	/* word loaded from src2 */
     30#define data2w		w5	/* 32-bit view of data2 */
     31#define data2h		x6	/* second word of a 16-byte ldp from src2 */
     32#define tmp1		x7	/* src1's offset within 16 bytes during the alignment step */
     33#define tmp2		x8	/* not referenced by the code visible in this file */
     34
     35SYM_FUNC_START(__pi_memcmp)
	/*
	 * Compare limit (x2) bytes at src1 (x0) and src2 (x1).
	 *
	 * Returns in w0: zero when every byte matches; otherwise a negative
	 * or positive value according to the first differing byte, with the
	 * bytes compared as unsigned.  The word-based paths return exactly
	 * -1 or 1; the byte loop returns the difference of the two bytes.
	 *
	 * The code below writes only x0-x7 and the condition flags and does
	 * not touch the stack.  Loads near the end of a buffer are allowed
	 * to overlap bytes that were already compared (and found equal)
	 * instead of falling back to narrower loads.
	 */
     36	subs	limit, limit, 8		/* limit = n - 8 */
     37	b.lo	L(less8)		/* n < 8: an 8-byte load would overrun */
     38
     39	ldr	data1, [src1], 8	/* bytes 0-7; post-increment both pointers */
     40	ldr	data2, [src2], 8
     41	cmp	data1, data2
     42	b.ne	L(return)
     43
     44	subs	limit, limit, 8		/* limit = n - 16 */
     45	b.gt	L(more16)		/* n > 16 (signed test) */
     46
	/* 8 <= n <= 16: limit is in [-8..0], so the advanced pointers plus
	   limit address the final 8 bytes.  Overlap with bytes 0-7 is
	   harmless because those already compared equal.  */
     47	ldr	data1, [src1, limit]
     48	ldr	data2, [src2, limit]
     49	b	L(return)
     50
     51L(more16):
     52	ldr	data1, [src1], 8	/* bytes 8-15 */
     53	ldr	data2, [src2], 8
     54	cmp	data1, data2
     55	bne	L(return)
     56
     57	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
     58	   strings.  */
     59	subs	limit, limit, 16	/* limit = n - 32 */
     60	b.ls	L(last_bytes)		/* n <= 32 */
     61
     62	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
     63	   try to align, so limit it only to strings larger than 128 bytes.  */
     64	cmp	limit, 96
     65	b.ls	L(loop16)		/* n <= 128: enter the loop unaligned */
     66
     67	/* Align src1 and adjust src2 with bytes not yet done.  */
     68	and	tmp1, src1, 15		/* tmp1 = src1 modulo 16 */
     69	add	limit, limit, tmp1	/* count the bytes stepped back over again */
     70	sub	src1, src1, tmp1	/* round src1 down to a 16-byte boundary */
     71	sub	src2, src2, tmp1	/* move src2 back by the same amount */
     72
     73	/* Loop performing 16 bytes per iteration using aligned src1.
     74	   Limit is pre-decremented by 16 and must be larger than zero.
     75	   Exit if <= 16 bytes left to do or if the data is not equal.  */
     76	.p2align 4
     77L(loop16):
     78	ldp	data1, data1h, [src1], 16
     79	ldp	data2, data2h, [src2], 16
     80	subs	limit, limit, 16
	/* The two ccmp form a chain: the low words are compared only if the
	   subs left limit above zero (hi), and the high words only if the
	   low words matched (eq).  A failed condition loads NZCV with 0,
	   which reads as "ne" and ends the loop.  */
     81	ccmp	data1, data2, 0, hi
     82	ccmp	data1h, data2h, 0, eq
     83	b.eq	L(loop16)
     84
	/* The loop ended either on a mismatch or because at most 16 bytes
	   remain.  Re-test both words, lower-addressed one first, so that a
	   mismatch is reported from the word that contains it.  */
     85	cmp	data1, data2
     86	bne	L(return)
     87	mov	data1, data1h
     88	mov	data2, data2h
     89	cmp	data1, data2
     90	bne	L(return)
     91
     92	/* Compare last 1-16 bytes using unaligned access.  */
	/* limit is zero or negative on both routes into this label, so adding
	   it steps the pointers back to the final 16 bytes of each buffer.  */
     93L(last_bytes):
     94	add	src1, src1, limit
     95	add	src2, src2, limit
     96	ldp	data1, data1h, [src1]
     97	ldp	data2, data2h, [src2]
     98	cmp	data1, data2
     99	bne	L(return)
    100	mov	data1, data1h		/* low words equal: decide on the high words */
    101	mov	data2, data2h
    102	cmp	data1, data2		/* falls through; L(return) compares again */
    103
    104	/* Compare data bytes and set return value to 0, -1 or 1.  */
    105L(return):
    106#ifndef __AARCH64EB__
	/* Not a big-endian build: reverse the bytes so the lowest-addressed
	   byte becomes the most significant one.  The unsigned compare below
	   is then decided by the first byte that differs in memory.  */
    107	rev	data1, data1
    108	rev	data2, data2
    109#endif
    110	cmp	data1, data2
    111L(ret_eq):
    112	cset	result, ne		/* 1 if the values differ, 0 if equal */
    113	cneg	result, result, lo	/* negate when data1 < data2 (unsigned) */
    114	ret
    115
    116	.p2align 4
    117	/* Compare fewer than 8 bytes (n = 0..7).  Limit is [-8..-1].  */
    118L(less8):
    119	adds	limit, limit, 4		/* limit = n - 4; carry is set iff n >= 4 */
    120	b.lo	L(less4)		/* n < 4 */
    121	ldr	data1w, [src1], 4	/* bytes 0-3; a w-register load zeroes bits 63:32 */
    122	ldr	data2w, [src2], 4
    123	cmp	data1w, data2w
    124	b.ne	L(return)		/* L(return) works on the zero-extended x registers */
    125	sub	limit, limit, 4		/* limit = n - 8, i.e. [-4..-1] again */
    126L(less4):
    127	adds	limit, limit, 4		/* limit = bytes still to compare (0..3) */
    128	beq	L(ret_eq)		/* nothing left: Z is set, so cset yields 0 */
    129L(byte_loop):
    130	ldrb	data1w, [src1], 1
    131	ldrb	data2w, [src2], 1
    132	subs	limit, limit, 1
	/* Compare the pair only while bytes remain (ne); after the final byte
	   the flags are forced to "ne" so the loop ends either way.  */
    133	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
    134	b.eq	L(byte_loop)		/* bytes equal and more to go */
    135	sub	result, data1w, data2w	/* difference of the last pair read; 0 if equal */
    136	ret
    137SYM_FUNC_END(__pi_memcmp)
	/* NOTE(review): the two macros below are defined outside this file.
	   By their names they presumably publish __pi_memcmp under the weak
	   alias "memcmp" and export that symbol - confirm in the headers.  */
    138SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
    139EXPORT_SYMBOL_NOKASAN(memcmp)