cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

strlen_32.S (2691B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/*
      3 * strlen() for PPC32
      4 *
      5 * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
      6 *
      7 * Inspired from glibc implementation
      8 */
      9#include <asm/ppc_asm.h>
     10#include <asm/export.h>
     11#include <asm/cache.h>
     12
     13	.text
     14
     15/*
     16 * Algorithm:
     17 *
     18 * 1) Given a word 'x', we can test to see if it contains any 0 bytes
     19 *    by subtracting 0x01010101, and seeing if any of the high bits of each
     20 *    byte changed from 0 to 1. This works because the least significant
     21 *    0 byte must have had no incoming carry (otherwise it's not the least
     22 *    significant), so it is 0x00 - 0x01 == 0xff. For all other
     23 *    byte values, either they have the high bit set initially, or when
     24 *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
     25 *    have their high bit set. The expression here is
     26 *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
     27 *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
     28 *    match, but possibly false 0x80 matches in the next more significant
     29 *    byte to a true match due to carries.  For little-endian this is
     30 *    of no consequence since the least significant match is the one
     31 *    we're interested in, but big-endian needs method 2 to find which
     32 *    byte matches.
     33 * 2) Given a word 'x', we can test to see _which_ byte was zero by
     34 *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
     35 *    This produces 0x80 in each byte that was zero, and 0x00 in all
     36 *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
     37 *    byte, and the '| x' part ensures that bytes with the high bit set
     38 *    produce 0x00. The addition will carry into the high bit of each byte
     39 *    iff that byte had one of its low 7 bits set. We can then just see
     40 *    which was the most significant bit set and divide by 8 to find how
     41 *    many to add to the index.
     42 *    This is from the book 'The PowerPC Compiler Writer's Guide',
     43 *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
     44 */
     45
     46_GLOBAL(strlen)	/* In: r3 = string address. Out: r3 = byte count before the first 0 byte. Uses r0, r6-r10, CR0, XER[CA]. */
     47	andi.   r0, r3, 3	/* r0 = address & 3; CR0 != 0 means not word aligned (tested by bne- below) */
     48	lis	r7, 0x0101	/* r7 = 0x01010000, upper half of lomagic */
     49	addi	r10, r3, -4	/* r10 = address - 4: pre-biased because lwzu adds 4 before loading */
     50	addic	r7, r7, 0x0101	/* r7 = 0x01010101 (lomagic) & clear XER[CA] */
     51	rotlwi	r6, r7, 31 	/* r6 = 0x80808080 (himagic) */
     52	bne-	3f	/* unaligned start: mask the bytes that precede the string first */
     53	.balign IFETCH_ALIGN_BYTES	/* align the head of the word loop */
     541:	lwzu	r9, 4(r10)	/* r10 += 4; r9 = x = word at r10 */
     552:	subf	r8, r7, r9	/* r8 = x - lomagic */
     56	and.	r8, r8, r6	/* r8 = (x - lomagic) & himagic */
     57	beq+	1b	/* no high bit set after the subtraction: no 0 byte, next word */
     58	andc.	r8, r8, r9	/* r8 &= ~x: drop high bits that were already set in x */
     59	beq+	1b	/* nothing left: still no 0 byte, next word */
     60	andc	r8, r9, r6	/* r8 = x & ~himagic (low 7 bits of each byte) */
     61	orc	r9, r9, r6	/* r9 = x | ~himagic */
     62	subfe	r8, r6, r8	/* r8 = r8 + ~himagic + CA; CA is still 0 from the addic, so r8 = r8 - himagic - 1 */
     63	nor	r8, r8, r9	/* r8 = ~(r8 | r9): 0x80 in each byte of x that is 0, 0x00 elsewhere (method 2) */
     64	cntlzw	r8, r8	/* r8 = bit position of the most significant 0x80 marker */
     65	subf	r3, r3, r10	/* r3 = r10 - r3 = offset of this word from the string start */
     66	srwi	r8, r8, 3	/* r8 = bit position / 8 = index of the 0 byte within the word */
     67	add	r3, r3, r8	/* r3 = word offset + byte index = length */
     68	blr
     69
     70	/* Misaligned string: make sure bytes before string are seen not 0 */
     713:	xor	r10, r10, r0	/* low 2 bits of r10 equal r0, so this rounds r10 down to a word boundary */
     72	orc	r8, r8, r8	/* r8 = r8 | ~r8 = 0xffffffff */
     73	lwzu	r9, 4(r10)	/* r10 += 4; r9 = aligned word holding the first string byte */
     74	slwi	r0, r0, 3	/* r0 = misalignment in bits (bytes * 8) */
     75	srw	r8, r8, r0	/* r8 = 0xffffffff >> r0: zero bits over the bytes that precede the string */
     76	orc	r9, r9, r8	/* r9 |= ~r8: force those preceding bytes to 0xff */
     77	b	2b	/* join the loop at the zero-byte test, skipping the load */
     78EXPORT_SYMBOL(strlen)