cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

memset.S (2118B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * OpenRISC memset.S
      4 *
      5 * Hand-optimized assembler version of memset for OpenRISC.
      6 * Algorithm inspired by several other arch-specific memset routines
      7 * in the kernel tree
      8 *
      9 * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
     10 */
     11
     12	.global memset
     13	.type	memset, @function
     14memset:
     15	/* memset(s, c, n): write the low 8 bits of c to the n bytes starting at s.
     16	 * In:    r3 = s (destination pointer), r4 = c (fill value), r5 = n (byte count)
     17	 * Out:   r11 = s (copied from r3 just before returning through r9)
     18	 * Temps: r13 = fill byte/word, r15 = scratch, r17 = bytes left, r19 = write pointer
     19	 * Written for branch delay slots: the insn after each l.bf/l.jr also runs when the branch is taken (extra-indented where intentional).
     20	*/
     21
     22	/* Nothing to store when n == 0: branch straight to the return at 4: */
     23	l.sfeqi		r5, 0
     24	l.bf		4f
     25
     26	/* Keep only the low 8 bits of c in r13 (this insn directly follows the l.bf above; r13 is a temp, so running it on the exit path is harmless) */
     27	l.andi  	r13, r4, 0xff
     28
     29	/* c == 0: r13 is already the all-zero word, skip the replication */
     30	l.sfeqi		r13, 0
     31	l.bf		1f
     32	/* Delay slot: flag = (n <= 7, unsigned), i.e. fewer than two whole words; meant for the l.bf after 1: */
     33	 l.sfleui	r5, 7
     34
     35	/* Replicate the byte into every byte of the 32-bit word in r13 (000c -> cccc) */
     36	l.slli		r15, r13, 16  // r15 = r13 << 16 = 0c00
     37	l.or		r13, r13, r15 // r13 = 000c | 0c00 = 0c0c
     38	l.slli		r15, r13, 8   // r15 = r13 << 8 = c0c0
     39	l.or		r13, r13, r15 // r13 = 0c0c | c0c0 = cccc
     40
     411:	l.addi		r19, r3, 0 // r19 = s: write pointer used by the byte-only path
     42	/* Fewer than 8 bytes (flag set by l.sfleui above): use the byte loop only */
     43	l.bf		3f
     44	 l.or		r17, r5, r0 // Delay slot: r17 = n (bytes left), for both paths
     45
     46	/* r15 = s & 3: byte offset of s within a 4-byte word */
     47	l.andi		r15, r3, 0x3
     48
     49	/* Offset 0: s is already word aligned, go straight to the word loop */
     50	l.sfeqi		r15, 0
     51	l.bf		2f
     52	 l.addi		r19, r3, 0 // Delay slot: r19 = s (same value as set at 1:)
     53
     54	/* Offset 1, 2 or 3: store leading bytes one at a time until word aligned */
     55	l.sb		0(r3), r13   // s[0] = c
     56	l.addi		r17, r17, -1 // One byte fewer left
     57
     58	l.sfeqi		r15, 3     // Offset 3: that single byte reached alignment
     59	l.bf		2f
     60	 l.addi		r19, r3, 1  // Delay slot: r19 = s + 1
     61
     62	/* Offset 1 or 2: a second leading byte is needed */
     63	l.sb		1(r3), r13   // s[1] = c
     64	l.addi		r17, r17, -1 // One byte fewer left
     65
     66	l.sfeqi		r15, 2     // Offset 2: two bytes reached alignment
     67	l.bf		2f
     68	 l.addi		r19, r3, 2  // Delay slot: r19 = s + 2
     69
     70	/* Offset 1: a third leading byte is needed */
     71	l.sb		2(r3), r13   // s[2] = c
     72	l.addi		r17, r17, -1 // One byte fewer left
     73	l.addi		r19, r3, 3   // r19 = s + 3, now word aligned
     74
     75	/* Word loop: r19 is word aligned and r17 >= 5 on entry (n >= 8, at most 3 bytes stored above), so the first store is always in range */
     762:	l.sw		0(r19), r13  // Store cccc to the 4 bytes at r19
     77	l.addi		r17, r17, -4 // Four bytes fewer left
     78	l.sfgeui	r17, 4     // Flag = at least one whole word still left (unsigned)
     79	l.bf		2b
     80	 l.addi		r19, r19, 4  // Delay slot: r19 += 4, on the final pass too
     81
     82	/* 0 to 3 bytes are left: exit if none, otherwise finish in the byte loop */
     83	l.sfeqi		r17, 0
     84	l.bf		4f
     85
     86	/* Byte loop. The l.addi at 3: directly follows the l.bf above; r17 is not read after 4:, so an extra decrement on the exit path is harmless */
     873:	l.addi		r17, r17, -1 // One byte fewer left
     88	l.sb		0(r19), r13  // *r19 = c (byte store, not the whole word)
     89	l.sfnei		r17, 0      // Flag = bytes still left
     90	l.bf		3b
     91	 l.addi		r19, r19, 1  // Delay slot: r19 += 1
     92
     934:	l.jr		r9           // Return to the address held in r9
     94	 l.ori		r11, r3, 0   // Delay slot: r11 = s, the pointer handed back to the caller