cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

memset-sh4.S (1668B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/*
      3 * "memset" implementation for SH4
      4 *
      5 * Copyright (C) 1999  Niibe Yutaka
      6 * Copyright (c) 2009  STMicroelectronics Limited
      7 * Author: Stuart Menefy <stuart.menefy:st.com>
      8 */
      9
     10/*
     11 *            void *memset(void *s, int c, size_t n);
     12 */
     13
     14#include <linux/linkage.h>
     15
     16ENTRY(memset)
	/*
	 * Fill a buffer with one byte value, working from the END of the
	 * buffer down towards its start.
	 *
	 * Register use below:
	 *   r4    - destination pointer; first advanced to one past the
	 *           end, then walked downwards by every store
	 *   r5    - fill value (low byte; later replicated to 32 bits)
	 *   r6    - number of bytes still to be written
	 *   r0-r3 - scratch (loop counters, masks)
	 *   r0    - return value, loaded from r4 in the rts delay slot
	 * Presumably r4/r5/r6 carry s/c/n of the prototype above, as per
	 * the SH C calling convention - confirm against the ABI.
	 *
	 * A line indented by one extra space sits in the delay slot of
	 * the bt/s, bf/s or rts directly above it and is executed whether
	 * or not that branch is taken.
	 *
	 * NOTE(review): cmp/gt and cmp/ge are signed compares, so a
	 * length with the top bit set looks "small" and ends up in the
	 * byte loop at 40.
	 */
	/*
	 * r4 = one past the last byte.  With fewer than 12 bytes go
	 * straight to the byte loop at 40.  The delay slot copies the end
	 * pointer into r0 for the alignment test that follows.
	 */
     17	mov	#12,r0
     18	add	r6,r4
     19	cmp/gt	r6,r0
     20	bt/s	40f		! if it's too small, set a byte at once
     21	 mov	r4,r0
	/*
	 * r0 = end pointer & 3 = bytes lying above the last 4-byte
	 * boundary.  The delay slot takes them off the remaining count
	 * (it subtracts 0 when the end is already aligned).
	 */
     22	and	#3,r0
     23	cmp/eq	#0,r0
     24	bt/s	2f		! It's aligned
     25	 sub	r0,r6
	/* Store those r0 (1..3) bytes so that r4 becomes 4-byte aligned. */
     261:
     27	dt	r0
     28	bf/s	1b
     29	 mov.b	r5,@-r4
	/* Replicate the low byte of r5 into all four bytes of r5. */
     302:				! make VVVV
     31	extu.b	r5,r5
     32	swap.b	r5,r0		!   V0
     33	or	r0,r5		!   VV
     34	swap.w	r5,r0		! VV00
     35	or	r0,r5		! VVVV
     36
	/*
	 * With fewer than 64 bytes left, skip the cache-line loop and use
	 * the 8-byte loop at 22.  The delay slot sets r0 = r6, which is
	 * the value label 22 expects to find in r0.
	 */
     37	! Check if enough bytes need to be copied to be worth the big loop
     38	mov	#0x40, r0	! (MT)
     39	cmp/gt	r6,r0		! (MT)  64 > len => slow loop
     40
     41	bt/s	22f
     42	 mov	r6,r0
     43
	/*
	 * r1 = r4 rounded down to a 32-byte boundary; r3 = r4 - r1 is the
	 * distance down to that boundary.  r4 is 4-byte aligned here, so
	 * r3 is a multiple of 4 and r3 >> 2 is a whole number of words.
	 */
     44	! align the dst to the cache block size if necessary
     45	mov	r4, r3
     46	mov	#~(0x1f), r1
     47
     48	and	r3, r1
     49	cmp/eq	r3, r1
     50
     51	bt/s	11f		! dst is already aligned
     52	 sub	r1, r3		! r3-r1 -> r3
     53	shlr2	r3		! number of loops
     54
	/*
	 * Store one word per pass until r4 reaches the 32-byte boundary;
	 * the delay slot takes 4 off the remaining count on every pass.
	 */
     5510:	mov.l	r5,@-r4
     56	dt	r3
     57	bf/s	10b
     58	 add	#-4, r6
     59
	/*
	 * r2 = r6 >> 5 = number of whole 32-byte blocks (shld with a
	 * negative count shifts right).  At least 64 bytes were left
	 * before the alignment above and it consumed at most 28, so r2 is
	 * at least 1 when the dt loop is entered.
	 */
     6011:	! dst is 32byte aligned
     61	mov	r6,r2
     62	mov	#-5,r0
     63	shld	r0,r2		! number of loops
     64
	/*
	 * Step r4 down to the base of the first block.  movca.l only
	 * accepts r0 as its source register, so the pattern is copied
	 * there.
	 */
     65	add	#-32, r4
     66	mov	r5, r0
	/*
	 * One 32-byte block per pass.  movca.l writes the first word of
	 * the block; see the SH-4 manual for its cache-allocation
	 * behaviour.  The remaining seven words are written with plain
	 * stores, so the whole block is overwritten.  The r6 and r2
	 * updates are interleaved with the stores; the delay slot moves
	 * r4 down to the next block.
	 */
     6712:
     68	movca.l	r0,@r4
     69	mov.l	r5,@(4, r4)
     70	mov.l	r5,@(8, r4)
     71	mov.l	r5,@(12,r4)
     72	mov.l	r5,@(16,r4)
     73	mov.l	r5,@(20,r4)
     74	add	#-0x20, r6
     75	mov.l	r5,@(24,r4)
     76	dt	r2
     77	mov.l	r5,@(28,r4)
     78	bf/s	12b
     79	 add	#-32, r4
     80
	/*
	 * The delay slot also ran on the final pass, so undo that last
	 * -32.  Fewer than 32 bytes remain; with fewer than 8 go directly
	 * to the byte loop at 40.
	 */
     81	add	#32, r4
     82	mov	#8, r0
     83	cmp/ge	r0, r6
     84	bf	40f
     85
     86	mov	r6,r0
	/*
	 * 8-byte loop.  On entry r0 = r6 and r6 is at least 8 on both
	 * paths that reach this label, so the pass count is at least 1.
	 */
     8722:
     88	shlr2	r0
     89	shlr	r0		! r0 = r6 >> 3
	/*
	 * Two word stores per pass.  dt sets T; the mov.l placed between
	 * dt and bf/s does not change T.
	 */
     903:
     91	dt	r0
     92	mov.l	r5,@-r4		! set 8-byte at once
     93	bf/s	3b
     94	 mov.l	r5,@-r4
     95	!
	/* Keep only the 0..7 bytes that the 8-byte loop did not cover. */
     96	mov	#7,r0
     97	and	r0,r6
     98
	/*
	 * Byte loop: skipped entirely when r6 is zero, otherwise stores
	 * r6 bytes one at a time.
	 */
     99	! fill bytes (length may be zero)
     10040:	tst	r6,r6
     101	bt	5f
     1024:
     103	dt	r6
     104	bf/s	4b
     105	 mov.b	r5,@-r4
	/*
	 * Every byte was written by stepping r4 downwards, so r4 is back
	 * at the start of the buffer; return it in r0.
	 */
     1065:
     107	rts
     108	 mov	r4,r0