cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

NG4memset.S (2332B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/* NG4memset.S: Niagara-4 optimized memset/bzero.
      3 *
      4 * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
      5 */
      6
      7#include <asm/asi.h>
      8
      9	.register	%g2, #scratch
     10	.register	%g3, #scratch
     11
     12	.text
     13	.align		32
     14	.globl		NG4memset
       /*
        * NG4memset: fill memory with a byte value.
        *
        * Register use, as read from the code below:
        *   %o0 - destination pointer, passed through untouched
        *   %o1 - fill value on entry; only its low 8 bits are used
        *   %o2 - byte count on entry; moved into %o1 before either branch
        *   %o4 - 64-bit fill pattern handed to the shared store code
        *   %g1 - scratch
        *
        * Both exits branch to the "1:" label that follows NG4bzero's
        * "clr %o4", with the byte count in %o1 and the pattern in %o4.
        * A zero fill byte goes there directly (the andcc already left
        * %o4 == 0); any other byte is first replicated into all eight
        * bytes of %o4.
        *
        * NOTE(review): the argument order presumably mirrors C
        * memset(dst, c, len) -- confirm against the callers.
        */
     15NG4memset:
     16	andcc		%o1, 0xff, %o4	/* %o4 = fill byte; Z set when it is 0 */
     17	be,pt		%icc, 1f	/* zero fill: no replication needed */
     18	 mov		%o2, %o1	/* delay slot, runs on both paths: %o1 = count */
     19	sllx		%o4, 8, %g1
     20	or		%g1, %o4, %o2	/* %o2 = byte x2 (count already copied out) */
     21	sllx		%o2, 16, %g1
     22	or		%g1, %o2, %o2	/* %o2 = byte x4 */
     23	sllx		%o2, 32, %g1
     24	ba,pt		%icc, 1f	/* join the shared store path in NG4bzero */
     25	 or		%g1, %o2, %o4	/* delay slot: %o4 = byte x8 */
     26	.size		NG4memset,.-NG4memset
     27
     28	.align		32
     29	.globl		NG4bzero
       /*
        * NG4bzero: zero a memory region. The first "1:" label below is also
        * the shared store path that NG4memset enters with a fill pattern
        * already in %o4.
        *
        *   %o0 - destination pointer, advanced as bytes are written
        *   %o1 - bytes still to write
        *   %o3 - copy of the original destination, returned in %o0
        *   %o4 - 64-bit fill pattern (cleared when entered as NG4bzero)
        *   %g1, %g2, %g3, %o5 - scratch
        *
        * Order of work: byte stores up to 8-byte alignment, 8-byte stores
        * up to 64-byte alignment, whole 64-byte blocks through
        * ASI_BLK_INIT_QUAD_LDD_P, then 8-, 4- and 1-byte stores for the
        * tail. Counts of 16 or fewer go straight to the byte loop.
        *
        * Every conditional branch on the condition codes tests %xcc. The
        * byte count lives in a full 64-bit register and %icc only reflects
        * the low 32 bits of a result, so testing %icc misjudges counts of
        * 4GB and above (e.g. a count of exactly 4GB would look like zero).
        */
     30NG4bzero:
     31	clr		%o4		/* fill pattern = 0 */
     321:	cmp		%o1, 16		/* shared entry: %o1 = count, %o4 = pattern */
     33	ble		%xcc, .Ltiny	/* 16 bytes or fewer: byte loop only */
     34	 mov		%o0, %o3	/* delay slot, runs on both paths: save dst */
     35	sub		%g0, %o0, %g1
     36	and		%g1, 0x7, %g1	/* %g1 = bytes needed to reach 8-byte alignment */
     37	brz,pt		%g1, .Laligned8
     38	 sub		%o1, %g1, %o1	/* delay slot: take the head bytes off the count */
     391:	stb		%o4, [%o0 + 0x00]
     40	subcc		%g1, 1, %g1
     41	bne,pt		%xcc, 1b
     42	 add		%o0, 1, %o0
     43.Laligned8:
       /* 64 + (64 - 8): at most 56 bytes of alignment plus one full block. */
     44	cmp		%o1, 64 + (64 - 8)
     45	ble		%xcc, .Lmedium	/* too short for the block loop */
     46	 sub		%g0, %o0, %g1	/* delay slot */
     47	andcc		%g1, (64 - 1), %g1	/* %g1 = bytes needed to reach 64-byte alignment */
     48	brz,pn		%g1, .Laligned64
     49	 sub		%o1, %g1, %o1	/* delay slot: take them off the count */
     501:	stx		%o4, [%o0 + 0x00]
     51	subcc		%g1, 8, %g1
     52	bne,pt		%xcc, 1b
     53	 add		%o0, 0x8, %o0
     54.Laligned64:
     55	andn		%o1, 64 - 1, %g1	/* %g1 = bytes covered by whole 64-byte blocks */
     56	sub		%o1, %g1, %o1	/* %o1 = tail, fewer than 64 bytes */
     57	brnz,pn		%o4, .Lnon_bzero_loop
     58	 mov		0x20, %g2	/* delay slot: offset used by both block loops */
       /*
        * Zero-fill block loop: only two 8-byte stores per 64 bytes, at
        * offsets 0x00 and 0x20.
        * NOTE(review): this presumably relies on the block-init ASI
        * zero-filling the rest of each 32-byte unit -- confirm against the
        * processor documentation.
        */
     591:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
     60	subcc		%g1, 0x40, %g1
     61	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
     62	bne,pt		%xcc, 1b	/* %g1 can be 4GB or more: needs the 64-bit flags */
     63	 add		%o0, 0x40, %o0
     64.Lpostloop:
     65	cmp		%o1, 8
     66	bl,pn		%xcc, .Ltiny
     67	 membar		#StoreStore|#StoreLoad	/* delay slot: runs on both paths after the block stores */
     68.Lmedium:
     69	andn		%o1, 0x7, %g1	/* %g1 = bytes covered by whole 8-byte words */
     70	sub		%o1, %g1, %o1	/* %o1 = remainder, fewer than 8 bytes */
     711:	stx		%o4, [%o0 + 0x00]
     72	subcc		%g1, 0x8, %g1
     73	bne,pt		%xcc, 1b
     74	 add		%o0, 0x08, %o0
     75	andcc		%o1, 0x4, %g1	/* is a 4-byte store still needed? */
     76	be,pt		%xcc, .Ltiny
     77	 sub		%o1, %g1, %o1	/* delay slot: %g1 is 0 or 4 */
     78	stw		%o4, [%o0 + 0x00]
     79	add		%o0, 0x4, %o0
     80.Ltiny:
     81	cmp		%o1, 0
     82	be,pn		%xcc, .Lexit	/* nothing left to write */
     831:	 subcc		%o1, 1, %o1	/* delay slot of the be above and top of the byte loop */
     84	stb		%o4, [%o0 + 0x00]
     85	bne,pt		%xcc, 1b	/* flags still come from the subcc */
     86	 add		%o0, 1, %o0
     87.Lexit:
     88	retl
     89	 mov		%o3, %o0	/* delay slot: return the original destination */
       /*
        * Non-zero pattern block loop: eight 8-byte stores cover every word
        * of the 64-byte block. Offsets in the comments are relative to the
        * start of the block.
        */
     90.Lnon_bzero_loop:
     91	mov		0x08, %g3
     92	mov		0x28, %o5
     931:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P	/* +0x00 */
     94	subcc		%g1, 0x40, %g1
     95	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P	/* +0x20 */
     96	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P	/* +0x08 */
     97	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P	/* +0x28 */
     98	add		%o0, 0x10, %o0
     99	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P	/* +0x10 */
    100	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P	/* +0x30 */
    101	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P	/* +0x18 */
    102	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P	/* +0x38 */
    103	bne,pt		%xcc, 1b	/* flags from the subcc; %g1 can be 4GB or more */
    104	 add		%o0, 0x30, %o0	/* delay slot: 0x10 + 0x30 = one full block */
    105	ba,a,pt		%icc, .Lpostloop	/* unconditional; ",a" annuls the delay slot */
    106	 nop
    107	.size		NG4bzero,.-NG4bzero