cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

memset-archs.S (2793B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>
#include <asm/cache.h>

/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6). If you want to implement an optimized memset for
 * the other possible L1 data cache line lengths (32B and 128B), rewrite the
 * code carefully, checking that no prefetchw/prealloc instruction touches
 * an L1 cache line that does not belong to the memset area.
 */
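
/*
 * Editor's note: a rough C-level sketch of the flow below, for orientation
 * only (illustrative, not part of the kernel source). replicate_byte() is
 * a hypothetical stand-in for the shift/or sequence after
 * .Laligndestination:
 *
 *	void *memset(void *dst, int c, size_t n)	// args in r0, r1, r2
 *	{
 *		unsigned char *p = dst;			// r3; r0 is the ret val
 *		if (n == 0)
 *			return dst;
 *		if (n > 8) {
 *			while ((unsigned long)p & 3)	// byte-align the head
 *				*p++ = c, n--;
 *			unsigned v = replicate_byte(c);	// c in all byte lanes
 *			// 64B chunks (prealloc + stores), then 32B chunks
 *		}
 *		while (n--)				// remaining tail bytes
 *			*p++ = c;
 *		return dst;
 *	}
 */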

#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif
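
; Editor's note: on CPUs whose L1 line size is not 64B the two macros above
; expand to nothing, so the store loops below still run correctly, just
; without the cache-line prefetch/preallocation hints.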

ENTRY_CFI(memset)
	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; don't clobber ret val
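; Editor's note: "mov.f 0, r2" only sets the flags (a test for r2 == 0),
; and jz.d has a delay slot, so "mov r3, r0" executes whether or not the
; early return is taken; r3 becomes the working pointer while r0 keeps the
; return value.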

;;; if length <= 8
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count, r2

	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	stb.ab	r1, [r3, 1]
	sub	r2, r2, 1
.Laligndestination:
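; Editor's note: the loop above stores lp_count = 4 - (r0 & 3) single bytes
; to word-align r3, and is skipped entirely when the destination is already
; aligned (and.f sets Z). E.g. r0 = 0x1001: three byte stores, r3 = 0x1004.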

;;; Destination is aligned
	and	r1, r1, 0xFF
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5
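; Editor's note: the shift/or sequence above replicates the fill byte into
; every byte lane of both r4 and r5, e.g. r1 = 0xAB gives r4 = r5 =
; 0xABABABAB, ready for word (or double-word) stores.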

	sub3	lp_count, r2, 8
	cmp     r2, 64
	bmsk.hi	r2, r2, 5
	mov.ls	lp_count, 0
	add3.hi	r2, r2, 8
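; Editor's note: sub3/add3 shift their last operand left by 3, so this block
; computes lp_count = n - 64 (shifted down to a 64B chunk count below) and,
; for n > 64, r2 = (n & 0x3F) + 64, i.e. the <= 127-byte remainder. Worked
; example, n = 200: lp_count = 136 >> 6 = 2 chunks (128 bytes), and
; r2 = 8 + 64 = 72 -> two 32B chunks plus an 8-byte tail.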

;;; Convert len to number of 64B chunks (unrolled as 8 double-word stores)
	lsr.f	lp_count, lp_count, 6

	lpnz	@.Lset64bytes
	;; LOOP START
	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching

#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset64bytes:
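; Editor's note: with CONFIG_ARC_HAS_LL64, std.ab stores the 64-bit r4:r5
; register pair and post-increments r3, so eight stores fill one 64B line;
; without LL64 the same line takes sixteen 32-bit st.ab stores.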

	lsr.f	lp_count, r2, 5	; Last remaining max 124 bytes
	lpnz	.Lset32bytes
	;; LOOP START
#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset32bytes:

	and.f	lp_count, r2, 0x1F	; Last remaining bytes, at most 31
.Lsmallchunk:
	lpnz	.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]
.Lcopy3bytes:

	j	[blink]

END_CFI(memset)

ENTRY_CFI(memzero)
	; adjust bzero-style args (ptr, len) to memset args (ptr, 0, len)
	mov	r2, r1
	b.d	memset		; tail call, so no need to touch blink
	mov	r1, 0		; fill byte 0, set in the branch delay slot
END_CFI(memzero)