cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

memset.S (4659B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 * Copyright (c) 2011, The Linux Foundation. All rights reserved.
      4 */
      5
      6
      7/* HEXAGON assembly optimized memset */
      8/* Replaces the standard library function memset */
      9
     10
     11        .macro HEXAGON_OPT_FUNC_BEGIN name	/* emits the directives and entry label that open function 'name' */
     12	.text	/* switch to the .text section */
     13	.p2align 4	/* align what follows to 2^4 = 16 bytes */
     14	.globl \name	/* declare the symbol global */
     15	.type  \name, @function	/* give the symbol the 'function' type */
     16\name:	/* the entry label itself */
     17	.endm
     18
     19	.macro HEXAGON_OPT_FUNC_FINISH name	/* closes function 'name'; use after its last instruction */
     20	.size  \name, . - \name	/* symbol size = current location minus the entry label */
     21	.endm
     22
     23/* FUNCTION: memset (v2 version) */
/*
 * This variant is assembled only when __HEXAGON_ARCH__ < 3.
 *
 * Register use, as visible in the code below:
 *   r0    destination pointer; never written here, so it still holds
 *         the caller's value at every "jumpr r31" exit
 *   r1    fill value; replicated into r4 with vsplatb
 *   r2    byte count, decremented as data is stored
 *   r3    set to 0 so that r3:2 can be used as a 64-bit count
 *   r5:4  8-byte fill pattern for the doubleword loop (r5 = r4)
 *   r7:6  64-bit step subtracted from r3:2 (r7 = 0, r6 = 1, 2, 4 or 8)
 *   r8    running store pointer
 *   r9    8 - (r0 & 7), the "bytes until double alignment"
 *   r10   iteration count for the doubleword loop
 *
 * Outline: a count of 0 returns at once; counts up to 7 use a byte
 * loop; larger counts store a byte / halfword / word as selected by
 * bits 0 / 1 / 2 of r9, then doublewords in a hardware loop, then a
 * word / halfword / byte tail selected from the remaining count.
 *
 * NOTE(review): the control flow appears to be software pipelined.
 * Each packet's "if p0"/"if !p0"/"if p1" (written without ".new")
 * presumably tests the predicate left by an EARLIER packet, while the
 * compare in the same packet prepares the next decision, and register
 * reads inside a packet presumably see the values from before that
 * packet. Confirm against the Hexagon Programmer's Reference Manual
 * before reordering or regrouping any instruction.
 */
     24#if __HEXAGON_ARCH__ < 3
     25HEXAGON_OPT_FUNC_BEGIN memset
     26	{
     27		r6 = #8
     28		r7 = extractu(r0, #3 , #0)	/* r7 = low three bits of the destination address */
     29		p0 = cmp.eq(r2, #0)
     30		p1 = cmp.gtu(r2, #7)	/* p1: count > 7 (unsigned) */
     31	}
     32	{
     33		r4 = vsplatb(r1)	/* r4 = fill pattern built from r1 */
     34		r8 = r0           /* leave r0 intact for return val  */
     35		r9 = sub(r6, r7)  /* bytes until double alignment  */
     36		if p0 jumpr r31   /* count == 0, so return  */
     37	}
     38	{
     39		r3 = #0	/* high word of the r3:2 pair */
     40		r7 = #0	/* high word of the r7:6 pair */
     41		p0 = tstbit(r9, #0)	/* bit 0 of r9: one byte needed to align */
     42		if p1 jump 2f /* skip byte loop */
     43	}
     44
     45/* less than 8 bytes to set, so just set a byte at a time and return  */
     46
     47		loop0(1f, r2) /* byte loop */
     48	.falign
     491: /* byte loop */
     50	{
     51		memb(r8++#1) = r4
     52	}:endloop0
     53		jumpr r31
     54	.falign
     552: /* skip byte loop */
     56	{
     57		r6 = #1	/* r7:6 = 1, the step for the byte store below */
     58		p0 = tstbit(r9, #1)	/* bit 1 of r9: halfword needed to align */
     59		p1 = cmp.eq(r2, #1)
     60		if !p0 jump 3f /* skip initial byte store */
     61	}
     62	{
     63		memb(r8++#1) = r4
     64		r3:2 = sub(r3:2, r7:6)	/* count -= 1 */
     65		if p1 jumpr r31
     66	}
     67	.falign
     683: /* skip initial byte store */
     69	{
     70		r6 = #2	/* r7:6 = 2, the step for the halfword store below */
     71		p0 = tstbit(r9, #2)	/* bit 2 of r9: word needed to align */
     72		p1 = cmp.eq(r2, #2)
     73		if !p0 jump 4f /* skip initial half store */
     74	}
     75	{
     76		memh(r8++#2) = r4
     77		r3:2 = sub(r3:2, r7:6)	/* count -= 2 */
     78		if p1 jumpr r31
     79	}
     80	.falign
     814: /* skip initial half store */
     82	{
     83		r6 = #4	/* r7:6 = 4, the step for the word store below */
     84		p0 = cmp.gtu(r2, #7)	/* at least one full doubleword left? (used at 5: when the word store is skipped) */
     85		p1 = cmp.eq(r2, #4)
     86		if !p0 jump 5f /* skip initial word store */
     87	}
     88	{
     89		memw(r8++#4) = r4
     90		r3:2 = sub(r3:2, r7:6)	/* count -= 4 */
     91		p0 = cmp.gtu(r2, #11)	/* NOTE(review): presumably reads the count from before the sub above, i.e. "more than 7 left after this word" - confirm */
     92		if p1 jumpr r31
     93	}
     94	.falign
     955: /* skip initial word store */
     96	{
     97		r10 = lsr(r2, #3)	/* r10 = count / 8 = number of doublewords */
     98		p1 = cmp.eq(r3, #1)	/* r3 was set to 0 above; NOTE(review): looks like a way to leave p1 false for the test at 7: - confirm */
     99		if !p0 jump 7f /* skip double loop */
    100	}
    101	{
    102		r5 = r4	/* r5:4 = 8-byte fill pattern */
    103		r6 = #8	/* r7:6 = 8, the step for each doubleword */
    104		loop0(6f, r10) /* double loop */
    105	}
    106
    107/* set bytes a double word at a time  */
    108
    109	.falign
    1106: /* double loop */
    111	{
    112		memd(r8++#8) = r5:4
    113		r3:2 = sub(r3:2, r7:6)	/* count -= 8 */
    114		p1 = cmp.eq(r2, #8)	/* NOTE(review): presumably true when this doubleword consumes the last 8 bytes - confirm */
    115	}:endloop0
    116	.falign
    1177: /* skip double loop */
    118	{
    119		p0 = tstbit(r2, #2)	/* bit 2 of the remaining count: a word is left */
    120		if p1 jumpr r31
    121	}
    122	{
    123		r6 = #4	/* r7:6 = 4, the step for the final word store */
    124		p0 = tstbit(r2, #1)	/* bit 1 of the remaining count: a halfword is left */
    125		p1 = cmp.eq(r2, #4)
    126		if !p0 jump 8f /* skip final word store */
    127	}
    128	{
    129		memw(r8++#4) = r4
    130		r3:2 = sub(r3:2, r7:6)	/* count -= 4 */
    131		if p1 jumpr r31
    132	}
    133	.falign
    1348: /* skip final word store */
    135	{
    136		p1 = cmp.eq(r2, #2)
    137		if !p0 jump 9f /* skip final half store */
    138	}
    139	{
    140		memh(r8++#2) = r4	/* the count is not updated from here on; only the stores remain */
    141		if p1 jumpr r31
    142	}
    143	.falign
    1449: /* skip final half store */
    145	{
    146		memb(r8++#1) = r4	/* last remaining byte */
    147		jumpr r31
    148	}
    149HEXAGON_OPT_FUNC_FINISH memset
    150#endif
    151
    152
    153/*  FUNCTION: memset (v3 and higher version)  */
/*
 * This variant is assembled only when __HEXAGON_ARCH__ >= 3.
 *
 * Register use, as visible in the code below:
 *   r0    destination pointer; only read here, so it still holds the
 *         caller's value at every "jumpr r31" exit
 *   r1    fill value; stored directly by the byte stores and compared
 *         with zero at .L17 to choose between the two 32-byte loops
 *   r2    bytes left to set
 *   r3    byte-loop pointer, later scratch and 32-byte loop count
 *   r5:4  combine(r7,r7), the 8-byte pattern used by memd
 *   r6    running store pointer
 *   r7    vsplatb(r1), the pattern used by memh and memw
 *   r8    doubleword loop count
 *
 * Outline: a count of 0 returns; counts up to 8 use a byte loop.
 * Larger counts store a byte / halfword / word as selected by address
 * bits 0 / 1 / 2. Counts above 127 then store up to three doublewords
 * until (r6 & 31) == 0 and run a loop that handles 32 bytes per
 * iteration using dczeroa. Whatever remains goes through a doubleword
 * loop and a word / halfword / byte tail.
 *
 * NOTE(review): several packets both read and write r2 or r3, and mix
 * ".new" and plain predicate uses. The code presumably relies on
 * non-".new" reads seeing the values from before the packet - confirm
 * against the Hexagon Programmer's Reference Manual before regrouping.
 */
    154#if __HEXAGON_ARCH__ >= 3
    155HEXAGON_OPT_FUNC_BEGIN memset
    156	{
    157		r7=vsplatb(r1)	/* r7 = fill pattern built from r1 */
    158		r6 = r0	/* work on a copy; r0 stays untouched */
    159		if (r2==#0) jump:nt .L1	/* nothing to set: go to the return at .L1 */
    160	}
    161	{
    162		r5:4=combine(r7,r7)	/* 8-byte pattern for memd */
    163		p0 = cmp.gtu(r2,#8)
    164		if (p0.new) jump:nt .L3	/* more than 8 bytes: take the aligned path */
    165	}
    166	{
    167		r3 = r0
    168		loop0(.L47,r2)	/* 8 bytes or fewer: one byte per iteration */
    169	}
    170	.falign
    171.L47:
    172	{
    173		memb(r3++#1) = r1
    174	}:endloop0 /* start=.L47 */
    175		jumpr r31
    176.L3:
    177	{
    178		p0 = tstbit(r0,#0)	/* odd destination address? */
    179		if (!p0.new) jump:nt .L8
    180		p1 = cmp.eq(r2, #1)
    181	}
    182	{
    183		r6 = add(r0, #1)
    184		r2 = add(r2,#-1)
    185		memb(r0) = r1	/* leading byte, brings the pointer to an even address */
    186		if (p1) jump .L1
    187	}
    188.L8:
    189	{
    190		p0 = tstbit(r6,#1)	/* address bit 1 set: halfword needed */
    191		if (!p0.new) jump:nt .L10
    192	}
    193	{
    194		r2 = add(r2,#-2)
    195		memh(r6++#2) = r7
    196		p0 = cmp.eq(r2, #2)	/* NOTE(review): presumably compares the count from before the add above - confirm */
    197		if (p0.new) jump:nt .L1
    198	}
    199.L10:
    200	{
    201		p0 = tstbit(r6,#2)	/* address bit 2 set: word needed */
    202		if (!p0.new) jump:nt .L12
    203	}
    204	{
    205		r2 = add(r2,#-4)
    206		memw(r6++#4) = r7
    207		p0 = cmp.eq(r2, #4)	/* NOTE(review): presumably compares the count from before the add above - confirm */
    208		if (p0.new) jump:nt .L1
    209	}
    210.L12:
    211	{
    212		p0 = cmp.gtu(r2,#127)
    213		if (!p0.new) jump:nt .L14	/* 127 bytes or fewer: skip the 32-byte loops */
    214	}
/* up to three doubleword stores, stopping as soon as (r6 & 31) == 0 */
    215		r3 = and(r6,#31)
    216		if (r3==#0) jump:nt .L17
    217	{
    218		memd(r6++#8) = r5:4
    219		r2 = add(r2,#-8)
    220	}
    221		r3 = and(r6,#31)
    222		if (r3==#0) jump:nt .L17
    223	{
    224		memd(r6++#8) = r5:4
    225		r2 = add(r2,#-8)
    226	}
    227		r3 = and(r6,#31)
    228		if (r3==#0) jump:nt .L17
    229	{
    230		memd(r6++#8) = r5:4
    231		r2 = add(r2,#-8)
    232	}
    233.L17:
    234	{
    235		r3 = lsr(r2,#5)	/* r3 = count / 32 */
    236		if (r1!=#0) jump:nt .L18	/* non-zero r1: use the loop at .L18 that also stores the pattern */
    237	}
    238	{
    239		r8 = r3
    240		r3 = r6
    241		loop0(.L46,r3)	/* NOTE(review): presumably uses r3 from before this packet (count / 32), not the r6 copy - confirm */
    242	}
    243	.falign
    244.L46:
    245	{
    246		dczeroa(r6)	/* r1 == 0 path: dczeroa alone accounts for each 32 bytes */
    247		r6 = add(r6,#32)
    248		r2 = add(r2,#-32)
    249	}:endloop0 /* start=.L46 */
    250.L14:
    251	{
    252		p0 = cmp.gtu(r2,#7)
    253		if (!p0.new) jump:nt .L28	/* fewer than 8 bytes left: go to the tail */
    254		r8 = lsr(r2,#3)	/* r8 = count / 8 */
    255	}
    256		loop0(.L44,r8)
    257	.falign
    258.L44:
    259	{
    260		memd(r6++#8) = r5:4
    261		r2 = add(r2,#-8)
    262	}:endloop0 /* start=.L44 */
    263.L28:
    264	{
    265		p0 = tstbit(r2,#2)	/* bit 2 of the remaining count: a word is left */
    266		if (!p0.new) jump:nt .L33
    267	}
    268	{
    269		r2 = add(r2,#-4)
    270		memw(r6++#4) = r7
    271	}
    272.L33:
    273	{
    274		p0 = tstbit(r2,#1)	/* bit 1 of the remaining count: a halfword is left */
    275		if (!p0.new) jump:nt .L35
    276	}
    277	{
    278		r2 = add(r2,#-2)
    279		memh(r6++#2) = r7
    280	}
    281.L35:
    282		p0 = cmp.eq(r2,#1)
    283		if (p0) memb(r6) = r1	/* final odd byte, if any */
    284.L1:
    285		jumpr r31	/* common return; r0 was never modified */
    286.L18:
    287		loop0(.L45,r3)	/* r3 = count / 32, set in the packet at .L17 */
    288	.falign
    289.L45:
    290		dczeroa(r6)	/* NOTE(review): presumably prepares the 32-byte line at r6 so the four stores below need not fetch it - confirm */
    291	{
    292		memd(r6++#8) = r5:4
    293		r2 = add(r2,#-32)	/* one decrement covers all four doubleword stores of this iteration */
    294	}
    295		memd(r6++#8) = r5:4
    296		memd(r6++#8) = r5:4
    297	{
    298		memd(r6++#8) = r5:4
    299	}:endloop0 /* start=.L45  */
    300		jump .L14	/* finish the remainder with the doubleword loop and tail */
    301HEXAGON_OPT_FUNC_FINISH memset
    302#endif