cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sha3-ce-core.S (6227B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/*
      3 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
      4 *
      5 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
      6 *
      7 * This program is free software; you can redistribute it and/or modify
      8 * it under the terms of the GNU General Public License version 2 as
      9 * published by the Free Software Foundation.
     10 */
     11
     12#include <linux/linkage.h>
     13#include <asm/assembler.h>
     14
     15	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	/*
	 * For every register number n in 0..31, define the assembler symbols
	 * .Lv<n>.2d and .Lv<n>.16b, both with the value n. The .inst macros
	 * in this file prepend ".L" to an operand such as v7.16b and use the
	 * resulting symbol as the register number in the encoding, so only
	 * the .2d and .16b operand spellings resolve.
	 */
     16	.set	.Lv\b\().2d, \b
     17	.set	.Lv\b\().16b, \b
     18	.endr
     19
     20	/*
     21	 * ARMv8.2 Crypto Extensions instructions
	 *
	 * Each macro emits the instruction as a raw 32-bit .inst word instead
	 * of using the mnemonic (presumably so the file still assembles with
	 * toolchains that do not know these instructions - confirm).
	 *
	 * Operands must be spelled vN.2d or vN.16b: ".L" is pasted in front of
	 * the operand text and the resulting symbol (defined by the .irp block
	 * above) supplies the register number.
	 *
	 * Field placement used by all four macros:
	 *   rd        -> bits [4:0]
	 *   rn        -> shifted left by 5
	 *   ra / imm6 -> shifted left by 10
	 *   rm        -> shifted left by 16
	 * Nothing is masked, so imm6 must already be in the range 0..63.
     22	 */
	/* eor3: per the Arm ARM, rd = rn ^ rm ^ ra (three-way exclusive OR). */
     23	.macro	eor3, rd, rn, rm, ra
     24	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
     25	.endm
     26
	/*
	 * rax1: per the Arm ARM, rd = rn ^ rol64(rm, 1) in each 64-bit lane.
	 * The bits that would hold ra are part of the fixed opcode here.
	 */
     27	.macro	rax1, rd, rn, rm
     28	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
     29	.endm
     30
	/* bcax: per the Arm ARM, rd = rn ^ (rm & ~ra) (bit clear, then XOR). */
     31	.macro	bcax, rd, rn, rm, ra
     32	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
     33	.endm
     34
	/*
	 * xar: per the Arm ARM, rd = ror64(rn ^ rm, imm6) in each 64-bit lane.
	 * imm6 is a rotate-RIGHT count, so a left rotation by n is requested
	 * as (64 - n).
	 */
     35	.macro	xar, rd, rn, rm, imm6
     36	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
     37	.endm
     38
     39	/*
     40	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
	 *
	 * Absorbs input blocks into the 25-lane state, running 24 rounds of the
	 * permutation after each block.
	 *
	 * x0 (st)      - 25 x 64-bit state lanes; loaded into v0-v24 on entry
	 *                and written back before returning.
	 * x1 (data)    - input bytes; advanced by one block per iteration
	 *                (72, 104, 136 or 144 bytes, selected by dg_size).
	 * w2 (blocks)  - number of blocks. It is decremented before it is
	 *                tested, so the loop body always runs at least once.
	 *                NOTE(review): a call with blocks == 0 would still
	 *                consume input; presumably callers guarantee
	 *                blocks >= 1 - confirm.
	 * x3 (dg_size) - only bits 6, 4 and 2 are examined:
	 *                bit 6 set                -> SHA3-512
	 *                bit 6 clear, bit 4 clear -> SHA3-256
	 *                bit 4 set, bit 2 set     -> SHA3-224
	 *                bit 4 set, bit 2 clear   -> SHA3-384
	 *
	 * Returns in w0 the number of blocks that were not processed: 0 when
	 * the loop ran to completion, non-zero when cond_yield branched to the
	 * exit path early.
	 *
	 * Scratch registers: x8/w8 (state pointer, then round counter), x9
	 * (round constant pointer), v25-v31.
	 *
	 * State and input only occupy the low 64 bits of each vector register
	 * (.1d / .8b accesses); the round instructions operate on all 128 bits
	 * but only the low half is ever stored back.
	 *
	 * NOTE(review): v8-v15 are overwritten without being saved here;
	 * presumably the caller wraps this in the kernel-mode NEON
	 * save/restore - confirm.
     41	 */
     42	.text
     43SYM_FUNC_START(sha3_ce_transform)
     44	/* load state */
     45	add	x8, x0, #32		// x8 walks the state; x0 stays put for the save below
     46	ld1	{ v0.1d- v3.1d}, [x0]
     47	ld1	{ v4.1d- v7.1d}, [x8], #32
     48	ld1	{ v8.1d-v11.1d}, [x8], #32
     49	ld1	{v12.1d-v15.1d}, [x8], #32
     50	ld1	{v16.1d-v19.1d}, [x8], #32
     51	ld1	{v20.1d-v23.1d}, [x8], #32
     52	ld1	{v24.1d}, [x8]
     53
	/* per-block loop: w2 = blocks left after this one, w8 = rounds to go */
     540:	sub	w2, w2, #1
     55	mov	w8, #24
     56	adr_l	x9, .Lsha3_rcon		// x9 -> first round constant
     57
     58	/* load input */
	/* every variant absorbs at least 7 lanes (56 bytes) into v0-v6 */
     59	ld1	{v25.8b-v28.8b}, [x1], #32
     60	ld1	{v29.8b-v31.8b}, [x1], #24
     61	eor	v0.8b, v0.8b, v25.8b
     62	eor	v1.8b, v1.8b, v26.8b
     63	eor	v2.8b, v2.8b, v27.8b
     64	eor	v3.8b, v3.8b, v28.8b
     65	eor	v4.8b, v4.8b, v29.8b
     66	eor	v5.8b, v5.8b, v30.8b
     67	eor	v6.8b, v6.8b, v31.8b
     68
     69	tbnz	x3, #6, 2f		// SHA3-512
     70
	/* all remaining variants absorb 6 more lanes (48 bytes) into v7-v12 */
     71	ld1	{v25.8b-v28.8b}, [x1], #32
     72	ld1	{v29.8b-v30.8b}, [x1], #16
     73	eor	 v7.8b,  v7.8b, v25.8b
     74	eor	 v8.8b,  v8.8b, v26.8b
     75	eor	 v9.8b,  v9.8b, v27.8b
     76	eor	v10.8b, v10.8b, v28.8b
     77	eor	v11.8b, v11.8b, v29.8b
     78	eor	v12.8b, v12.8b, v30.8b
     79
     80	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224
     81
     82	// SHA3-256: 4 more lanes into v13-v16, 136 bytes per block in total
     83	ld1	{v25.8b-v28.8b}, [x1], #32
     84	eor	v13.8b, v13.8b, v25.8b
     85	eor	v14.8b, v14.8b, v26.8b
     86	eor	v15.8b, v15.8b, v27.8b
     87	eor	v16.8b, v16.8b, v28.8b
     88	b	3f
     89
     901:	tbz	x3, #2, 3f		// bit 2 cleared? SHA3-384 (104 bytes per block, already absorbed)
     91
     92	// SHA3-224: 5 more lanes into v13-v17, 144 bytes per block in total
     93	ld1	{v25.8b-v28.8b}, [x1], #32
     94	ld1	{v29.8b}, [x1], #8
     95	eor	v13.8b, v13.8b, v25.8b
     96	eor	v14.8b, v14.8b, v26.8b
     97	eor	v15.8b, v15.8b, v27.8b
     98	eor	v16.8b, v16.8b, v28.8b
     99	eor	v17.8b, v17.8b, v29.8b
    100	b	3f
    101
    102	// SHA3-512: 2 more lanes into v7-v8, 72 bytes per block in total
    1032:	ld1	{v25.8b-v26.8b}, [x1], #16
    104	eor	 v7.8b,  v7.8b, v25.8b
    105	eor	 v8.8b,  v8.8b, v26.8b
    106
	/* one round of the permutation; executed 24 times per block */
    1073:	sub	w8, w8, #1
    108
	/*
	 * Column parities: v25..v29 each become the XOR of the five state
	 * lanes whose index is congruent to 0..4 modulo 5 (v25 = v0 ^ v5 ^
	 * v10 ^ v15 ^ v20, and so on).
	 */
    109	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
    110	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
    111	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
    112	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
    113	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
    114	eor3	v29.16b, v29.16b, v19.16b, v24.16b
    115	eor3	v26.16b, v26.16b, v16.16b, v21.16b
    116	eor3	v28.16b, v28.16b, v18.16b, v23.16b
    117	eor3	v25.16b, v25.16b, v15.16b, v20.16b
    118	eor3	v27.16b, v27.16b, v17.16b, v22.16b
    119
	/*
	 * Theta terms: each bc[x] is the parity of column x-1 XORed with the
	 * parity of column x+1 rotated left by one. The order matters: v29
	 * (column 4) is read by the first and last rax1, so bc[0] is written
	 * to the otherwise unused v30.
	 */
    120	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
    121	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
    122	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
    123	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
    124	rax1	v27.2d, v27.2d, v29.2d	// bc[3]
    125
	/*
	 * Apply the theta term to every lane and, in the same instruction,
	 * rotate it and place it in the register of its new position
	 * (rho + pi). Lane 0 has no rotation, so a plain eor is enough.
	 * xar rotates right, hence (64 - n) for a left rotation by n.
	 * The sequence is ordered so that each source lane is read before
	 * its register is overwritten; v29, v31 and, once their bc[] value
	 * is no longer needed, v25-v28 and v30 hold lanes as temporaries.
	 */
    126	eor	 v0.16b,  v0.16b, v30.16b
    127	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
    128	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
    129	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
    130	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
    131	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
    132	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
    133	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
    134	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
    135	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
    136	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
    137	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
    138	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
    139	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
    140	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
    141	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
    142	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
    143	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
    144	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
    145	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
    146	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
    147	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
    148	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
    149	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
    150	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)
    151
	/*
	 * Chi, one row of five lanes at a time:
	 * out[x] = in[x] ^ (in[x + 2] & ~in[x + 1]).
	 * Rows are processed from lanes 20-24 down to lanes 0-4.
	 */
    152	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
    153	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
    154	bcax	v22.16b, v22.16b, v24.16b, v23.16b
    155	bcax	v23.16b, v23.16b, v31.16b, v24.16b
    156	bcax	v24.16b, v24.16b,  v8.16b, v31.16b
    157
	/*
	 * v31 is free from here on: fetch the round constant for this round
	 * into both 64-bit lanes and step x9 to the next table entry. It is
	 * consumed by the final eor into v0 below.
	 */
    158	ld1r	{v31.2d}, [x9], #8
    159
    160	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
    161	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
    162	bcax	v19.16b, v19.16b, v16.16b, v15.16b
    163	bcax	v15.16b, v15.16b, v25.16b, v16.16b
    164	bcax	v16.16b, v16.16b,  v3.16b, v25.16b
    165
    166	bcax	v10.16b, v29.16b, v12.16b, v26.16b
    167	bcax	v11.16b, v26.16b, v13.16b, v12.16b
    168	bcax	v12.16b, v12.16b, v14.16b, v13.16b
    169	bcax	v13.16b, v13.16b, v29.16b, v14.16b
    170	bcax	v14.16b, v14.16b, v26.16b, v29.16b
    171
    172	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
    173	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
    174	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
    175	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
    176	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b
    177
    178	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
    179	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
    180	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
    181	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
    182	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b
    183
	/* iota: fold the round constant into lane 0 */
    184	eor	 v0.16b,  v0.16b, v31.16b
    185
    186	cbnz	w8, 3b			// more rounds to run for this block
	/*
	 * cond_yield is defined outside this file and is handed the exit
	 * label plus x8/x9 as scratch; presumably it branches to 4f when the
	 * CPU should be given up - confirm against <asm/assembler.h>. Both
	 * x8 and x9 are reinitialised at 0: if the loop continues.
	 */
    187	cond_yield 4f, x8, x9
    188	cbnz	w2, 0b			// another block to absorb
    189
    190	/* save state */
    1914:	st1	{ v0.1d- v3.1d}, [x0], #32
    192	st1	{ v4.1d- v7.1d}, [x0], #32
    193	st1	{ v8.1d-v11.1d}, [x0], #32
    194	st1	{v12.1d-v15.1d}, [x0], #32
    195	st1	{v16.1d-v19.1d}, [x0], #32
    196	st1	{v20.1d-v23.1d}, [x0], #32
    197	st1	{v24.1d}, [x0]
    198	mov	w0, w2			// return the number of blocks left
    199	ret
    200SYM_FUNC_END(sha3_ce_transform)
    201
    202	.section	".rodata", "a"
	/*
	 * NOTE(review): on this target gas treats the .align operand as a
	 * power of two, so this requests 256-byte alignment for a table of
	 * 8-byte entries - confirm whether that is intended.
	 */
    203	.align		8
	/*
	 * SHA-3 round constants: 24 64-bit values, one per round. The
	 * transform points x9 at the start of this table for every block and
	 * reads one entry per round with a post-incrementing ld1r, so the
	 * entries must stay in round order.
	 */
    204.Lsha3_rcon:
    205	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
    206	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
    207	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
    208	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
    209	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
    210	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
    211	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
    212	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008