cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

aes-neon.S (7628B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)

	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4
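
	/*
	 * The register aliases above and the enc_prepare,
	 * enc_switch_key and dec_prepare macros below are the
	 * interface expected by the shared mode template aes-modes.S,
	 * included at the end of this file.
	 */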

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
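
	/*
	 * sshr #7 yields 0xff for bytes with the top bit set, so ANDing
	 * with \const (0x1b, loaded into v12 by prepare) selects the
	 * reduction polynomial x^4+x^3+x+1 exactly where the shift
	 * overflowed.  E.g. 0x80 * x: shl gives 0x00, the mask gives
	 * 0x1b, eor gives 0x1b, i.e. x^8 mod (x^8+x^4+x^3+x+1).
	 */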

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
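
	/*
	 * Here two bits are shifted out and each needs its own
	 * reduction, so a plain mask no longer suffices: ushr #6
	 * extracts both bits and pmul (polynomial multiply) by 0x1b
	 * produces the combined correction.  E.g. 0x80 * x^2: shl
	 * gives 0x00, ushr gives 0x02, pmul gives 0x36, so the result
	 * is 0x36 = x^9 mod the AES polynomial.
	 */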

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm
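
	/*
	 * The full 256-byte Sbox ends up in v16-v31 (16 bytes per
	 * register), so the lookups in sub_bytes are register-to-
	 * register tbl/tbx permutes rather than loads from a
	 * secret-indexed table in memory, keeping the round function
	 * free of cache-timing leaks.
	 */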

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
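
	/*
	 * tbl zeroes bytes whose index is out of range for the
	 * 64-byte table and tbx leaves them untouched, so each lookup
	 * only fills in results for its own quarter of the Sbox.  v15
	 * holds 0x40 here (set in do_block), and the repeated
	 * subtractions slide the index window across the four
	 * quarters.
	 */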

	/* apply MixColumns transformation */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm
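
	/*
	 * The { 5, 0, 4, 0 } pre-multiply works because the inverse
	 * MixColumns matrix factors as circ(14,11,13,9) =
	 * circ(2,3,1,1) x circ(5,0,4,0) over GF(2^8), so decryption
	 * can reuse the forward transform below.  rev32 rotates each
	 * column by two bytes, and the tbl with v14 (.Lror32by8) by
	 * one.
	 */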

	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm
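
	/*
	 * One round per loop iteration: AddRoundKey, ShiftRows (a tbl
	 * permute with v13), SubBytes, then MixColumns for all but
	 * the final round, which adds the last round key instead.
	 * v15 does double duty as the round key and, in between, as
	 * the 0x40 index offset needed by sub_bytes.
	 */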

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to four AES states in parallel.
	 */
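
	/*
	 * Interleaving four states helps hide the latency of the long
	 * tbl/tbx and pmul dependency chains: independent work from
	 * the other states can issue while one state's lookups are
	 * still in flight.
	 */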

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm
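
	/*
	 * Same round structure as do_block, but four blocks wide;
	 * mix_columns_2x is invoked twice since it transforms two
	 * states per call.
	 */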

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

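	/*
	 * Including the shared mode template instantiates the
	 * ECB/CBC/CTR/XTS entry points from the macros above;
	 * AES_FUNC_START gives them a neon_ prefix (e.g.
	 * neon_aes_ecb_encrypt).
	 */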
#include "aes-modes.S"

	.section	".rodata", "a"
	.align		4
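	/*
	 * Permutation vectors for tbl: the ShiftRows tables map the
	 * column-major state bytes to their (inverse-)shifted
	 * positions, and .Lror32by8 rotates each 32-bit column right
	 * by 8 bits for mix_columns.
	 */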
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201