cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

aes-ce-ccm-core.S (5964B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			    u32 macp, u8 const rk[], u32 rounds);
	 */
SYM_FUNC_START(ce_aes_ccm_auth_data)
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w3, 1f
	sub	w3, w3, #16
	eor	v1.16b, v1.16b, v1.16b		/* clear input collector */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w3, w3, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w3, 0b
	eor	v0.16b, v0.16b, v1.16b
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f				/* AES-128 (10 rounds) */
	bne	5f				/* AES-256 (14 rounds) */
	mov	v5.16b, v3.16b
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f
	adds	w2, w2, #16
	beq	10f
	mov	w3, w2
7:	ldrb	w7, [x1], #1			/* fold tail bytes into mac */
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
8:	cbz	w3, 91f				/* partial block complete? */
	mov	w7, w3
	add	w3, w3, #16			/* w3 := updated macp */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* realign the partial block */
	adds	w7, w7, #1
	bne	9b
91:	eor	v0.16b, v0.16b, v1.16b		/* fold partial block into mac */
	st1	{v0.16b}, [x0]
10:	mov	w0, w3				/* return updated macp */
	ret
SYM_FUNC_END(ce_aes_ccm_auth_data)
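
	/*
	 * A minimal C usage sketch for the routine above, not taken from this
	 * file: the macp handling follows the kernel's aes-ce-ccm-glue.c,
	 * where mac[] carries the running CBC-MAC state, macp is the number
	 * of bytes already buffered in the current partial block, and the
	 * return value is the updated count, so associated data may arrive
	 * in arbitrarily sized chunks:
	 *
	 *	static u32 ccm_auth_chunks(u8 mac[16], const u8 *chunks[],
	 *				   const u32 lens[], int n,
	 *				   const u8 *rk, u32 rounds)
	 *	{
	 *		u32 macp = 0;
	 *		int i;
	 *
	 *		for (i = 0; i < n; i++)
	 *			macp = ce_aes_ccm_auth_data(mac, chunks[i],
	 *						    lens[i], macp,
	 *						    rk, rounds);
	 *		return macp;
	 *	}
	 */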

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 * 			 u32 rounds);
	 */
SYM_FUNC_START(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_final)
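
	/*
	 * Note on the function above: the pending CBC-MAC block (v0) and the
	 * counter block (v1) are encrypted in parallel through every round
	 * except the last AddRoundKey, which XORs out of the combined result,
	 * leaving E(mac) ^ E(ctr), the encrypted authentication tag. A hedged
	 * C sketch of the corresponding call, assuming ctr0 is the CCM
	 * counter block with its counter field still at zero:
	 *
	 *	u8 tag[16];
	 *
	 *	ce_aes_ccm_final(mac, ctr0, rk, rounds);
	 *	memcpy(tag, mac, authsize);	// authsize <= 16
	 */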

	.macro	aes_ccm_do_crypt,enc
	cbz	x2, 5f
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9
	eor	w9, w9, w6
	.else
	eor	w9, w9, w6
	eor	w7, w7, w9
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm
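
	/*
	 * Design note on the macro above: the CBC-MAC state (v0) and the CTR
	 * keystream block (v1) travel through the AES rounds together, 2x
	 * interleaved, so the aese/aesmc result latency of one stream is
	 * hidden behind the other. The final AddRoundKey (v5) is folded into
	 * the eor instructions after the loop rather than applied with aese.
	 */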

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)
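
	/*
	 * A sketch of how the three entry points might be combined for a
	 * one-shot CCM encryption. This is an illustration under assumptions,
	 * not the actual caller (that is arch/arm64/crypto/aes-ce-ccm-glue.c):
	 * build_b0()/build_a0() are hypothetical helpers producing the RFC
	 * 3610 B_0 and A_0 blocks, aad is assumed to start with its
	 * CCM-encoded length, and rk/rounds describe the expanded AES
	 * encryption key schedule:
	 *
	 *	u8 mac[16], ctr[16], ctr0[16];
	 *	u32 macp;
	 *
	 *	build_b0(mac, iv, len, authsize);	// hypothetical
	 *	build_a0(ctr, iv);			// hypothetical
	 *	memcpy(ctr0, ctr, 16);	// keep A_0; the bulk pass updates ctr
	 *
	 *	macp = ce_aes_ccm_auth_data(mac, aad, aad_len, 0, rk, rounds);
	 *	ce_aes_ccm_encrypt(out, in, len, rk, rounds, mac, ctr);
	 *	ce_aes_ccm_final(mac, ctr0, rk, rounds);	// mac holds the tag
	 */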