cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

aes-cipher-core.S (4440B)


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

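	/*
	 * Symbolic names for the argument registers (AAPCS: r0-r3 carry the
	 * round key pointer, the round count and the input/output block
	 * pointers) and for the scratch registers.  t1/t2 alias in/out: the
	 * input pointer is no longer needed once the block has been loaded,
	 * and the output pointer is saved on the stack and reloaded just
	 * before the result is stored.
	 */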
	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

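	/*
	 * __select: extract byte \idx of word \in into \out.  On ARMv7+ the
	 * byte is placed in bits 7..0 with ubfx; on older architectures it is
	 * only masked in place, and the shift down is folded into the table
	 * address calculation in __load below.
	 */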
	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

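	/*
	 * __load: look up the table entry for a byte produced by __select.
	 * \sz is log2 of the entry size (2 for the 32-bit lookup tables, 0
	 * for the byte-wide inverse S-box) and \op is the ldr size suffix.
	 * On pre-v7 cores the byte is still at bit position 8 * \idx, so e.g.
	 * for \idx == 1 and \sz == 2 an lsr #6 turns it into byte * 4, the
	 * correct offset into a table of 32-bit entries.
	 */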
	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

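	/*
	 * __hround: compute two output columns (\out0, \out1) of one AES
	 * round from the four input columns.  Each output column combines
	 * four table lookups, rotated by 0/8/16/24 bits and XORed together,
	 * and is then XORed with the next round key word fetched via ldm rk!.
	 * \enc selects the ShiftRows (encryption) or InvShiftRows
	 * (decryption) byte pattern; if \oldcpsr is given, interrupts are
	 * restored once the last data-dependent lookup has been issued.
	 */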
	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

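	/*
	 * fround/iround: one full forward/inverse round, i.e. two __hround
	 * invocations covering all four state columns.  \sz and \op default
	 * to 32-bit table lookups and are overridden for the final round,
	 * which uses byte loads.
	 */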
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

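	/*
	 * do_crypt: common encrypt/decrypt skeleton.  \round is fround or
	 * iround, \ttab the main lookup table, \ltab the table for the final
	 * round (blank for encryption, which reuses \ttab; see below) and
	 * \bsz the log2 entry size passed to the final round.
	 */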
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

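	// AES uses 10, 12 or 14 rounds.  The loop below performs all but the
	// last of them, four per iteration; the tst/bne entry adjustment
	// handles round counts that are not a multiple of four.  The final
	// round is done separately afterwards using byte loads.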
	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
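	// Encryption: reuse the forward table for the final round.  Offsetting
	// the base by one byte makes the byte loads pick up the plain S-box
	// values embedded in the 32-bit table entries.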
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)
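
For context, the two entry points above take the expanded round key, the round
count and the input/output block pointers in r0-r3, matching the register
aliases at the top of the file. Below is a minimal sketch of how they are
typically driven from C glue code (along the lines of
arch/arm/crypto/aes-cipher-glue.c); the wrapper name aes_arm_encrypt_one is
illustrative, not part of this file.

#include <linux/linkage.h>
#include <crypto/aes.h>

asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);

/* Illustrative wrapper: AES-128/192/256 keys use 10/12/14 rounds. */
static void aes_arm_encrypt_one(const struct crypto_aes_ctx *ctx,
				u8 *out, const u8 *in)
{
	int rounds = 6 + ctx->key_length / 4;

	__aes_arm_encrypt((u32 *)ctx->key_enc, rounds, in, out);
}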