cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

aes-spe-core.S (7630B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * Fast AES implementation for SPE instruction set (PPC)
      4 *
      5 * This code makes use of the SPE SIMD instruction set as defined in
      6 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
      7 * Implementation is based on optimization guide notes from
      8 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
      9 *
     10 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
     11 */
     12
     13#include <asm/ppc_asm.h>
     14#include "aes-spe-regs.h"
     15
     16#define	EAD(in, bpos) \
     17	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;
     18
     19#define DAD(in, bpos) \
     20	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;
     21
     22#define LWH(out, off) \
     23	evlwwsplat	out,off(rT0);	/* load word high		*/
     24
     25#define LWL(out, off) \
     26	lwz		out,off(rT0);	/* load word low		*/
     27
     28#define LBZ(out, tab, off) \
     29	lbz		out,off(tab);	/* load byte			*/
     30
     31#define LAH(out, in, bpos, off) \
     32	EAD(in, bpos)			/* calc addr + load word high	*/ \
     33	LWH(out, off)
     34
     35#define LAL(out, in, bpos, off) \
     36	EAD(in, bpos)			/* calc addr + load word low	*/ \
     37	LWL(out, off)
     38
     39#define LAE(out, in, bpos) \
     40	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
     41	LBZ(out, rT0, 8)
     42
     43#define LBE(out) \
     44	LBZ(out, rT0, 8)		/* load enc byte		*/
     45
     46#define LAD(out, in, bpos) \
     47	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
     48	LBZ(out, rT1, 0)
     49
     50#define LBD(out) \
     51	LBZ(out, rT1, 0)
     52
     53/*
     54 * ppc_encrypt_block: The central encryption function for a single 16 bytes
     55 * block. It does no stack handling or register saving to support fast calls
     56 * via bl/blr. It expects that caller has pre-xored input data with first
     57 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
     58 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
     59 * and rW0-rW3 and caller must execute a final xor on the output registers.
     60 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
     61 *
     62 */
     63_GLOBAL(ppc_encrypt_block)
     64	LAH(rW4, rD1, 2, 4)
     65	LAH(rW6, rD0, 3, 0)
     66	LAH(rW3, rD0, 1, 8)
     67ppc_encrypt_block_loop:
     68	LAH(rW0, rD3, 0, 12)
     69	LAL(rW0, rD0, 0, 12)
     70	LAH(rW1, rD1, 0, 12)
     71	LAH(rW2, rD2, 1, 8)
     72	LAL(rW2, rD3, 1, 8)
     73	LAL(rW3, rD1, 1, 8)
     74	LAL(rW4, rD2, 2, 4)
     75	LAL(rW6, rD1, 3, 0)
     76	LAH(rW5, rD3, 2, 4)
     77	LAL(rW5, rD0, 2, 4)
     78	LAH(rW7, rD2, 3, 0)
     79	evldw		rD1,16(rKP)
     80	EAD(rD3, 3)
     81	evxor		rW2,rW2,rW4
     82	LWL(rW7, 0)
     83	evxor		rW2,rW2,rW6
     84	EAD(rD2, 0)
     85	evxor		rD1,rD1,rW2
     86	LWL(rW1, 12)
     87	evxor		rD1,rD1,rW0
     88	evldw		rD3,24(rKP)
     89	evmergehi	rD0,rD0,rD1
     90	EAD(rD1, 2)
     91	evxor		rW3,rW3,rW5
     92	LWH(rW4, 4)
     93	evxor		rW3,rW3,rW7
     94	EAD(rD0, 3)
     95	evxor		rD3,rD3,rW3
     96	LWH(rW6, 0)
     97	evxor		rD3,rD3,rW1
     98	EAD(rD0, 1)
     99	evmergehi	rD2,rD2,rD3
    100	LWH(rW3, 8)
    101	LAH(rW0, rD3, 0, 12)
    102	LAL(rW0, rD0, 0, 12)
    103	LAH(rW1, rD1, 0, 12)
    104	LAH(rW2, rD2, 1, 8)
    105	LAL(rW2, rD3, 1, 8)
    106	LAL(rW3, rD1, 1, 8)
    107	LAL(rW4, rD2, 2, 4)
    108	LAL(rW6, rD1, 3, 0)
    109	LAH(rW5, rD3, 2, 4)
    110	LAL(rW5, rD0, 2, 4)
    111	LAH(rW7, rD2, 3, 0)
    112	evldw		rD1,32(rKP)
    113	EAD(rD3, 3)
    114	evxor		rW2,rW2,rW4
    115	LWL(rW7, 0)
    116	evxor		rW2,rW2,rW6
    117	EAD(rD2, 0)
    118	evxor		rD1,rD1,rW2
    119	LWL(rW1, 12)
    120	evxor		rD1,rD1,rW0
    121	evldw		rD3,40(rKP)
    122	evmergehi	rD0,rD0,rD1
    123	EAD(rD1, 2)
    124	evxor		rW3,rW3,rW5
    125	LWH(rW4, 4)
    126	evxor		rW3,rW3,rW7
    127	EAD(rD0, 3)
    128	evxor		rD3,rD3,rW3
    129	LWH(rW6, 0)
    130	evxor		rD3,rD3,rW1
    131	EAD(rD0, 1)
    132	evmergehi	rD2,rD2,rD3
    133	LWH(rW3, 8)
    134	addi		rKP,rKP,32
    135	bdnz		ppc_encrypt_block_loop
    136	LAH(rW0, rD3, 0, 12)
    137	LAL(rW0, rD0, 0, 12)
    138	LAH(rW1, rD1, 0, 12)
    139	LAH(rW2, rD2, 1, 8)
    140	LAL(rW2, rD3, 1, 8)
    141	LAL(rW3, rD1, 1, 8)
    142	LAL(rW4, rD2, 2, 4)
    143	LAH(rW5, rD3, 2, 4)
    144	LAL(rW6, rD1, 3, 0)
    145	LAL(rW5, rD0, 2, 4)
    146	LAH(rW7, rD2, 3, 0)
    147	evldw		rD1,16(rKP)
    148	EAD(rD3, 3)
    149	evxor		rW2,rW2,rW4
    150	LWL(rW7, 0)
    151	evxor		rW2,rW2,rW6
    152	EAD(rD2, 0)
    153	evxor		rD1,rD1,rW2
    154	LWL(rW1, 12)
    155	evxor		rD1,rD1,rW0
    156	evldw		rD3,24(rKP)
    157	evmergehi	rD0,rD0,rD1
    158	EAD(rD1, 0)
    159	evxor		rW3,rW3,rW5
    160	LBE(rW2)
    161	evxor		rW3,rW3,rW7
    162	EAD(rD0, 1)
    163	evxor		rD3,rD3,rW3
    164	LBE(rW6)
    165	evxor		rD3,rD3,rW1
    166	EAD(rD0, 0)
    167	evmergehi	rD2,rD2,rD3
    168	LBE(rW1)
    169	LAE(rW0, rD3, 0)
    170	LAE(rW1, rD0, 0)
    171	LAE(rW4, rD2, 1)
    172	LAE(rW5, rD3, 1)
    173	LAE(rW3, rD2, 0)
    174	LAE(rW7, rD1, 1)
    175	rlwimi		rW0,rW4,8,16,23
    176	rlwimi		rW1,rW5,8,16,23
    177	LAE(rW4, rD1, 2)
    178	LAE(rW5, rD2, 2)
    179	rlwimi		rW2,rW6,8,16,23
    180	rlwimi		rW3,rW7,8,16,23
    181	LAE(rW6, rD3, 2)
    182	LAE(rW7, rD0, 2)
    183	rlwimi		rW0,rW4,16,8,15
    184	rlwimi		rW1,rW5,16,8,15
    185	LAE(rW4, rD0, 3)
    186	LAE(rW5, rD1, 3)
    187	rlwimi		rW2,rW6,16,8,15
    188	lwz		rD0,32(rKP)
    189	rlwimi		rW3,rW7,16,8,15
    190	lwz		rD1,36(rKP)
    191	LAE(rW6, rD2, 3)
    192	LAE(rW7, rD3, 3)
    193	rlwimi		rW0,rW4,24,0,7
    194	lwz		rD2,40(rKP)
    195	rlwimi		rW1,rW5,24,0,7
    196	lwz		rD3,44(rKP)
    197	rlwimi		rW2,rW6,24,0,7
    198	rlwimi		rW3,rW7,24,0,7
    199	blr
    200
    201/*
    202 * ppc_decrypt_block: The central decryption function for a single 16 bytes
    203 * block. It does no stack handling or register saving to support fast calls
    204 * via bl/blr. It expects that caller has pre-xored input data with first
    205 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
    206 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
    207 * and rW0-rW3 and caller must execute a final xor on the output registers.
    208 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
    209 *
    210 */
    211_GLOBAL(ppc_decrypt_block)
    212	LAH(rW0, rD1, 0, 12)
    213	LAH(rW6, rD0, 3, 0)
    214	LAH(rW3, rD0, 1, 8)
    215ppc_decrypt_block_loop:
    216	LAH(rW1, rD3, 0, 12)
    217	LAL(rW0, rD2, 0, 12)
    218	LAH(rW2, rD2, 1, 8)
    219	LAL(rW2, rD3, 1, 8)
    220	LAH(rW4, rD3, 2, 4)
    221	LAL(rW4, rD0, 2, 4)
    222	LAL(rW6, rD1, 3, 0)
    223	LAH(rW5, rD1, 2, 4)
    224	LAH(rW7, rD2, 3, 0)
    225	LAL(rW7, rD3, 3, 0)
    226	LAL(rW3, rD1, 1, 8)
    227	evldw		rD1,16(rKP)
    228	EAD(rD0, 0)
    229	evxor		rW4,rW4,rW6
    230	LWL(rW1, 12)
    231	evxor		rW0,rW0,rW4
    232	EAD(rD2, 2)
    233	evxor		rW0,rW0,rW2
    234	LWL(rW5, 4)
    235	evxor		rD1,rD1,rW0
    236	evldw		rD3,24(rKP)
    237	evmergehi	rD0,rD0,rD1
    238	EAD(rD1, 0)
    239	evxor		rW3,rW3,rW7
    240	LWH(rW0, 12)
    241	evxor		rW3,rW3,rW1
    242	EAD(rD0, 3)
    243	evxor		rD3,rD3,rW3
    244	LWH(rW6, 0)
    245	evxor		rD3,rD3,rW5
    246	EAD(rD0, 1)
    247	evmergehi	rD2,rD2,rD3
    248	LWH(rW3, 8)
    249	LAH(rW1, rD3, 0, 12)
    250	LAL(rW0, rD2, 0, 12)
    251	LAH(rW2, rD2, 1, 8)
    252	LAL(rW2, rD3, 1, 8)
    253	LAH(rW4, rD3, 2, 4)
    254	LAL(rW4, rD0, 2, 4)
    255	LAL(rW6, rD1, 3, 0)
    256	LAH(rW5, rD1, 2, 4)
    257	LAH(rW7, rD2, 3, 0)
    258	LAL(rW7, rD3, 3, 0)
    259	LAL(rW3, rD1, 1, 8)
    260	evldw		 rD1,32(rKP)
    261	EAD(rD0, 0)
    262	evxor		rW4,rW4,rW6
    263	LWL(rW1, 12)
    264	evxor		rW0,rW0,rW4
    265	EAD(rD2, 2)
    266	evxor		rW0,rW0,rW2
    267	LWL(rW5, 4)
    268	evxor		rD1,rD1,rW0
    269	evldw		rD3,40(rKP)
    270	evmergehi	rD0,rD0,rD1
    271	EAD(rD1, 0)
    272	evxor		rW3,rW3,rW7
    273	LWH(rW0, 12)
    274	evxor		rW3,rW3,rW1
    275	EAD(rD0, 3)
    276	evxor		rD3,rD3,rW3
    277	LWH(rW6, 0)
    278	evxor		rD3,rD3,rW5
    279	EAD(rD0, 1)
    280	evmergehi	rD2,rD2,rD3
    281	LWH(rW3, 8)
    282	addi		rKP,rKP,32
    283	bdnz		ppc_decrypt_block_loop
    284	LAH(rW1, rD3, 0, 12)
    285	LAL(rW0, rD2, 0, 12)
    286	LAH(rW2, rD2, 1, 8)
    287	LAL(rW2, rD3, 1, 8)
    288	LAH(rW4, rD3, 2, 4)
    289	LAL(rW4, rD0, 2, 4)
    290	LAL(rW6, rD1, 3, 0)
    291	LAH(rW5, rD1, 2, 4)
    292	LAH(rW7, rD2, 3, 0)
    293	LAL(rW7, rD3, 3, 0)
    294	LAL(rW3, rD1, 1, 8)
    295	evldw		 rD1,16(rKP)
    296	EAD(rD0, 0)
    297	evxor		rW4,rW4,rW6
    298	LWL(rW1, 12)
    299	evxor		rW0,rW0,rW4
    300	EAD(rD2, 2)
    301	evxor		rW0,rW0,rW2
    302	LWL(rW5, 4)
    303	evxor		rD1,rD1,rW0
    304	evldw		rD3,24(rKP)
    305	evmergehi	rD0,rD0,rD1
    306	DAD(rD1, 0)
    307	evxor		rW3,rW3,rW7
    308	LBD(rW0)
    309	evxor		rW3,rW3,rW1
    310	DAD(rD0, 1)
    311	evxor		rD3,rD3,rW3
    312	LBD(rW6)
    313	evxor		rD3,rD3,rW5
    314	DAD(rD0, 0)
    315	evmergehi	rD2,rD2,rD3
    316	LBD(rW3)
    317	LAD(rW2, rD3, 0)
    318	LAD(rW1, rD2, 0)
    319	LAD(rW4, rD2, 1)
    320	LAD(rW5, rD3, 1)
    321	LAD(rW7, rD1, 1)
    322	rlwimi		rW0,rW4,8,16,23
    323	rlwimi		rW1,rW5,8,16,23
    324	LAD(rW4, rD3, 2)
    325	LAD(rW5, rD0, 2)
    326	rlwimi		rW2,rW6,8,16,23
    327	rlwimi		rW3,rW7,8,16,23
    328	LAD(rW6, rD1, 2)
    329	LAD(rW7, rD2, 2)
    330	rlwimi		rW0,rW4,16,8,15
    331	rlwimi		rW1,rW5,16,8,15
    332	LAD(rW4, rD0, 3)
    333	LAD(rW5, rD1, 3)
    334	rlwimi		rW2,rW6,16,8,15
    335	lwz		rD0,32(rKP)
    336	rlwimi		rW3,rW7,16,8,15
    337	lwz		rD1,36(rKP)
    338	LAD(rW6, rD2, 3)
    339	LAD(rW7, rD3, 3)
    340	rlwimi		rW0,rW4,24,0,7
    341	lwz		rD2,40(rKP)
    342	rlwimi		rW1,rW5,24,0,7
    343	lwz		rD3,44(rKP)
    344	rlwimi		rW2,rW6,24,0,7
    345	rlwimi		rW3,rW7,24,0,7
    346	blr