cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

aes-spe-modes.S (14691B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
      4 *
      5 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
      6 */
      7
      8#include <asm/ppc_asm.h>
      9#include "aes-spe-regs.h"
     10
     11#ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/
     12
       /*
        * Big endian: data and IV words are accessed with plain lwz/stw at
        * the given offset, so rSP/rDP/rIP are not modified by the access
        * macros. NEXT_BLOCK advances rSP and rDP by one 16 byte block and
        * START_IV has nothing to undo.
        *
        * CBC_DEC is what ppc_decrypt_cbc subtracts from rSP/rDP to step
        * back one block; CTR_DEC is what ppc_crypt_ctr subtracts from rIP
        * before its pre-incrementing (lbzu) byte loop.
        */
     13#define LOAD_DATA(reg, off) \
     14	lwz		reg,off(rSP);	/* load with offset		*/
     15#define SAVE_DATA(reg, off) \
     16	stw		reg,off(rDP);	/* save with offset		*/
     17#define NEXT_BLOCK \
     18	addi		rSP,rSP,16;	/* increment pointers per block	*/ \
     19	addi		rDP,rDP,16;
     20#define LOAD_IV(reg, off) \
     21	lwz		reg,off(rIP);	/* IV loading with offset	*/
     22#define SAVE_IV(reg, off) \
     23	stw		reg,off(rIP);	/* IV saving with offset	*/
     24#define START_IV			/* nothing to reset		*/
     25#define CBC_DEC 16			/* CBC decrement per block	*/
     26#define CTR_DEC 1			/* CTR decrement one byte	*/
     27
     28#else					/* Macros for little endian	*/
     29
       /*
        * Little endian: words are accessed byte-reversed with lwbrx/stwbrx.
        * The macros ignore `off` and instead advance the pointer by 4 after
        * every access, so four accesses move rSP/rDP/rIP by a whole block.
        * Hence NEXT_BLOCK is empty, START_IV rewinds rIP by 16 after four
        * IV accesses, and CBC_DEC/CTR_DEC are 16 larger than on big endian.
        */
     30#define LOAD_DATA(reg, off) \
     31	lwbrx		reg,0,rSP;	/* load reversed		*/ \
     32	addi		rSP,rSP,4;	/* and increment pointer	*/
     33#define SAVE_DATA(reg, off) \
     34	stwbrx		reg,0,rDP;	/* save reversed		*/ \
     35	addi		rDP,rDP,4;	/* and increment pointer	*/
     36#define NEXT_BLOCK			/* nothing to do		*/
     37#define LOAD_IV(reg, off) \
     38	lwbrx		reg,0,rIP;	/* load reversed		*/ \
     39	addi		rIP,rIP,4;	/* and increment pointer	*/
     40#define SAVE_IV(reg, off) \
     41	stwbrx		reg,0,rIP;	/* save reversed		*/ \
     42	addi		rIP,rIP,4;	/* and increment pointer	*/
     43#define START_IV \
     44	subi		rIP,rIP,16;	/* must reset pointer		*/
     45#define CBC_DEC 32			/* 2 blocks because of incs	*/
     46#define CTR_DEC 17			/* 1 block because of incs	*/
     47
     48#endif
     49
       /*
        * SAVE_<n>_REGS / LOAD_<n>_REGS spill and reload <n> 32 bit registers
        * in the stack frame. INITIALIZE_CRYPT and FINALIZE_CRYPT select the
        * variant by pasting their nr32bitregs argument into the macro name.
        *   0: nothing
        *   4: rI0-rI3 at 96..108(r1)
        *   8: additionally rG0-rG3 at 112..124(r1)
        */
     50#define SAVE_0_REGS
     51#define LOAD_0_REGS
     52
     53#define SAVE_4_REGS \
     54	stw		rI0,96(r1);	/* save 32 bit registers	*/ \
     55	stw		rI1,100(r1);					   \
     56	stw		rI2,104(r1);					   \
     57	stw		rI3,108(r1);
     58
     59#define LOAD_4_REGS \
     60	lwz		rI0,96(r1);	/* restore 32 bit registers	*/ \
     61	lwz		rI1,100(r1);					   \
     62	lwz		rI2,104(r1);					   \
     63	lwz		rI3,108(r1);
     64
     65#define SAVE_8_REGS \
     66	SAVE_4_REGS							   \
     67	stw		rG0,112(r1);	/* save 32 bit registers	*/ \
     68	stw		rG1,116(r1);					   \
     69	stw		rG2,120(r1);					   \
     70	stw		rG3,124(r1);
     71
     72#define LOAD_8_REGS \
     73	LOAD_4_REGS							   \
     74	lwz		rG0,112(r1);	/* restore 32 bit registers	*/ \
     75	lwz		rG1,116(r1);					   \
     76	lwz		rG2,120(r1);					   \
     77	lwz		rG3,124(r1);
     78
       /*
        * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue
        *
        * Creates a 160 byte stack frame, stores the link register at 8(r1),
        * loads the address of the lookup table `tab` into rT0 and saves
        * r14-r23 with evstdw in 8 byte slots at 16..88(r1). rKP is copied to
        * rKS; the multi block loops copy rKS back into rKP before every
        * block. Finally SAVE_<nr32bitregs>_REGS stores the additional 32 bit
        * registers the function is going to use. Clobbers r0.
        */
     79#define INITIALIZE_CRYPT(tab,nr32bitregs) \
     80	mflr		r0;						   \
     81	stwu		r1,-160(r1);	/* create stack frame		*/ \
     82	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
     83	stw		r0,8(r1);	/* save link register		*/ \
     84	ori		rT0,rT0,tab@l;					   \
     85	evstdw		r14,16(r1);					   \
     86	mr		rKS,rKP;					   \
     87	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
     88	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
     89	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
     90	evstdw		r18,48(r1);					   \
     91	evstdw		r19,56(r1);					   \
     92	evstdw		r20,64(r1);					   \
     93	evstdw		r21,72(r1);					   \
     94	evstdw		r22,80(r1);					   \
     95	evstdw		r23,88(r1);					   \
     96	SAVE_##nr32bitregs##_REGS
     97
       /*
        * FINALIZE_CRYPT(nr32bitregs) - common function epilogue
        *
        * Mirror of INITIALIZE_CRYPT: reloads the saved link register value,
        * r14-r23 and the registers stored by SAVE_<nr32bitregs>_REGS, then
        * overwrites one word of each r14-r23 save slot with zero and pops
        * the 160 byte frame. Leaves r0 = 0. The function itself still has
        * to execute blr afterwards.
        *
        * NOTE(review): only the word at offset 16, 24, ... 88 of each 8 byte
        * slot is cleared, and the 96..124(r1) slots are not cleared at all.
        * Presumably the cleared word is the SPE upper half that may hold
        * cipher state - confirm this is the intended scope of the wipe.
        */
     98#define FINALIZE_CRYPT(nr32bitregs) \
     99	lwz		r0,8(r1);					   \
    100	evldw		r14,16(r1);	/* restore SPE registers	*/ \
    101	evldw		r15,24(r1);					   \
    102	evldw		r16,32(r1);					   \
    103	evldw		r17,40(r1);					   \
    104	evldw		r18,48(r1);					   \
    105	evldw		r19,56(r1);					   \
    106	evldw		r20,64(r1);					   \
    107	evldw		r21,72(r1);					   \
    108	evldw		r22,80(r1);					   \
    109	evldw		r23,88(r1);					   \
    110	LOAD_##nr32bitregs##_REGS					   \
    111	mtlr		r0;		/* restore link register	*/ \
    112	xor		r0,r0,r0;					   \
    113	stw		r0,16(r1);	/* delete sensitive data	*/ \
    114	stw		r0,24(r1);	/* that we might have pushed	*/ \
    115	stw		r0,32(r1);	/* from other context that runs	*/ \
    116	stw		r0,40(r1);	/* the same code		*/ \
    117	stw		r0,48(r1);					   \
    118	stw		r0,56(r1);					   \
    119	stw		r0,64(r1);					   \
    120	stw		r0,72(r1);					   \
    121	stw		r0,80(r1);					   \
    122	stw		r0,88(r1);					   \
    123	addi		r1,r1,160;	/* cleanup stack frame		*/
    124
       /*
        * ENDIAN_SWAP(t0, t1, s0, s1) - byte-reverse two 32 bit words
        *
        * t0 = byte-swapped s0, t1 = byte-swapped s1. Each target is first
        * set to the source rotated right by 8, which puts two of the four
        * bytes in place; the two rlwimi then insert the other two bytes
        * from the source rotated left by 8. The sources are read again
        * after the targets have been written, so t0/t1 must not be the
        * same registers as s0/s1.
        */
    125#define ENDIAN_SWAP(t0, t1, s0, s1) \
    126	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs	*/ \
    127	rotrwi		t1,s1,8;					   \
    128	rlwimi		t0,s0,8,8,15;					   \
    129	rlwimi		t1,s1,8,8,15;					   \
    130	rlwimi		t0,s0,8,24,31;					   \
    131	rlwimi		t1,s1,8,24,31;
    132
       /*
        * GF128_MUL(d0, d1, d2, d3, t0) - multiply a 128 bit value by x
        *
        * d3:d2:d1:d0 is treated as one 128 bit value with d0 as the least
        * significant word. The value is shifted left by one bit; the top
        * bit of each lower word is carried into the bottom bit of the next
        * higher word (rlwimi copies it over the top bit, rotlwi moves it
        * down). If the top bit of d3 was set, 0x87 is xored into d0.
        *
        * The sign test on d3 comes first (cmpwi/iselgt leave 0 or 0x87 in
        * t0) because d3 is modified right afterwards. Clobbers t0 and cr0.
        */
    133#define GF128_MUL(d0, d1, d2, d3, t0) \
    134	li		t0,0x87;	/* multiplication in GF128	*/ \
    135	cmpwi		d3,-1;						   \
    136	iselgt		t0,0,t0;					   \
    137	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits	*/ \
    138	rotlwi		d3,d3,1;					   \
    139	rlwimi		d2,d1,0,0,0;					   \
    140	rotlwi		d2,d2,1;					   \
    141	rlwimi		d1,d0,0,0,0;					   \
    142	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
    143	rotlwi		d1,d1,1;					   \
    144	xor		d0,d0,t0;
    145
       /*
        * START_KEY(d0, d1, d2, d3) - initial key addition
        *
        * Loads four key words from 0..12(rKP) into rW0-rW3, moves the round
        * value rRR into CTR and sets rD0-rD3 = d0-d3 xor rW0-rW3.
        */
    146#define START_KEY(d0, d1, d2, d3) \
    147	lwz		rW0,0(rKP);					   \
    148	mtctr		rRR;						   \
    149	lwz		rW1,4(rKP);					   \
    150	lwz		rW2,8(rKP);					   \
    151	lwz		rW3,12(rKP);					   \
    152	xor		rD0,d0,rW0;					   \
    153	xor		rD1,d1,rW1;					   \
    154	xor		rD2,d2,rW2;					   \
    155	xor		rD3,d3,rW3;
    156
    157/*
    158 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
    159 *		   u32 rounds)
    160 *
    161 * called from glue layer to encrypt a single 16 byte block
    162 * round values are AES128 = 4, AES192 = 5, AES256 = 6
    163 *
        * Flow: load four words through rSP, add the first round key
        * (START_KEY), run ppc_encrypt_block, xor the result with rW0-rW3
        * and store it through rDP.
        *
    164 */
    165_GLOBAL(ppc_encrypt_aes)
    166	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
    167	LOAD_DATA(rD0, 0)
    168	LOAD_DATA(rD1, 4)
    169	LOAD_DATA(rD2, 8)
    170	LOAD_DATA(rD3, 12)
    171	START_KEY(rD0, rD1, rD2, rD3)
    172	bl		ppc_encrypt_block
       	/*
       	 * NOTE(review): rW0-rW3 are presumably the final round key left
       	 * behind by ppc_encrypt_block (defined elsewhere) - confirm.
       	 */
    173	xor		rD0,rD0,rW0
    174	SAVE_DATA(rD0, 0)
    175	xor		rD1,rD1,rW1
    176	SAVE_DATA(rD1, 4)
    177	xor		rD2,rD2,rW2
    178	SAVE_DATA(rD2, 8)
    179	xor		rD3,rD3,rW3
    180	SAVE_DATA(rD3, 12)
    181	FINALIZE_CRYPT(0)
    182	blr
    183
    184/*
    185 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
    186 *		   u32 rounds)
    187 *
    188 * called from glue layer to decrypt a single 16 byte block
    189 * round values are AES128 = 4, AES192 = 5, AES256 = 6
    190 *
        * Same flow as ppc_encrypt_aes, but with the decryption table in rT0,
        * an additional pointer rT1 = rT0 + 4096 and ppc_decrypt_block.
        *
    191 */
    192_GLOBAL(ppc_decrypt_aes)
    193	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
    194	LOAD_DATA(rD0, 0)
    195	addi		rT1,rT0,4096	/* second pointer, 4K past table start */
    196	LOAD_DATA(rD1, 4)
    197	LOAD_DATA(rD2, 8)
    198	LOAD_DATA(rD3, 12)
    199	START_KEY(rD0, rD1, rD2, rD3)
    200	bl		ppc_decrypt_block
       	/*
       	 * NOTE(review): rW0-rW3 are presumably the final round key left
       	 * behind by ppc_decrypt_block (defined elsewhere) - confirm.
       	 */
    201	xor		rD0,rD0,rW0
    202	SAVE_DATA(rD0, 0)
    203	xor		rD1,rD1,rW1
    204	SAVE_DATA(rD1, 4)
    205	xor		rD2,rD2,rW2
    206	SAVE_DATA(rD2, 8)
    207	xor		rD3,rD3,rW3
    208	SAVE_DATA(rD3, 12)
    209	FINALIZE_CRYPT(0)
    210	blr
    211
    212/*
    213 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
    214 *		   u32 rounds, u32 bytes);
    215 *
    216 * called from glue layer to encrypt multiple blocks via ECB.
    217 * Bytes must be greater than or equal to 16 and only whole blocks
    218 * are processed. Round values are AES128 = 4, AES192 = 5 and
    219 * AES256 = 6
    220 *
        * The loop body runs before the remaining length is tested, so one
        * block is always processed.
        *
    221 */
    222_GLOBAL(ppc_encrypt_ecb)
    223	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
    224ppc_encrypt_ecb_loop:
    225	LOAD_DATA(rD0, 0)
    226	mr		rKP,rKS		/* key pointer saved by INITIALIZE_CRYPT */
    227	LOAD_DATA(rD1, 4)
    228	subi		rLN,rLN,16
    229	LOAD_DATA(rD2, 8)
       	/*
       	 * Sets cr0 for the bt at the end of the loop: continue while at
       	 * least one more whole block (> 15 bytes) remains. NOTE(review):
       	 * relies on ppc_encrypt_block (defined elsewhere) preserving cr0.
       	 */
    230	cmpwi		rLN,15
    231	LOAD_DATA(rD3, 12)
    232	START_KEY(rD0, rD1, rD2, rD3)
    233	bl		ppc_encrypt_block
    234	xor		rD0,rD0,rW0
    235	SAVE_DATA(rD0, 0)
    236	xor		rD1,rD1,rW1
    237	SAVE_DATA(rD1, 4)
    238	xor		rD2,rD2,rW2
    239	SAVE_DATA(rD2, 8)
    240	xor		rD3,rD3,rW3
    241	SAVE_DATA(rD3, 12)
    242	NEXT_BLOCK
    243	bt		gt,ppc_encrypt_ecb_loop
    244	FINALIZE_CRYPT(0)
    245	blr
    246
    247/*
    248 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
    249 *		   u32 rounds, u32 bytes);
    250 *
    251 * called from glue layer to decrypt multiple blocks via ECB.
    252 * Bytes must be greater than or equal to 16 and only whole blocks
    253 * are processed. Round values are AES128 = 4, AES192 = 5 and
    254 * AES256 = 6
    255 *
        * The loop body runs before the remaining length is tested, so one
        * block is always processed.
        *
    256 */
    257_GLOBAL(ppc_decrypt_ecb)
    258	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
    259	addi		rT1,rT0,4096	/* second pointer, 4K past table start */
    260ppc_decrypt_ecb_loop:
    261	LOAD_DATA(rD0, 0)
    262	mr		rKP,rKS		/* key pointer saved by INITIALIZE_CRYPT */
    263	LOAD_DATA(rD1, 4)
    264	subi		rLN,rLN,16
    265	LOAD_DATA(rD2, 8)
       	/*
       	 * Sets cr0 for the bt at the end of the loop: continue while at
       	 * least one more whole block (> 15 bytes) remains. NOTE(review):
       	 * relies on ppc_decrypt_block (defined elsewhere) preserving cr0.
       	 */
    266	cmpwi		rLN,15
    267	LOAD_DATA(rD3, 12)
    268	START_KEY(rD0, rD1, rD2, rD3)
    269	bl		ppc_decrypt_block
    270	xor		rD0,rD0,rW0
    271	SAVE_DATA(rD0, 0)
    272	xor		rD1,rD1,rW1
    273	SAVE_DATA(rD1, 4)
    274	xor		rD2,rD2,rW2
    275	SAVE_DATA(rD2, 8)
    276	xor		rD3,rD3,rW3
    277	SAVE_DATA(rD3, 12)
    278	NEXT_BLOCK
    279	bt		gt,ppc_decrypt_ecb_loop
    280	FINALIZE_CRYPT(0)
    281	blr
    282
    283/*
    284 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
    285 *		   u32 rounds, u32 bytes, u8 *iv);
    286 *
    287 * called from glue layer to encrypt multiple blocks via CBC.
    288 * Bytes must be greater than or equal to 16 and only whole blocks
    289 * are processed. Round values are AES128 = 4, AES192 = 5 and
    290 * AES256 = 6
    291 *
        * rI0-rI3 carry the chaining value: the IV on entry, afterwards the
        * ciphertext block just produced. The last ciphertext block is
        * written back through rIP before returning.
        *
    292 */
    293_GLOBAL(ppc_encrypt_cbc)
    294	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
    295	LOAD_IV(rI0, 0)
    296	LOAD_IV(rI1, 4)
    297	LOAD_IV(rI2, 8)
    298	LOAD_IV(rI3, 12)
    299ppc_encrypt_cbc_loop:
    300	LOAD_DATA(rD0, 0)
    301	mr		rKP,rKS		/* key pointer saved by INITIALIZE_CRYPT */
    302	LOAD_DATA(rD1, 4)
    303	subi		rLN,rLN,16
    304	LOAD_DATA(rD2, 8)
       	/*
       	 * cr0 is consumed by the bt at the end of the loop. NOTE(review):
       	 * relies on ppc_encrypt_block (defined elsewhere) preserving cr0.
       	 */
    305	cmpwi		rLN,15
    306	LOAD_DATA(rD3, 12)
    307	xor		rD0,rD0,rI0	/* plaintext xor chaining value */
    308	xor		rD1,rD1,rI1
    309	xor		rD2,rD2,rI2
    310	xor		rD3,rD3,rI3
    311	START_KEY(rD0, rD1, rD2, rD3)
    312	bl		ppc_encrypt_block
    313	xor		rI0,rD0,rW0	/* result is output and next chaining value */
    314	SAVE_DATA(rI0, 0)
    315	xor		rI1,rD1,rW1
    316	SAVE_DATA(rI1, 4)
    317	xor		rI2,rD2,rW2
    318	SAVE_DATA(rI2, 8)
    319	xor		rI3,rD3,rW3
    320	SAVE_DATA(rI3, 12)
    321	NEXT_BLOCK
    322	bt		gt,ppc_encrypt_cbc_loop
    323	START_IV
    324	SAVE_IV(rI0, 0)
    325	SAVE_IV(rI1, 4)
    326	SAVE_IV(rI2, 8)
    327	SAVE_IV(rI3, 12)
    328	FINALIZE_CRYPT(4)
    329	blr
    330
    331/*
    332 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
    333 *		   u32 rounds, u32 bytes, u8 *iv);
    334 *
    335 * called from glue layer to decrypt multiple blocks via CBC
    336 * round values are AES128 = 4, AES192 = 5, AES256 = 6
    337 *
        * bytes is rounded down to a multiple of 16 and the blocks are
        * processed from the last one to the first one. The last ciphertext
        * block is stored through rIP as the new IV right at the start,
        * while the incoming IV stays in rI0-rI3 for the first block.
        *
        * NOTE(review): 16 is subtracted from the rounded length without a
        * check, so callers presumably have to pass at least one whole
        * block - confirm against the glue layer.
        *
    338 */
    339_GLOBAL(ppc_decrypt_cbc)
    340	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
    341	li		rT1,15
    342	LOAD_IV(rI0, 0)
    343	andc		rLN,rLN,rT1	/* round down to whole blocks */
    344	LOAD_IV(rI1, 4)
    345	subi		rLN,rLN,16	/* offset of the last block */
    346	LOAD_IV(rI2, 8)
    347	add		rSP,rSP,rLN	/* reverse processing		*/
    348	LOAD_IV(rI3, 12)
    349	add		rDP,rDP,rLN
    350	LOAD_DATA(rD0, 0)
    351	addi		rT1,rT0,4096	/* rT1 reused: 4K past table start */
    352	LOAD_DATA(rD1, 4)
    353	LOAD_DATA(rD2, 8)
    354	LOAD_DATA(rD3, 12)
    355	START_IV
    356	SAVE_IV(rD0, 0)
    357	SAVE_IV(rD1, 4)
    358	SAVE_IV(rD2, 8)
    359	cmpwi		rLN,16
    360	SAVE_IV(rD3, 12)
    361	bt		lt,ppc_decrypt_cbc_end	/* only one block */
    362ppc_decrypt_cbc_loop:
    363	mr		rKP,rKS
    364	START_KEY(rD0, rD1, rD2, rD3)
    365	bl		ppc_decrypt_block
    366	subi		rLN,rLN,16
    367	subi		rSP,rSP,CBC_DEC	/* step back to the previous block */
    368	xor		rW0,rD0,rW0
    369	LOAD_DATA(rD0, 0)
    370	xor		rW1,rD1,rW1
    371	LOAD_DATA(rD1, 4)
    372	xor		rW2,rD2,rW2
    373	LOAD_DATA(rD2, 8)
    374	xor		rW3,rD3,rW3
    375	LOAD_DATA(rD3, 12)
       	/*
       	 * rD0-rD3 now hold the previous ciphertext block: it is xored
       	 * into the current output and is the input of the next decryption.
       	 */
    376	xor		rW0,rW0,rD0
    377	SAVE_DATA(rW0, 0)
    378	xor		rW1,rW1,rD1
    379	SAVE_DATA(rW1, 4)
    380	xor		rW2,rW2,rD2
    381	SAVE_DATA(rW2, 8)
    382	xor		rW3,rW3,rD3
    383	SAVE_DATA(rW3, 12)
    384	cmpwi		rLN,15
    385	subi		rDP,rDP,CBC_DEC
    386	bt		gt,ppc_decrypt_cbc_loop
    387ppc_decrypt_cbc_end:
    388	mr		rKP,rKS
    389	START_KEY(rD0, rD1, rD2, rD3)
    390	bl		ppc_decrypt_block
    391	xor		rW0,rW0,rD0
    392	xor		rW1,rW1,rD1
    393	xor		rW2,rW2,rD2
    394	xor		rW3,rW3,rD3
    395	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
    396	SAVE_DATA(rW0, 0)
    397	xor		rW1,rW1,rI1
    398	SAVE_DATA(rW1, 4)
    399	xor		rW2,rW2,rI2
    400	SAVE_DATA(rW2, 8)
    401	xor		rW3,rW3,rI3
    402	SAVE_DATA(rW3, 12)
    403	FINALIZE_CRYPT(4)
    404	blr
    405
    406/*
    407 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
    408 *		 u32 rounds, u32 bytes, u8 *iv);
    409 *
    410 * called from glue layer to encrypt/decrypt multiple blocks
    411 * via CTR. Number of bytes does not need to be a multiple of
    412 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
    413 *
        * rI0-rI3 hold the counter block, with rI3 as the least significant
        * word. Whole blocks are handled in ppc_crypt_ctr_loop. For a
        * trailing partial block the keystream block is temporarily stored
        * in the IV buffer and xored byte by byte. The incremented counter
        * is written back through rIP before returning.
        *
    414 */
    415_GLOBAL(ppc_crypt_ctr)
    416	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
    417	LOAD_IV(rI0, 0)
    418	LOAD_IV(rI1, 4)
    419	LOAD_IV(rI2, 8)
    420	cmpwi		rLN,16
    421	LOAD_IV(rI3, 12)
    422	START_IV
    423	bt		lt,ppc_crypt_ctr_partial	/* less than one block */
    424ppc_crypt_ctr_loop:
    425	mr		rKP,rKS
    426	START_KEY(rI0, rI1, rI2, rI3)
    427	bl		ppc_encrypt_block
    428	xor		rW0,rD0,rW0	/* rW0-rW3 = keystream block */
    429	xor		rW1,rD1,rW1
    430	xor		rW2,rD2,rW2
    431	xor		rW3,rD3,rW3
    432	LOAD_DATA(rD0, 0)
    433	subi		rLN,rLN,16
    434	LOAD_DATA(rD1, 4)
    435	LOAD_DATA(rD2, 8)
    436	LOAD_DATA(rD3, 12)
    437	xor		rD0,rD0,rW0
    438	SAVE_DATA(rD0, 0)
    439	xor		rD1,rD1,rW1
    440	SAVE_DATA(rD1, 4)
    441	xor		rD2,rD2,rW2
    442	SAVE_DATA(rD2, 8)
    443	xor		rD3,rD3,rW3
    444	SAVE_DATA(rD3, 12)
    445	addic		rI3,rI3,1	/* increase counter			*/
    446	addze		rI2,rI2		/* ripple the carry upwards */
    447	addze		rI1,rI1
    448	addze		rI0,rI0
    449	NEXT_BLOCK
    450	cmpwi		rLN,15
    451	bt		gt,ppc_crypt_ctr_loop
    452ppc_crypt_ctr_partial:
    453	cmpwi		rLN,0
    454	bt		eq,ppc_crypt_ctr_end
    455	mr		rKP,rKS
    456	START_KEY(rI0, rI1, rI2, rI3)
    457	bl		ppc_encrypt_block
       	/* park the keystream block in the IV buffer for the byte loop */
    458	xor		rW0,rD0,rW0
    459	SAVE_IV(rW0, 0)
    460	xor		rW1,rD1,rW1
    461	SAVE_IV(rW1, 4)
    462	xor		rW2,rD2,rW2
    463	SAVE_IV(rW2, 8)
    464	xor		rW3,rD3,rW3
    465	SAVE_IV(rW3, 12)
    466	mtctr		rLN		/* remaining bytes as loop count */
       	/*
       	 * lbzu/stbu update the pointer before the access, so all three
       	 * pointers are set to one byte before the first byte to process.
       	 */
    467	subi		rIP,rIP,CTR_DEC
    468	subi		rSP,rSP,1
    469	subi		rDP,rDP,1
    470ppc_crypt_ctr_xorbyte:
    471	lbzu		rW4,1(rIP)	/* bytewise xor for partial block	*/
    472	lbzu		rW5,1(rSP)
    473	xor		rW4,rW4,rW5
    474	stbu		rW4,1(rDP)
    475	bdnz		ppc_crypt_ctr_xorbyte
    476	subf		rIP,rLN,rIP	/* rIP - rLN + 1: start of IV buffer */
    477	addi		rIP,rIP,1
    478	addic		rI3,rI3,1
    479	addze		rI2,rI2
    480	addze		rI1,rI1
    481	addze		rI0,rI0
    482ppc_crypt_ctr_end:
    483	SAVE_IV(rI0, 0)
    484	SAVE_IV(rI1, 4)
    485	SAVE_IV(rI2, 8)
    486	SAVE_IV(rI3, 12)
    487	FINALIZE_CRYPT(4)
    488	blr
    489
    490/*
    491 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
    492 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
    493 *
    494 * called from glue layer to encrypt multiple blocks via XTS.
    495 * If rKT (key_twk) is non-zero, the IV is first encrypted with
    496 * that key to form the initial tweak. Round values are
    497 * AES128 = 4, AES192 = 5, AES256 = 6
    498 *
        * rI0-rI3 hold the tweak as it is xored with the data, rG0-rG3 the
        * byte-swapped copy on which GF128_MUL operates. The tweak for the
        * next block is written back through rIP before returning. The loop
        * body runs before the remaining length is tested.
        *
    499 */
    500_GLOBAL(ppc_encrypt_xts)
    501	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
    502	LOAD_IV(rI0, 0)
    503	LOAD_IV(rI1, 4)
    504	LOAD_IV(rI2, 8)
    505	cmpwi		rKT,0
    506	LOAD_IV(rI3, 12)
    507	bt		eq,ppc_encrypt_xts_notweak	/* no tweak key given */
    508	mr		rKP,rKT
    509	START_KEY(rI0, rI1, rI2, rI3)
    510	bl		ppc_encrypt_block
    511	xor		rI0,rD0,rW0
    512	xor		rI1,rD1,rW1
    513	xor		rI2,rD2,rW2
    514	xor		rI3,rD3,rW3
    515ppc_encrypt_xts_notweak:
    516	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
    517	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
    518ppc_encrypt_xts_loop:
    519	LOAD_DATA(rD0, 0)
    520	mr		rKP,rKS		/* key pointer saved by INITIALIZE_CRYPT */
    521	LOAD_DATA(rD1, 4)
    522	subi		rLN,rLN,16
    523	LOAD_DATA(rD2, 8)
    524	LOAD_DATA(rD3, 12)
    525	xor		rD0,rD0,rI0	/* xor tweak before encryption */
    526	xor		rD1,rD1,rI1
    527	xor		rD2,rD2,rI2
    528	xor		rD3,rD3,rI3
    529	START_KEY(rD0, rD1, rD2, rD3)
    530	bl		ppc_encrypt_block
    531	xor		rD0,rD0,rW0
    532	xor		rD1,rD1,rW1
    533	xor		rD2,rD2,rW2
    534	xor		rD3,rD3,rW3
    535	xor		rD0,rD0,rI0	/* xor tweak after encryption */
    536	SAVE_DATA(rD0, 0)
    537	xor		rD1,rD1,rI1
    538	SAVE_DATA(rD1, 4)
    539	xor		rD2,rD2,rI2
    540	SAVE_DATA(rD2, 8)
    541	xor		rD3,rD3,rI3
    542	SAVE_DATA(rD3, 12)
       	/* next tweak; GF128_MUL clobbers cr0, so compare afterwards */
    543	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
    544	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
    545	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
    546	cmpwi		rLN,0
    547	NEXT_BLOCK
    548	bt		gt,ppc_encrypt_xts_loop
    549	START_IV
    550	SAVE_IV(rI0, 0)
    551	SAVE_IV(rI1, 4)
    552	SAVE_IV(rI2, 8)
    553	SAVE_IV(rI3, 12)
    554	FINALIZE_CRYPT(8)
    555	blr
    556
    557/*
    558 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
    559 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
    560 *
    561 * called from glue layer to decrypt multiple blocks via XTS.
    562 * If rKT (key_twk) is non-zero, the IV is first encrypted with
    563 * that key to form the initial tweak. Round values are
    564 * AES128 = 4, AES192 = 5, AES256 = 6
    565 *
        * The length is counted in bytes: rLN is reduced by 16 per block.
        * The tweak is generated with ppc_encrypt_block even though the data
        * goes through ppc_decrypt_block. rI0-rI3 hold the tweak as it is
        * xored with the data, rG0-rG3 the byte-swapped copy on which
        * GF128_MUL operates. The tweak for the next block is written back
        * through rIP before returning.
        *
    566 */
    567_GLOBAL(ppc_decrypt_xts)
    568	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
    569	LOAD_IV(rI0, 0)
    570	addi		rT1,rT0,4096	/* second pointer, 4K past table start */
    571	LOAD_IV(rI1, 4)
    572	LOAD_IV(rI2, 8)
    573	cmpwi		rKT,0
    574	LOAD_IV(rI3, 12)
    575	bt		eq,ppc_decrypt_xts_notweak	/* no tweak key given */
       	/*
       	 * Move rT0 4096 bytes below PPC_AES_4K_DECTAB for the tweak
       	 * encryption and restore it afterwards. NOTE(review): presumably
       	 * the encryption table is located there - confirm table layout.
       	 */
    576	subi		rT0,rT0,4096
    577	mr		rKP,rKT
    578	START_KEY(rI0, rI1, rI2, rI3)
    579	bl		ppc_encrypt_block
    580	xor		rI0,rD0,rW0
    581	xor		rI1,rD1,rW1
    582	xor		rI2,rD2,rW2
    583	xor		rI3,rD3,rW3
    584	addi		rT0,rT0,4096
    585ppc_decrypt_xts_notweak:
    586	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
    587	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
    588ppc_decrypt_xts_loop:
    589	LOAD_DATA(rD0, 0)
    590	mr		rKP,rKS		/* key pointer saved by INITIALIZE_CRYPT */
    591	LOAD_DATA(rD1, 4)
    592	subi		rLN,rLN,16
    593	LOAD_DATA(rD2, 8)
    594	LOAD_DATA(rD3, 12)
    595	xor		rD0,rD0,rI0	/* xor tweak before decryption */
    596	xor		rD1,rD1,rI1
    597	xor		rD2,rD2,rI2
    598	xor		rD3,rD3,rI3
    599	START_KEY(rD0, rD1, rD2, rD3)
    600	bl		ppc_decrypt_block
    601	xor		rD0,rD0,rW0
    602	xor		rD1,rD1,rW1
    603	xor		rD2,rD2,rW2
    604	xor		rD3,rD3,rW3
    605	xor		rD0,rD0,rI0	/* xor tweak after decryption */
    606	SAVE_DATA(rD0, 0)
    607	xor		rD1,rD1,rI1
    608	SAVE_DATA(rD1, 4)
    609	xor		rD2,rD2,rI2
    610	SAVE_DATA(rD2, 8)
    611	xor		rD3,rD3,rI3
    612	SAVE_DATA(rD3, 12)
       	/* next tweak; GF128_MUL clobbers cr0, so compare afterwards */
    613	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
    614	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
    615	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
    616	cmpwi		rLN,0
    617	NEXT_BLOCK
    618	bt		gt,ppc_decrypt_xts_loop
    619	START_IV
    620	SAVE_IV(rI0, 0)
    621	SAVE_IV(rI1, 4)
    622	SAVE_IV(rI2, 8)
    623	SAVE_IV(rI3, 12)
    624	FINALIZE_CRYPT(8)
    625	blr